PageRenderTime 660ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 1ms

/impl/docgen/docstract.py

https://github.com/CoderPuppy/chromeless
Python | 877 lines | 745 code | 52 blank | 80 comment | 66 complexity | 9010f479f12c5de88c5b141c835f916a MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception
  1. #!/usr/bin/env python
  2. #
  3. # Copyright (c) 2011, Lloyd Hilaiel <lloyd@hilaiel.com>
  4. #
  5. # Permission to use, copy, modify, and/or distribute this software for any
  6. # purpose with or without fee is hereby granted, provided that the above
  7. # copyright notice and this permission notice appear in all copies.
  8. #
  9. # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. import re
  17. import os
  18. import types
  19. class DocStract():
  20. class Type():
  21. def __init__(self, val, orig):
  22. val.pop('source_lines')
  23. if (val.has_key('name') and len(val) == 1):
  24. self.value = val['name']
  25. else:
  26. self.value = val
  27. self.orig = orig
  28. def __repr__(self):
  29. return self.orig
  30. def __init__(self):
  31. # the patterns for finding and processing documentation blocks (and the source line
  32. # Note: these two patterns are identical, except the latter captures groups. The
  33. # first is used to split a source file into chunks of text which are either doc blocks
  34. # or source code, the second extracts information from doc blocks.
  35. self.docBlockFindPat = re.compile('(/\*\* .*? \*/ (?:[\s\n]*[^\/\n]*)?)', re.S | re.X)
  36. self.docBlockProcessPat = re.compile('(/\*\*)(.*?)(\*/)( [\s\n]*[^\/\n]*)? ', re.S | re.X)
  37. # after extracting the comment, fix it up (remove *s and leading spaces)
  38. self.blockFilterPat = re.compile('^\s*\* ?', re.M)
  39. # '@' can be escaped with an '@', i.e. @@function when occurs in text blocks
  40. # will not be eaten by the parser. This pattern is used to unescape text
  41. # blocks.
  42. self.unescapeTagPat = re.compile('@@(?=\w+)', re.M)
  43. # the pattern used to split a comment block to create our token stream.
  44. # The token stream consists of:
  45. # * tags: "@tagname"
  46. # * types: "{typename [optional text content]}
  47. # * text: "freeform text that's not either of the above"
  48. #
  49. # This pattern checks for either of the top two, and is applied using
  50. # .split() which handles the third.
  51. self.tokenizePat = re.compile(r'''
  52. (?:
  53. (?<![@\w]) (@\w+) # a tag that's not preceeded by an @ sign (escape)
  54. )
  55. |
  56. (?:
  57. (?<!\\)
  58. ({\w
  59. (?:[^{}]+|
  60. (?:{
  61. (?:[^{}]+|
  62. (?:{[^{}]}) # a regex is the wrong tool for the job, support
  63. # 3 levels of nested curlies.
  64. )*
  65. })
  66. )*
  67. })
  68. )
  69. ''', re.M | re.X);
  70. self.markerPat = re.compile(r'''^ (?<! [@\w] ) ( @ \w+ ) $''', re.M | re.X)
  71. # block types. Each document block is of one of these types.
  72. self.blockTypes = {
  73. '@constructor': ConstructorBlockHandler("@constructor"),
  74. '@function': FunctionBlockHandler("@function"),
  75. '@module': ModuleBlockHandler("@module"),
  76. '@property': PropertyBlockHandler("@property"),
  77. '@class': ClassBlockHandler("@class"),
  78. '@endclass': EndClassBlockHandler("@endclass"),
  79. '@typedef': TypedefBlockHandler("@typedef"),
  80. '@endtypedef': EndTypedefBlockHandler("@endtypedef")
  81. }
  82. # tag aliases, direct equivalences. Note, RHS is normal form.
  83. self.aliases = {
  84. '@func': '@function',
  85. '@params': '@param',
  86. '@parameter': '@param',
  87. '@parameters': '@param',
  88. '@argument': '@param',
  89. '@arg': '@param',
  90. '@prop': '@property',
  91. '@returns': '@return',
  92. '@description': '@desc',
  93. '@seealso': '@see',
  94. '@see_also': '@see',
  95. '@beginclass': '@class',
  96. '@begin_class': '@class',
  97. '@end_class': '@endclass',
  98. '@throw': '@throws',
  99. '@exception': '@throws'
  100. }
  101. # lookup table of tag handlers, lil' object that can parse and inject
  102. # for different tags.
  103. self.tags = {
  104. '@param': ParamTagHandler('@param'),
  105. '@desc': DescTagHandler('@desc'),
  106. '@return': ReturnTagHandler('@return'),
  107. '@see': SeeTagHandler('@see'),
  108. '@throws': ThrowsTagHandler('@throws'),
  109. '@type': TypeTagHandler('@type'),
  110. }
  111. # these are a list of functions that examine extraction state and try to guess
  112. # what type of construct a documentation block documents
  113. self.typeGuessers = [
  114. hasGetSetterIsPropertyTypeGuesser,
  115. isFunctionIfKeywordInCodeTypeGuesser,
  116. firstBlockIsModuleTypeGuesser,
  117. assignmentIsProbablyPropertyTypeGuesser,
  118. typeWithoutReturnsIsProbablyPropertyTypeGuesser
  119. ]
  120. # these are a list of functions that, given a block type and subsequent chunk of code,
  121. # try to guess the name of the construct being documented
  122. self.nameGuessers = [
  123. standardFunctionNameGuesser,
  124. getSetterNameGuesser,
  125. objectPropertyNameGuesser,
  126. commonJSNameGuesser,
  127. assignToPropertyNameGuesser
  128. ]
  129. def _isMarker(self, tok):
  130. return type(tok) == types.StringType and self.markerPat.match(tok)
  131. def _popNonMarker(self, toks):
  132. nxt = None
  133. if (len(toks) == 0):
  134. return None
  135. if not self._isMarker(self._peekTok(toks)):
  136. nxt = toks.pop(0)
  137. return nxt
  138. def _peekTok(self, toks):
  139. if (len(toks)):
  140. return toks[0]
  141. return None
  142. def _consumeToks(self, tokens, currentObj):
  143. cur = tokens.pop(0)
  144. handler = None
  145. # is this a blocktype declaration?
  146. if cur in self.blockTypes:
  147. handler = self.blockTypes[cur]
  148. if currentObj['blockHandler']:
  149. raise RuntimeError("%s and %s may " %
  150. (currentObj['blockHandler'].tagName,
  151. handler.tagName) +
  152. "not occur in same documentation block")
  153. currentObj['blockHandler'] = handler
  154. elif cur in self.tags:
  155. handler = self.tags[cur]
  156. # now let's gather together all the arguments (non-tags)
  157. args = [ ]
  158. while len(tokens) > 0 and not self._isMarker(self._peekTok(tokens)):
  159. t = tokens.pop(0)
  160. args.append(t)
  161. # do we have a handler for this tag?
  162. if not handler == None:
  163. arg = None
  164. # get argument if required
  165. if handler.takesArg:
  166. if len(args) == 0 and not handler.argOptional:
  167. raise RuntimeError("%s tag requires an argument" % cur)
  168. elif not len(args) == 0:
  169. raise RuntimeError("no arguments allowed to %s tag" % cur)
  170. ctx = handler.parse(args)
  171. if handler.mayRecur:
  172. if cur not in currentObj["tagData"]:
  173. currentObj["tagData"][cur] = []
  174. currentObj["tagData"][cur].append(ctx)
  175. else:
  176. if cur in currentObj["tagData"]:
  177. raise RuntimeError("%s tag may not occur multiple times in the same documentation block" % cur)
  178. currentObj["tagData"][cur] = ctx
  179. # ooops. Dunno what that is!
  180. else:
  181. raise RuntimeError("unrecognized tag: %s" % cur)
  182. def _guessBlockName(self, codeChunk, blockType):
  183. # given the first line of source code after the block, and it's type
  184. # we'll invoke our name guessers to try to figure out the name of the
  185. # construct being documented
  186. # now let's invoke our type guessers, in order
  187. for func in self.nameGuessers:
  188. t = func(codeChunk, blockType)
  189. if t != None:
  190. return t
  191. return None
  192. def _guessBlockType(self, firstBlock, codeChunk, context, tags):
  193. # first we'll prune possibilities by figuring out which supported blocktypes
  194. # are valid in the current context, and support all of the required tags
  195. tagSet = set(tags)
  196. possibilities = [ ]
  197. for bt in self.blockTypes:
  198. bt = self.blockTypes[bt]
  199. if context not in bt.allowedContexts:
  200. continue
  201. if not tagSet.issubset(bt.allowedTags):
  202. continue
  203. possibilities.append(bt.tagName)
  204. # if we've reduced to exactly one possibility, then we don't need to guess
  205. if len(possibilities) == 1:
  206. return possibilities[0]
  207. # now let's invoke our type guessers, in order
  208. for func in self.typeGuessers:
  209. t = func(firstBlock, codeChunk, context, tags, possibilities)
  210. if t != None:
  211. return t
  212. raise RuntimeError("Can't determine what this block documents (from %s)" % ", ".join(possibilities))
  213. def _whatContext(self, stack):
  214. return stack[-1][0]
  215. def _analyzeBlock(self, block, codeChunk, firstBlock, stack, lineStart, lineEnd):
  216. # Ye' ol' block analysis process. block at this point contains
  217. # a chunk of text that has already had comment markers stripped out.
  218. # Step 1: split the chunk of text into a token stream, each token
  219. # is either a tag /@\w+/ or a chunk of text (tag argument).
  220. # whitespace on either side of tokens is stripped. Also, unescape
  221. # @@tags.
  222. tokens = self.tokenizePat.split(block)
  223. tokens = [n for n in tokens if not n == None]
  224. tokens = [n.lstrip(" \t").lstrip('\r\n').rstrip() for n in tokens if n.strip()]
  225. tokens = [self.unescapeTagPat.sub("@", t) for t in tokens]
  226. # Step 3: Treat initial text as if it were a description.
  227. if not self._isMarker(tokens[0]):
  228. tokens.insert(0, '@desc')
  229. # Step 4: collapse aliases
  230. tokens = [self.aliases[n] if self.aliases.has_key(n) else n for n in tokens]
  231. # "autosplitting". this feature allows multiple blocks to reside in the
  232. # same documentation block (/** */)
  233. tokenGroups = []
  234. while len(tokens):
  235. i = 1
  236. while i < len(tokens):
  237. if (tokens[i] in self.blockTypes):
  238. break
  239. i += 1
  240. tokenGroups.append(tokens[:i])
  241. tokens = tokens[i:]
  242. for tokens in tokenGroups:
  243. # Step 2: initialize an object which will hold the intermediate
  244. # representation of parsed block data.
  245. parseData = {
  246. 'blockHandler': None,
  247. 'tagData': { }
  248. }
  249. # Step 4.5: depth first recursion for inline type parsing
  250. newtoks = []
  251. for t in tokens:
  252. if len(t) >= 2 and t[0:1] == '{' and t[-1:] == '}':
  253. stack.append( ('embedded', {}) )
  254. tokObj = { }
  255. t2 = "@typedef " + t[1:-1]
  256. self._analyzeBlock(t2, codeChunk, False, stack, lineStart, lineEnd)
  257. self._analyzeBlock("@endtypedef", codeChunk, False, stack, lineStart, lineEnd)
  258. newtoks.append(DocStract.Type(stack[-1][1]['val'], t))
  259. stack.pop()
  260. else:
  261. newtoks.append(t)
  262. tokens = newtoks
  263. # Step 5: parse all tokens from the token stream, populating the
  264. # output representation as we go.
  265. while len(tokens):
  266. self._consumeToks(tokens, parseData)
  267. thisContext = self._whatContext(stack)
  268. # Step 6: Heuristics! Apply a set of functions which use the current state of
  269. # documentation extractor and some source code to figure out what
  270. # type of construct (@function, @property, etc) this documentation
  271. # block is documenting, and what its name is.
  272. # only invoke guessing logic if type wasn't explicitly declared
  273. if parseData['blockHandler'] == None:
  274. guessedType = self._guessBlockType(firstBlock, codeChunk, thisContext, parseData['tagData'].keys())
  275. if guessedType not in self.blockTypes:
  276. raise RuntimeError("Don't know how to handle a '%s' documentation block" % guessedType)
  277. parseData['blockHandler'] = self.blockTypes[guessedType]
  278. # always try to guess the name, a name guesser has the first interesting line of code
  279. # after the documentation block and the type of block (it's string name) to work with
  280. guessedName = self._guessBlockName(codeChunk, parseData['blockHandler'].tagName)
  281. # Step 7: Validation phase! Not all tags are allowed in all types of
  282. # documentation blocks. like '@returns' inside a '@classend' block
  283. # would just be nutty. let's scrutinize this block to make sure it's
  284. # sane.
  285. # first check that this doc block type is valid in present context
  286. if thisContext not in parseData['blockHandler'].allowedContexts:
  287. raise RuntimeError("%s not allowed in %s context" %
  288. (parseData['blockHandler'].tagName,
  289. thisContext))
  290. # now check that all present tags are allowed in this block
  291. for tag in parseData['tagData']:
  292. if not tag == parseData['blockHandler'].tagName and tag not in parseData['blockHandler'].allowedTags:
  293. raise RuntimeError("%s not allowed in %s block" %
  294. (tag, parseData['blockHandler'].tagName))
  295. # Step 8: Generation of output document
  296. doc = { }
  297. for tag in parseData['tagData']:
  298. val = parseData['tagData'][tag]
  299. if not type(val) == types.ListType:
  300. val = [ val ]
  301. for v in val:
  302. handler = self.tags[tag] if tag in self.tags else self.blockTypes[tag]
  303. handler.attach(v, doc, parseData['blockHandler'].tagName)
  304. parseData['blockHandler'].setLineNumber(lineStart, lineEnd, doc)
  305. # special case for classes and typedefs
  306. if parseData['blockHandler'].tagName in ('@endclass', '@endtypedef'):
  307. doc = stack.pop()[1]
  308. parseData['blockHandler'].merge(doc, stack[-1][1], guessedName, self._whatContext(stack))
  309. if parseData['blockHandler'].tagName == '@class':
  310. stack.append( ('class', doc) )
  311. elif parseData['blockHandler'].tagName == '@typedef':
  312. stack.append( ('type', doc) )
  313. def extractFromFile(self, filename):
  314. # next read the whole file into memory
  315. contents = ""
  316. with open(filename, "r") as f:
  317. contents = f.read()
  318. data = self.extract(contents)
  319. # first determine the module name, it's always the same as the file name
  320. mod = os.path.basename(filename)
  321. dotLoc = mod.rfind(".")
  322. if (dotLoc > 0):
  323. mod = mod[:dotLoc]
  324. if not "module" in data:
  325. data["module"] = mod
  326. if not "filename" in data:
  327. data["filename"] = filename
  328. return data
  329. def extract(self, contents):
  330. # clear the lil' context flag that lets us know when we're parsing
  331. # classes (class definitions cannot span files)
  332. stack = [ ( 'global', {} ) ]
  333. # now parse out and combine comment blocks
  334. firstBlock = True
  335. line = 0
  336. for text in self.docBlockFindPat.split(contents):
  337. lineStart = line + 1
  338. line += text.count('\n')
  339. # if this isn't a documentation block, carry on
  340. m = self.docBlockProcessPat.match(text)
  341. if m:
  342. block = self.blockFilterPat.sub("", m.group(2)).strip()
  343. context = m.group(4).strip()
  344. # data will be mutated!
  345. try:
  346. self._analyzeBlock(block, context, firstBlock, stack, lineStart, line)
  347. except RuntimeError, exc:
  348. args = exc.args
  349. if not args:
  350. arg0 = ''
  351. else:
  352. arg0 = args[0]
  353. arg0 += ' at line %s' % lineStart
  354. exc.args = (arg0,) + args[1:]
  355. raise
  356. firstBlock = False
  357. return stack[0][1]
  358. # begin definition of Tag Handler classes.
  359. # TagHandler is the base class for a handler of tags. This is an
  360. # object that is capable of parsing tags and merging them into
  361. # the output JSON document.
  362. class TagHandler(object):
  363. # if takesArg is true, then text may occur after the tag
  364. # (it "accepts" a single text blob as an argument)
  365. takesArg = False
  366. # if takesArg is True, argOptional specifies whether the
  367. # argument is required
  368. argOptional = False
  369. # if mayRecur is True the tag may be specified multiple times
  370. # in a single document text blob.
  371. mayRecur = False
  372. def __init__(self, tagname):
  373. self.tagName = tagname
  374. # the parse method attempts to parse the text blob and returns
  375. # any representation of it that it likes. This method should throw
  376. # if there's a syntactic error in the text argument. text may be
  377. # 'None' if the tag accepts no argument.
  378. def parse(self, args):
  379. return " ".join([str(a) for a in args]) if len(args) > 0 else None
  380. # attach merges the results of parsing a tag into the output
  381. # JSON document for a documentation block. `obj` is the value
  382. # returned by parse(), and parent is the json document that
  383. # the function should mutate
  384. def attach(self, obj, parent, blockType):
  385. parent[self.tagName[1:]] = obj
  386. # utility function for determining if an argument is a type
  387. def _isType(self, arg):
  388. return isinstance(arg, DocStract.Type)
  389. # utility function for rendering arguments
  390. def _argPrint(self, args):
  391. return ("(" + str(len(args)) + "): ") + " | ".join([str(x)[:10] for x in args])[:40] + "..."
  392. class ParamTagHandler(TagHandler):
  393. mayRecur = True
  394. takesArg = True
  395. _nameAndDescPat = re.compile('^([\w.\[\]]+)?\s*(.*)$', re.S);
  396. # a pattern used to detect & strip optional brackets
  397. _optionalPat = re.compile('^\[(.*)\]$')
  398. def parse(self, args):
  399. p = { }
  400. # collapse two arg case into one arg
  401. if (len(args) == 2 and self._isType(args[0])):
  402. p['type'] = args[0].value
  403. args = args[1:]
  404. if len(args) == 1:
  405. m = self._nameAndDescPat.match(args[0])
  406. if not m or self._isType(args[0]):
  407. raise RuntimeError("Malformed args to %s: %s" %
  408. (self.tagName, self._argPrint(args)))
  409. if m.group(1):
  410. p['name'] = m.group(1)
  411. if m.group(2):
  412. p['desc'] = m.group(2)
  413. elif len(args) == 2:
  414. # @param name {type}
  415. if self._isType(args[0]) or not self._isType(args[1]):
  416. raise RuntimeError("Malformed args to %s: %s" %
  417. (self.tagName, self._argPrint(args)))
  418. p['name'] = args[0]
  419. p['type'] = args[1].value
  420. elif len(args) == 3:
  421. # this is
  422. # @param name {type} desc
  423. if self._isType(args[0]) or not self._isType(args[1]) or self._isType(args[2]):
  424. raise RuntimeError("Malformed args to %s: %s" %
  425. (self.tagName, self._argPrint(args)))
  426. p['name'] = args[0]
  427. p['type'] = args[1].value
  428. p['desc'] = args[2]
  429. else:
  430. raise RuntimeError("Malformed args to %s: %s" %
  431. (self.tagName, self._argPrint(args)))
  432. return p
  433. def _handleOptionalSyntax(self, obj):
  434. # handle optional syntax: [name]
  435. if ('name' in obj):
  436. m = self._optionalPat.match(obj['name'])
  437. if m:
  438. obj['name'] = m.group(1)
  439. obj['optional'] = True
  440. def attach(self, obj, current, blockType):
  441. self._handleOptionalSyntax(obj)
  442. if not 'params' in current:
  443. current['params'] = [ ]
  444. current['params'].append(obj)
  445. class SeeTagHandler(TagHandler):
  446. takesArg = True
  447. mayRecur = True
  448. def attach(self, obj, current, blockType):
  449. if not 'see' in current:
  450. current['see'] = [ ]
  451. current['see'].append(obj)
  452. class DescTagHandler(TagHandler):
  453. takesArg = True
  454. mayRecur = True
  455. def attach(self, obj, current, blockType):
  456. if 'desc' in current:
  457. current['desc'] = current['desc'] + "\n\n" + obj
  458. else:
  459. current['desc'] = obj
  460. class ReturnTagHandler(TagHandler):
  461. takesArg = True
  462. _pat = re.compile('^\s*(?:{(\w+)})?\s*(.*)$', re.S);
  463. def parse(self, args):
  464. rv = { }
  465. for a in args:
  466. if self._isType(a):
  467. if 'type' in rv:
  468. raise RuntimeError("Return type multiply decalared")
  469. rv['type'] = a.value
  470. else:
  471. if 'desc' in rv:
  472. raise RuntimeError("Bogus arguments to %s: %s" %
  473. (self.tagName, self._argPrint(args)))
  474. rv['desc'] = a
  475. return rv
  476. def attach(self, obj, current, blockType):
  477. # The only way this can occur (returns already defined) is if
  478. # someone added an extension that behaves badly, or if @type and
  479. # @returns occur in the same block.
  480. if 'returns' in current:
  481. for k in current['returns']:
  482. if k in obj:
  483. raise RuntimeError("Return %s redefined (@type and @returns in " % k +
  484. "same function block?)")
  485. else:
  486. current['returns'] = {}
  487. for k in obj:
  488. current['returns'][k] = obj[k]
  489. class TypeTagHandler(TagHandler):
  490. takesArg = True
  491. _isWordPat = re.compile('^\w+$', re.S);
  492. def parse(self, args):
  493. if len(args) > 1:
  494. raise RuntimeError("Bogus arguments to %s: %s" %
  495. (self.tagName, self._argPrint(args)))
  496. if self._isType(args[0]):
  497. args[0] = args[0].value.strip()
  498. else:
  499. m = self._isWordPat.match(args[0])
  500. if not m:
  501. raise RuntimeError("Bogus argument to %s: %s" % (self.tagName, args[0]))
  502. return args[0]
  503. # type is special. it means different things
  504. # when it occurs in a '@property' vs. a '@function'
  505. # context. in the former it's the property type, in
  506. # the later, it's an alias for '@return'
  507. def attach(self, obj, current, blockType):
  508. if (blockType == '@property'):
  509. current['type'] = obj
  510. else:
  511. if 'returns' not in current:
  512. current['returns'] = { }
  513. if 'type' in current['returns']:
  514. raise RuntimeError("Return type redefined (@type and @returns in " +
  515. "same function block?)")
  516. current['returns']['type'] = obj
  517. class ThrowsTagHandler(ReturnTagHandler):
  518. mayRecur = True
  519. def attach(self, obj, current, blockType):
  520. if 'throws' not in current:
  521. current['throws'] = [ ]
  522. current['throws'].append(obj)
  523. # a block handler is slightly different than a tag
  524. # handler. Each document block is of a certain type,
  525. # it describes *something*. Block handlers do
  526. # everything that TagHandlers do, but also:
  527. # * one block handler per code block, they're mutually
  528. # exclusive (a docblock can't describe a *function*
  529. # AND a *property*)
  530. # * express what tags may occur inside of them
  531. # * express what contexts they may occur in ('global'
  532. # and 'class' are the only two meaninful contexts at
  533. # present).
  534. class BlockHandler(TagHandler):
  535. allowedTags = [ ]
  536. allowedContexts = [ 'global', 'class' ]
  537. def merge(self, doc, parent, guessedName, context):
  538. for k in doc:
  539. parent[k] = doc[k]
  540. def setLineNumber(self, lineStart, lineEnd, doc):
  541. doc['source_lines'] = [ lineStart, lineEnd ]
  542. class ModuleBlockHandler(BlockHandler):
  543. allowedTags = [ '@desc', '@see' ]
  544. allowedContexts = [ 'global' ]
  545. takesArg = True
  546. _pat = re.compile('^(\w+)$|^(?:([\w.\[\]]+)\s*\n)?\s*(.*)$', re.S);
  547. def parse(self, args):
  548. if len(args) != 1:
  549. raise RuntimeError("You may not pass args (like, {string}) to %s" %
  550. self.tagName)
  551. text = args[0]
  552. m = self._pat.match(text)
  553. if not m:
  554. raise RuntimeError("Malformed args to %s: %s" %
  555. (self.tagName, (text[:20] + "...")))
  556. a = { }
  557. if m.group(1):
  558. a["name"] = m.group(1)
  559. else:
  560. if m.group(2):
  561. a["name"] = m.group(2)
  562. if m.group(3):
  563. a["desc"] = m.group(3)
  564. return a
  565. def attach(self, obj, current, blockType):
  566. if "name" in obj:
  567. current['module'] = obj["name"]
  568. if "desc" in obj:
  569. if "desc" in current:
  570. obj['desc'] = current['desc'] + "\n\n" + obj['desc']
  571. current['desc'] = obj['desc']
  572. def merge(self, doc, parent, guessedName, context):
  573. # first fields that we wish to not overwrite
  574. for f in doc:
  575. if f == 'desc':
  576. parent['desc'] = parent['desc'] + "\n\n" + doc['desc'] if 'desc' in parent else doc['desc']
  577. elif f == "module":
  578. parent['module'] = doc['module']
  579. elif (f in doc and f not in parent):
  580. parent[f] = doc[f]
  581. class FunctionBlockHandler(ModuleBlockHandler):
  582. allowedTags = [ '@see', '@param', '@return', '@throws', '@desc', '@type' ]
  583. allowedContexts = [ 'global', 'class' ]
  584. def attach(self, obj, current, blockType):
  585. if "name" in obj:
  586. current['name'] = obj["name"]
  587. if "desc" in obj:
  588. if "desc" in current:
  589. obj['desc'] = current['desc'] + "\n\n" + obj['desc']
  590. current['desc'] = obj['desc']
  591. def merge(self, doc, parent, guessedName, context):
  592. if "name" not in doc:
  593. doc['name'] = guessedName
  594. if doc['name'] == None:
  595. raise RuntimeError("can't determine function name")
  596. if not "functions" in parent:
  597. parent["functions"] = []
  598. for f in parent["functions"]:
  599. if doc["name"] == f['name']:
  600. raise RuntimeError("function '%s' redefined" % doc["name"])
  601. parent["functions"].append(doc)
  602. class ConstructorBlockHandler(BlockHandler):
  603. allowedTags = [ '@see', '@param', '@throws', '@desc', '@return', '@type' ]
  604. takesArg = True
  605. argOptional = True
  606. allowedContexts = [ 'class' ]
  607. def attach(self, obj, current, blockType):
  608. if obj:
  609. if "desc" in current:
  610. obj = current['desc'] + "\n\n" + obj
  611. current['desc'] = obj
  612. def merge(self, doc, parent, guessedName, context):
  613. if not "constructors" in parent:
  614. parent["constructors"] = []
  615. parent["constructors"].append(doc)
  616. class ClassBlockHandler(FunctionBlockHandler):
  617. allowedTags = [ '@see', '@desc' ]
  618. def merge(self, doc, parent, guessedName, context):
  619. if "name" not in doc:
  620. doc['name'] = guessedName
  621. return doc
  622. class EndClassBlockHandler(BlockHandler):
  623. allowedContexts = [ 'class' ]
  624. def attach(self, obj, current, blockType):
  625. pass
  626. def merge(self, doc, parent, guessedName, context):
  627. if not "classes" in parent:
  628. parent["classes"] = []
  629. for c in parent["classes"]:
  630. if doc["name"] == c['name']:
  631. raise RuntimeError("class '%s' redefined" % doc["name"])
  632. parent["classes"].append(doc)
  633. class TypedefBlockHandler(FunctionBlockHandler):
  634. allowedTags = [ ]
  635. allowedContexts = [ 'embedded' ]
  636. takesArg = True
  637. _pat = re.compile('^(\w+)$|^(?:([\w.\[\]]+)\s*\n)?\s*(.*)$', re.S);
  638. def parse(self, args):
  639. if len(args) != 1 or self._isType(args[0]):
  640. raise RuntimeError("%s accepts a string argument" % self.tagName)
  641. return args[0]
  642. def attach(self, obj, current, blockType):
  643. current['name'] = obj
  644. def merge(self, doc, parent, guessedName, context):
  645. parent['val'] = doc
  646. class EndTypedefBlockHandler(BlockHandler):
  647. allowedContexts = [ 'type' ]
  648. def attach(self, obj, current, blockType):
  649. pass
  650. def merge(self, doc, parent, guessedName, context):
  651. pass
  652. class PropertyBlockHandler(ParamTagHandler, BlockHandler):
  653. allowedTags = [ '@see', '@throws', '@desc', '@type' ]
  654. allowedContexts = [ 'type', 'class', 'global' ]
  655. def attach(self, obj, current, blockType):
  656. for x in obj:
  657. current[x] = obj[x]
  658. def merge(self, doc, parent, guessedName, context):
  659. if "name" not in doc:
  660. doc['name'] = guessedName
  661. if doc["name"] == None:
  662. raise RuntimeError("can't determine property name")
  663. if not "properties" in parent:
  664. parent["properties"] = []
  665. for p in parent["properties"]:
  666. if doc["name"] == p['name']:
  667. raise RuntimeError("property '%s' redefined" % doc["name"])
  668. if context == "type":
  669. self._handleOptionalSyntax(doc)
  670. parent["properties"].append(doc)
  671. # A type guesser that assumes the first documentation block of a source file is
  672. # probably a '@module' documentation block
  673. def firstBlockIsModuleTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  674. if '@module' in possibilities and firstBlock:
  675. return '@module'
  676. return None
  677. # A type guesser that checks the codeChunk for appearance of the keyword 'function'
  678. _functionKeywordPat = re.compile('(?<!\w)function(?!\w)');
  679. def isFunctionIfKeywordInCodeTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  680. if '@function' in possibilities and _functionKeywordPat.search(codeChunk):
  681. return '@function'
  682. return None
  683. # A type guesser that assumes '@property' based on the presence of @type and the absence of @return.
  684. def typeWithoutReturnsIsProbablyPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  685. if '@type' in tags and '@return' not in tags and '@property' in possibilities:
  686. return '@property'
  687. return None
  688. # a guesser which assumes if a documentation block occurs before an assignment, its probably a
  689. # property (this is a bit questionable, folks)
  690. _assignmentPat = re.compile('^.*=.*;\s*$', re.M);
  691. def assignmentIsProbablyPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  692. if '@property' in possibilities and _assignmentPat.match(codeChunk):
  693. return '@property'
  694. return None
  695. _hasGetSetterPat = re.compile('__define[GS]etter__');
  696. def hasGetSetterIsPropertyTypeGuesser(firstBlock, codeChunk, context, tags, possibilities):
  697. if '@property' in possibilities and _hasGetSetterPat.search(codeChunk):
  698. return '@property'
  699. return None
  700. # A name guesser that looks for exports.XXX and assumes XXX is the name we want
  701. # define the pattern globally in this module so we don't recompile it all the time
  702. _findExportsPat = re.compile('(?:^|\s)exports\.(\w+)\s', re.M);
  703. def commonJSNameGuesser(codeChunk, blockType):
  704. m = _findExportsPat.search(codeChunk)
  705. if m:
  706. return m.group(1)
  707. return None
  708. # A name guesser that catches assignment to properties and guesses the name based
  709. # on that. like `this.foo` or `stream.bar`. Very general, but requires rooting
  710. # at the beginning of line, whereas exports guesser does not
  711. _findPropPat = re.compile('^\s*\w+\.(\w+)\s*=', re.M);
  712. def assignToPropertyNameGuesser(codeChunk, blockType):
  713. m = _findPropPat.search(codeChunk)
  714. if m:
  715. return m.group(1)
  716. return None
  717. _standardFunctionPat = re.compile('^\s*function\s*(\w+)\(.*$');
  718. def standardFunctionNameGuesser(codeChunk, blockType):
  719. m = _standardFunctionPat.match(codeChunk)
  720. if m:
  721. return m.group(1)
  722. return None
  723. _objectPropertyPat = re.compile('^\s*(\w+)\s*:.*$');
  724. def objectPropertyNameGuesser(codeChunk, blockType):
  725. m = _objectPropertyPat.match(codeChunk)
  726. if m:
  727. return m.group(1)
  728. return None
  729. _getSetterNameGuesserPat = re.compile(r'''__define[GS]etter__\s* \( \s* (?:"(\w+)" | '(\w+)') ''', re.X);
  730. def getSetterNameGuesser(codeChunk, blockType):
  731. m = _getSetterNameGuesserPat.search(codeChunk)
  732. if m:
  733. return m.group(1) if m.group(1) else m.group(2)
  734. return None
  735. if __name__ == '__main__':
  736. import sys
  737. import json
  738. ds = DocStract()
  739. docs = None
  740. if len (sys.argv) == 2:
  741. docs = ds.extractFromFile(sys.argv[1])
  742. elif len (sys.argv) == 1:
  743. docs = ds.extract(sys.stdin.read())
  744. else:
  745. print >> sys.stderr, "Usage: docstract [file]"
  746. sys.exit(1)
  747. print json.dumps(docs, indent=2, sort_keys=True) + "\n"