PageRenderTime 80ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 0ms

/doc/sphinxext/numpy_ext_old/docscrape.py

https://github.com/ronnix/scikit-learn
Python | 492 lines | 481 code | 1 blank | 10 comment | 0 complexity | 7fac197fef77c17caafe431ab371aace MD5 | raw file
  1. """Extract reference documentation from the NumPy source tree.
  2. """
  3. import inspect
  4. import textwrap
  5. import re
  6. import pydoc
  7. from StringIO import StringIO
  8. from warnings import warn
  9. 4
  10. class Reader(object):
  11. """A line-based string reader.
  12. """
  13. def __init__(self, data):
  14. """
  15. Parameters
  16. ----------
  17. data : str
  18. String with lines separated by '\n'.
  19. """
  20. if isinstance(data,list):
  21. self._str = data
  22. else:
  23. self._str = data.split('\n') # store string as list of lines
  24. self.reset()
  25. def __getitem__(self, n):
  26. return self._str[n]
  27. def reset(self):
  28. self._l = 0 # current line nr
  29. def read(self):
  30. if not self.eof():
  31. out = self[self._l]
  32. self._l += 1
  33. return out
  34. else:
  35. return ''
  36. def seek_next_non_empty_line(self):
  37. for l in self[self._l:]:
  38. if l.strip():
  39. break
  40. else:
  41. self._l += 1
  42. def eof(self):
  43. return self._l >= len(self._str)
  44. def read_to_condition(self, condition_func):
  45. start = self._l
  46. for line in self[start:]:
  47. if condition_func(line):
  48. return self[start:self._l]
  49. self._l += 1
  50. if self.eof():
  51. return self[start:self._l+1]
  52. return []
  53. def read_to_next_empty_line(self):
  54. self.seek_next_non_empty_line()
  55. def is_empty(line):
  56. return not line.strip()
  57. return self.read_to_condition(is_empty)
  58. def read_to_next_unindented_line(self):
  59. def is_unindented(line):
  60. return (line.strip() and (len(line.lstrip()) == len(line)))
  61. return self.read_to_condition(is_unindented)
  62. def peek(self,n=0):
  63. if self._l + n < len(self._str):
  64. return self[self._l + n]
  65. else:
  66. return ''
  67. def is_empty(self):
  68. return not ''.join(self._str).strip()
  69. class NumpyDocString(object):
  70. def __init__(self,docstring):
  71. docstring = textwrap.dedent(docstring).split('\n')
  72. self._doc = Reader(docstring)
  73. self._parsed_data = {
  74. 'Signature': '',
  75. 'Summary': [''],
  76. 'Extended Summary': [],
  77. 'Parameters': [],
  78. 'Returns': [],
  79. 'Raises': [],
  80. 'Warns': [],
  81. 'Other Parameters': [],
  82. 'Attributes': [],
  83. 'Methods': [],
  84. 'See Also': [],
  85. 'Notes': [],
  86. 'Warnings': [],
  87. 'References': '',
  88. 'Examples': '',
  89. 'index': {}
  90. }
  91. self._parse()
  92. def __getitem__(self,key):
  93. return self._parsed_data[key]
  94. def __setitem__(self,key,val):
  95. if not self._parsed_data.has_key(key):
  96. warn("Unknown section %s" % key)
  97. else:
  98. self._parsed_data[key] = val
  99. def _is_at_section(self):
  100. self._doc.seek_next_non_empty_line()
  101. if self._doc.eof():
  102. return False
  103. l1 = self._doc.peek().strip() # e.g. Parameters
  104. if l1.startswith('.. index::'):
  105. return True
  106. l2 = self._doc.peek(1).strip() # ---------- or ==========
  107. return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
  108. def _strip(self,doc):
  109. i = 0
  110. j = 0
  111. for i,line in enumerate(doc):
  112. if line.strip(): break
  113. for j,line in enumerate(doc[::-1]):
  114. if line.strip(): break
  115. return doc[i:len(doc)-j]
  116. def _read_to_next_section(self):
  117. section = self._doc.read_to_next_empty_line()
  118. while not self._is_at_section() and not self._doc.eof():
  119. if not self._doc.peek(-1).strip(): # previous line was empty
  120. section += ['']
  121. section += self._doc.read_to_next_empty_line()
  122. return section
  123. def _read_sections(self):
  124. while not self._doc.eof():
  125. data = self._read_to_next_section()
  126. name = data[0].strip()
  127. if name.startswith('..'): # index section
  128. yield name, data[1:]
  129. elif len(data) < 2:
  130. yield StopIteration
  131. else:
  132. yield name, self._strip(data[2:])
  133. def _parse_param_list(self,content):
  134. r = Reader(content)
  135. params = []
  136. while not r.eof():
  137. header = r.read().strip()
  138. if ' : ' in header:
  139. arg_name, arg_type = header.split(' : ')[:2]
  140. else:
  141. arg_name, arg_type = header, ''
  142. desc = r.read_to_next_unindented_line()
  143. desc = dedent_lines(desc)
  144. params.append((arg_name,arg_type,desc))
  145. return params
  146. _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
  147. r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)
  148. def _parse_see_also(self, content):
  149. """
  150. func_name : Descriptive text
  151. continued text
  152. another_func_name : Descriptive text
  153. func_name1, func_name2, :meth:`func_name`, func_name3
  154. """
  155. items = []
  156. def parse_item_name(text):
  157. """Match ':role:`name`' or 'name'"""
  158. m = self._name_rgx.match(text)
  159. if m:
  160. g = m.groups()
  161. if g[1] is None:
  162. return g[3], None
  163. else:
  164. return g[2], g[1]
  165. raise ValueError("%s is not a item name" % text)
  166. def push_item(name, rest):
  167. if not name:
  168. return
  169. name, role = parse_item_name(name)
  170. items.append((name, list(rest), role))
  171. del rest[:]
  172. current_func = None
  173. rest = []
  174. for line in content:
  175. if not line.strip(): continue
  176. m = self._name_rgx.match(line)
  177. if m and line[m.end():].strip().startswith(':'):
  178. push_item(current_func, rest)
  179. current_func, line = line[:m.end()], line[m.end():]
  180. rest = [line.split(':', 1)[1].strip()]
  181. if not rest[0]:
  182. rest = []
  183. elif not line.startswith(' '):
  184. push_item(current_func, rest)
  185. current_func = None
  186. if ',' in line:
  187. for func in line.split(','):
  188. push_item(func, [])
  189. elif line.strip():
  190. current_func = line
  191. elif current_func is not None:
  192. rest.append(line.strip())
  193. push_item(current_func, rest)
  194. return items
  195. def _parse_index(self, section, content):
  196. """
  197. .. index: default
  198. :refguide: something, else, and more
  199. """
  200. def strip_each_in(lst):
  201. return [s.strip() for s in lst]
  202. out = {}
  203. section = section.split('::')
  204. if len(section) > 1:
  205. out['default'] = strip_each_in(section[1].split(','))[0]
  206. for line in content:
  207. line = line.split(':')
  208. if len(line) > 2:
  209. out[line[1]] = strip_each_in(line[2].split(','))
  210. return out
  211. def _parse_summary(self):
  212. """Grab signature (if given) and summary"""
  213. if self._is_at_section():
  214. return
  215. summary = self._doc.read_to_next_empty_line()
  216. summary_str = " ".join([s.strip() for s in summary]).strip()
  217. if re.compile('^([\w., ]+=)?\s*[\w\.]+\(.*\)$').match(summary_str):
  218. self['Signature'] = summary_str
  219. if not self._is_at_section():
  220. self['Summary'] = self._doc.read_to_next_empty_line()
  221. else:
  222. self['Summary'] = summary
  223. if not self._is_at_section():
  224. self['Extended Summary'] = self._read_to_next_section()
  225. def _parse(self):
  226. self._doc.reset()
  227. self._parse_summary()
  228. for (section,content) in self._read_sections():
  229. if not section.startswith('..'):
  230. section = ' '.join([s.capitalize() for s in section.split(' ')])
  231. if section in ('Parameters', 'Attributes', 'Methods',
  232. 'Returns', 'Raises', 'Warns'):
  233. self[section] = self._parse_param_list(content)
  234. elif section.startswith('.. index::'):
  235. self['index'] = self._parse_index(section, content)
  236. elif section == 'See Also':
  237. self['See Also'] = self._parse_see_also(content)
  238. else:
  239. self[section] = content
  240. # string conversion routines
  241. def _str_header(self, name, symbol='-'):
  242. return [name, len(name)*symbol]
  243. def _str_indent(self, doc, indent=4):
  244. out = []
  245. for line in doc:
  246. out += [' '*indent + line]
  247. return out
  248. def _str_signature(self):
  249. if self['Signature']:
  250. return [self['Signature'].replace('*','\*')] + ['']
  251. else:
  252. return ['']
  253. def _str_summary(self):
  254. if self['Summary']:
  255. return self['Summary'] + ['']
  256. else:
  257. return []
  258. def _str_extended_summary(self):
  259. if self['Extended Summary']:
  260. return self['Extended Summary'] + ['']
  261. else:
  262. return []
  263. def _str_param_list(self, name):
  264. out = []
  265. if self[name]:
  266. out += self._str_header(name)
  267. for param,param_type,desc in self[name]:
  268. out += ['%s : %s' % (param, param_type)]
  269. out += self._str_indent(desc)
  270. out += ['']
  271. return out
  272. def _str_section(self, name):
  273. out = []
  274. if self[name]:
  275. out += self._str_header(name)
  276. out += self[name]
  277. out += ['']
  278. return out
  279. def _str_see_also(self, func_role):
  280. if not self['See Also']: return []
  281. out = []
  282. out += self._str_header("See Also")
  283. last_had_desc = True
  284. for func, desc, role in self['See Also']:
  285. if role:
  286. link = ':%s:`%s`' % (role, func)
  287. elif func_role:
  288. link = ':%s:`%s`' % (func_role, func)
  289. else:
  290. link = "`%s`_" % func
  291. if desc or last_had_desc:
  292. out += ['']
  293. out += [link]
  294. else:
  295. out[-1] += ", %s" % link
  296. if desc:
  297. out += self._str_indent([' '.join(desc)])
  298. last_had_desc = True
  299. else:
  300. last_had_desc = False
  301. out += ['']
  302. return out
  303. def _str_index(self):
  304. idx = self['index']
  305. out = []
  306. out += ['.. index:: %s' % idx.get('default','')]
  307. for section, references in idx.iteritems():
  308. if section == 'default':
  309. continue
  310. out += [' :%s: %s' % (section, ', '.join(references))]
  311. return out
  312. def __str__(self, func_role=''):
  313. out = []
  314. out += self._str_signature()
  315. out += self._str_summary()
  316. out += self._str_extended_summary()
  317. for param_list in ('Parameters','Returns','Raises'):
  318. out += self._str_param_list(param_list)
  319. out += self._str_section('Warnings')
  320. out += self._str_see_also(func_role)
  321. for s in ('Notes','References','Examples'):
  322. out += self._str_section(s)
  323. out += self._str_index()
  324. return '\n'.join(out)
  325. def indent(str,indent=4):
  326. indent_str = ' '*indent
  327. if str is None:
  328. return indent_str
  329. lines = str.split('\n')
  330. return '\n'.join(indent_str + l for l in lines)
  331. def dedent_lines(lines):
  332. """Deindent a list of lines maximally"""
  333. return textwrap.dedent("\n".join(lines)).split("\n")
  334. def header(text, style='-'):
  335. return text + '\n' + style*len(text) + '\n'
  336. class FunctionDoc(NumpyDocString):
  337. def __init__(self, func, role='func'):
  338. self._f = func
  339. self._role = role # e.g. "func" or "meth"
  340. try:
  341. NumpyDocString.__init__(self,inspect.getdoc(func) or '')
  342. except ValueError, e:
  343. print '*'*78
  344. print "ERROR: '%s' while parsing `%s`" % (e, self._f)
  345. print '*'*78
  346. #print "Docstring follows:"
  347. #print doclines
  348. #print '='*78
  349. if not self['Signature']:
  350. func, func_name = self.get_func()
  351. try:
  352. # try to read signature
  353. argspec = inspect.getargspec(func)
  354. argspec = inspect.formatargspec(*argspec)
  355. argspec = argspec.replace('*','\*')
  356. signature = '%s%s' % (func_name, argspec)
  357. except TypeError, e:
  358. signature = '%s()' % func_name
  359. self['Signature'] = signature
  360. def get_func(self):
  361. func_name = getattr(self._f, '__name__', self.__class__.__name__)
  362. if inspect.isclass(self._f):
  363. func = getattr(self._f, '__call__', self._f.__init__)
  364. else:
  365. func = self._f
  366. return func, func_name
  367. def __str__(self):
  368. out = ''
  369. func, func_name = self.get_func()
  370. signature = self['Signature'].replace('*', '\*')
  371. roles = {'func': 'function',
  372. 'meth': 'method'}
  373. if self._role:
  374. if not roles.has_key(self._role):
  375. print "Warning: invalid role %s" % self._role
  376. out += '.. %s:: %s\n \n\n' % (roles.get(self._role,''),
  377. func_name)
  378. out += super(FunctionDoc, self).__str__(func_role=self._role)
  379. return out
  380. class ClassDoc(NumpyDocString):
  381. def __init__(self,cls,modulename='',func_doc=FunctionDoc):
  382. if not inspect.isclass(cls):
  383. raise ValueError("Initialise using a class. Got %r" % cls)
  384. self._cls = cls
  385. if modulename and not modulename.endswith('.'):
  386. modulename += '.'
  387. self._mod = modulename
  388. self._name = cls.__name__
  389. self._func_doc = func_doc
  390. NumpyDocString.__init__(self, pydoc.getdoc(cls))
  391. @property
  392. def methods(self):
  393. return [name for name,func in inspect.getmembers(self._cls)
  394. if not name.startswith('_') and callable(func)]
  395. def __str__(self):
  396. out = ''
  397. out += super(ClassDoc, self).__str__()
  398. out += "\n\n"
  399. #for m in self.methods:
  400. # print "Parsing `%s`" % m
  401. # out += str(self._func_doc(getattr(self._cls,m), 'meth')) + '\n\n'
  402. # out += '.. index::\n single: %s; %s\n\n' % (self._name, m)
  403. return out