PageRenderTime 66ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/benchmark/app/rdoc-2.4.3/lib/rdoc/parser/c.rb

http://github.com/rubinius/rubinius
Ruby | 678 lines | 427 code | 80 blank | 171 comment | 51 complexity | e65f8f4719f9d4485cd3691cb1c671d7 MD5 | raw file
Possible License(s): BSD-3-Clause, MPL-2.0-no-copyleft-exception, 0BSD, GPL-2.0, LGPL-2.1
  1. require 'rdoc/parser'
  2. require 'rdoc/parser/ruby'
  3. require 'rdoc/known_classes'
  4. ##
  5. # We attempt to parse C extension files. Basically we look for
  6. # the standard patterns that you find in extensions: <tt>rb_define_class,
  7. # rb_define_method</tt> and so on. We also try to find the corresponding
  8. # C source for the methods and extract comments, but if we fail
  9. # we don't worry too much.
  10. #
  11. # The comments associated with a Ruby method are extracted from the C
  12. # comment block associated with the routine that _implements_ that
  13. # method, that is to say the method whose name is given in the
  14. # <tt>rb_define_method</tt> call. For example, you might write:
  15. #
  16. # /*
  17. # * Returns a new array that is a one-dimensional flattening of this
  18. # * array (recursively). That is, for every element that is an array,
  19. # * extract its elements into the new array.
  20. # *
  21. # * s = [ 1, 2, 3 ] #=> [1, 2, 3]
  22. # * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
  23. # * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
  24. # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  25. # */
  26. # static VALUE
  27. # rb_ary_flatten(ary)
  28. # VALUE ary;
  29. # {
  30. # ary = rb_obj_dup(ary);
  31. # rb_ary_flatten_bang(ary);
  32. # return ary;
  33. # }
  34. #
  35. # ...
  36. #
  37. # void
  38. # Init_Array()
  39. # {
  40. # ...
  41. # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
  42. #
  43. # Here RDoc will determine from the rb_define_method line that there's a
  44. # method called "flatten" in class Array, and will look for the implementation
  45. # in the method rb_ary_flatten. It will then use the comment from that
  46. # method in the HTML output. This method must be in the same source file
  47. # as the rb_define_method.
  48. #
  49. # C classes can be diagrammed (see /tc/dl/ruby/ruby/error.c), and RDoc
  50. # integrates C and Ruby source into one tree
  51. #
  52. # The comment blocks may include special directives:
  53. #
  54. # [Document-class: <i>name</i>]
  55. # This comment block is documentation for the given class. Use this
  56. # when the <tt>Init_xxx</tt> method is not named after the class.
  57. #
  58. # [Document-method: <i>name</i>]
  59. # This comment documents the named method. Use when RDoc cannot
  60. # automatically find the method from its declaration
  61. #
  62. # [call-seq: <i>text up to an empty line</i>]
  63. # Because C source doesn't give descriptive names to Ruby-level parameters,
  64. # you need to document the calling sequence explicitly
  65. #
  66. # In addition, RDoc assumes by default that the C method implementing a
  67. # Ruby function is in the same source file as the rb_define_method call.
  68. # If this isn't the case, add the comment:
  69. #
  70. # rb_define_method(....); // in: filename
  71. #
  72. # As an example, we might have an extension that defines multiple classes
  73. # in its Init_xxx method. We could document them using
  74. #
  75. # /*
  76. # * Document-class: MyClass
  77. # *
  78. # * Encapsulate the writing and reading of the configuration
  79. # * file. ...
  80. # */
  81. #
  82. # /*
  83. # * Document-method: read_value
  84. # *
  85. # * call-seq:
  86. # * cfg.read_value(key) -> value
  87. # * cfg.read_value(key) { |key| } -> value
  88. # *
  89. # * Return the value corresponding to +key+ from the configuration.
  90. # * In the second form, if the key isn't found, invoke the
  91. # * block and return its value.
  92. # */
  93. class RDoc::Parser::C < RDoc::Parser
  94. parse_files_matching(/\.(?:([CcHh])\1?|c([+xp])\2|y)\z/)
  95. ##
  96. # C file the parser is parsing
  97. attr_accessor :content
  98. ##
  99. # Resets cross-file state. Call when parsing different projects that need
  100. # separate documentation.
  def self.reset
    # Both tables are class-wide, so they survive across parser instances
    # until this method is called again.
    @@enclosure_classes = Hash.new
    @@known_bodies      = Hash.new
  end
  105. reset
  106. ##
  107. # Prepare to parse a C file
  def initialize(top_level, file_name, content, options, stats)
    super

    # Per-file bookkeeping: a private copy of the known-classes table and
    # the containers discovered in this file, keyed by C variable name.
    @known_classes = RDoc::KNOWN_CLASSES.dup
    @classes       = {}
    @file_dir      = File.dirname @file_name

    # Normalise the raw source before anything scans it.
    without_ifdefs = handle_ifdefs_in @content
    @content       = handle_tab_width without_ifdefs
  end
  ##
  # Scans for rb_define_alias calls and records each alias on the class it
  # was defined on, also reporting it to the stats collector.
  def do_aliases
    alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

    @content.scan(alias_call) do |c_var, alias_name, original_name|
      # Fall back to the raw C variable name when the class is not known.
      owner_name = @known_classes[c_var] || c_var
      owner      = find_class c_var, owner_name

      added = owner.add_alias RDoc::Alias.new("", original_name, alias_name, "")
      @stats.add_alias added
    end
  end
  ##
  # Scans for the calls that create modules and classes (rb_define_module,
  # rb_define_class, boot_defclass and the two *_under variants) and hands
  # each one to handle_class_module. Top-level definitions are scanned
  # before nested ones.
  def do_classes
    top_level_module = /(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx
    @content.scan(top_level_module) do |c_var, module_name|
      handle_class_module c_var, "module", module_name, nil, nil
    end

    # The '.' lets us handle SWIG-generated files
    top_level_class = /([\w\.]+)\s* = \s*rb_define_class\s*
                       \(
                         \s*"(\w+)",
                         \s*(\w+)\s*
                       \)/mx
    @content.scan(top_level_class) do |c_var, klass_name, superclass|
      handle_class_module c_var, "class", klass_name, superclass, nil
    end

    boot_class = /(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/
    @content.scan(boot_class) do |c_var, klass_name, superclass|
      # A literal 0 in the superclass slot means "no parent".
      superclass = nil if superclass == "0"
      handle_class_module c_var, "class", klass_name, superclass, nil
    end

    nested_module = /(\w+)\s* = \s*rb_define_module_under\s*
                     \(
                       \s*(\w+),
                       \s*"(\w+)"
                     \s*\)/mx
    @content.scan(nested_module) do |c_var, outer_var, module_name|
      handle_class_module c_var, "module", module_name, nil, outer_var
    end

    nested_class = /([\w\.]+)\s* = \s*rb_define_class_under\s*
                    \(
                      \s*(\w+),
                      \s*"(\w+)",
                      \s*([\w\*\s\(\)\.\->]+)\s* # for SWIG
                    \s*\)/mx
    @content.scan(nested_class) do |c_var, outer_var, klass_name, superclass|
      handle_class_module c_var, "class", klass_name, superclass, outer_var
    end
  end
  ##
  # Scans for rb_define_variable, rb_define_readonly_variable,
  # rb_define_const and rb_define_global_const calls and passes each one to
  # handle_constants.
  def do_constants
    # The type alternation deliberately has no empty branch: a bare
    # "rb_define_(" is not one of the supported calls and must not match.
    definition_call = %r{\Wrb_define_
                         (
                           variable |
                           readonly_variable |
                           const |
                           global_const
                         )
                         \s*\(
                           (?:\s*(\w+),)?
                           \s*"(\w+)",
                           \s*(.*?)\s*\)\s*;
                        }xm

    @content.scan(definition_call) do |type, var_name, const_name, definition|
      # Definitions with no owner argument, or owned by rb_mKernel, are
      # attached to rb_cObject.
      var_name = "rb_cObject" if !var_name or var_name == "rb_mKernel"
      handle_constants type, var_name, const_name, definition
    end
  end
  175. ##
  176. # Look for includes of the form:
  177. #
  178. # rb_include_module(rb_cArray, rb_mEnumerable);
  def do_includes
    include_call = /rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/

    @content.scan(include_call) do |target_var, module_var|
      target = @classes[target_var]
      # Only containers already registered for this file are updated.
      next unless target

      # Prefer the Ruby-level name when the C variable is a known class.
      module_name = @known_classes[module_var] || module_var
      target.add_include RDoc::Include.new(module_name, "")
    end
  end
  ##
  # Scans for the calls that define methods and attributes
  # (rb_define_method and friends, rb_define_attr,
  # rb_define_global_function and define_filetest_function) and forwards
  # each one to handle_method or handle_attr.
  def do_methods
    method_call = %r{rb_define_
                     (
                       singleton_method |
                       method |
                       module_function |
                       private_method
                     )
                     \s*\(\s*([\w\.]+),
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)
                     (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                    }xm

    # Ignore top-object and weird struct.c dynamic stuff
    # (it'd be nice to handle argf)
    ignored_owners = %w[ruby_top_self nstr envtbl argf]

    @content.scan(method_call) do |kind, owner, name, c_func, arity, in_file|
      next if ignored_owners.include? owner

      owner = "rb_cObject" if owner == "rb_mKernel"
      handle_method kind, owner, name, c_func, arity, in_file
    end

    # Unlike methods, attributes on rb_mKernel are not redirected to
    # rb_cObject.
    attr_call = %r{rb_define_attr\(
                   \s*([\w\.]+),
                   \s*"([^"]+)",
                   \s*(\d+),
                   \s*(\d+)\s*\);
                  }xm

    @content.scan(attr_call) do |owner, name, readable, writable|
      handle_attr owner, name, !readable.to_i.zero?, !writable.to_i.zero?
    end

    global_call = %r{rb_define_global_function\s*\(
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)
                     (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                    }xm

    @content.scan(global_call) do |name, c_func, arity, in_file|
      handle_method "method", "rb_mKernel", name, c_func, arity, in_file
    end

    filetest_call = /define_filetest_function\s*\(
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)/xm

    # Each file test is registered twice: as an instance method on
    # rb_mFileTest and as a singleton method on rb_cFile.
    @content.scan(filetest_call) do |name, c_func, arity|
      handle_method "method", "rb_mFileTest", name, c_func, arity
      handle_method "singleton_method", "rb_cFile", name, c_func, arity
    end
  end
  ##
  # Finds the comment for attribute +attr_name+: either the C comment block
  # directly above its rb_define_attr call or the text following a
  # Document-attr directive. Returns an empty String when neither exists.
  def find_attr_comment(attr_name)
    found =
      %r{((?>/\*.*?\*/\s+))
         rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi.match(@content) ||
      %r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m.match(@content)

    # Both patterns keep the comment text in their first group.
    found ? found[1] : ''
  end
  250. ##
  251. # Find the C code corresponding to a Ruby method
  def find_body(class_name, meth_name, meth_obj, body, quiet = false)
    # +quiet+ suppresses the "No definition" warning. The recursive macro
    # lookups below pass true so that a failed lookup is reported at most
    # once, by the outermost call.
    case body
    when %r{((?>/\*.*?\*/\s*))((?:(?:static|SWIGINTERN)\s+)?(?:intern\s+)?VALUE\s+#{meth_name}
            \s*(\([^)]*\))([^;]|$))}xm
      # A commented C function definition.
      comment   = Regexp.last_match(1)
      body_text = Regexp.last_match(2)

      remove_private_comments comment if comment

      # see if we can find the whole body
      whole_body = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
      body_text = $& if /#{whole_body}/m =~ body

      # The comment block may have been overridden with a 'Document-method'
      # block. This happens in the interpreter when multiple methods are
      # vectored through to the same C method but those methods are logically
      # distinct (for example Kernel.hash and Kernel.object_id share the same
      # implementation).
      override_comment = find_override_comment class_name, meth_obj.name
      comment = override_comment if override_comment

      find_modifiers comment, meth_obj if comment

      meth_obj.start_collecting_tokens
      meth_obj.add_token RDoc::RubyToken::Token.new(1,1).set_text(body_text)
      meth_obj.comment = mangle_comment comment
    when %r{((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+#{meth_name}\s+(\w+))}m
      # A commented macro aliasing another function: document the target
      # first, then put the macro's own comment in front of what was found.
      comment   = Regexp.last_match(1)
      body_text = Regexp.last_match(2)
      target    = Regexp.last_match(3)

      find_body class_name, target, meth_obj, body, true
      find_modifiers comment, meth_obj

      meth_obj.start_collecting_tokens
      meth_obj.add_token RDoc::RubyToken::Token.new(1,1).set_text(body_text)
      meth_obj.comment = mangle_comment(comment) + meth_obj.comment.to_s
    when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
      # An uncommented macro: everything comes from the target function.
      target = Regexp.last_match(1)

      unless find_body(class_name, target, meth_obj, body, true)
        warn "No definition for #{meth_name}" unless quiet or @options.quiet
        return false
      end
    else
      # No body, but might still have an override comment
      comment = find_override_comment(class_name, meth_obj.name)

      if comment
        find_modifiers(comment, meth_obj)
        meth_obj.comment = mangle_comment(comment)
      else
        warn "No definition for #{meth_name}" unless quiet or @options.quiet
        return false
      end
    end

    true
  end
  ##
  # Returns the container registered for the C variable +raw_name+. When
  # none is registered yet, one called +name+ is added to the top level
  # first: a module if +raw_name+ starts with rb_m, otherwise a class.
  def find_class(raw_name, name)
    @classes[raw_name] ||= begin
      created = if raw_name =~ /^rb_m/
                  @top_level.add_module RDoc::NormalModule, name
                else
                  @top_level.add_class RDoc::NormalClass, name
                end

      created.record_location @top_level
      created
    end
  end
  313. ##
  314. # Look for class or module documentation above Init_+class_name+(void),
  315. # in a Document-class +class_name+ (or module) comment or above an
  316. # rb_define_class (or module). If a comment is supplied above a matching
  317. # Init_ and a rb_define_class the Init_ comment is used.
  318. #
  319. # /*
  320. # * This is a comment for Foo
  321. # */
  322. # Init_Foo(void) {
  323. # VALUE cFoo = rb_define_class("Foo", rb_cObject);
  324. # }
  325. #
  326. # /*
  327. # * Document-class: Foo
  328. # * This is a comment for Foo
  329. # */
  330. # Init_foo(void) {
  331. # VALUE cFoo = rb_define_class("Foo", rb_cObject);
  332. # }
  333. #
  334. # /*
  335. # * This is a comment for Foo
  336. # */
  337. # VALUE cFoo = rb_define_class("Foo", rb_cObject);
  def find_class_comment(class_name, class_meth)
    comment = nil

    # 1. A comment block directly above the Init_ function. The parameter
    #    list may be "(void)" or empty, so "void" is optional here.
    if @content =~ %r{((?>/\*.*?\*/\s+))
                      (static\s+)?void\s+Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi then
      comment = $1
    # 2. An explicit Document-class / Document-module directive.
    elsif @content =~ %r{Document-(?:class|module):\s+#{class_name}\s*?(?:<\s+[:,\w]+)?\n((?>.*?\*/))}m then
      comment = $1
    # 3. A comment block directly above the rb_define_class/module call.
    elsif @content =~ %r{((?>/\*.*?\*/\s+))
                         ([\w\.\s]+\s* = \s+)?rb_define_(class|module).*?"(#{class_name})"}xm then
      comment = $1
    end

    # class_meth is left untouched when no comment was found.
    class_meth.comment = mangle_comment comment if comment
  end
  351. ##
  352. # Finds a comment matching +type+ and +const_name+ either above the
  353. # definition or in the matching Document- section.
  def find_const_comment(type, const_name)
    found =
      %r{((?>^\s*/\*.*?\*/\s+))
         rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi.match(@content) ||
      %r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m.match(@content)

    # Both patterns keep the comment text in their first group; an empty
    # String means no comment was found.
    found ? found[1] : ''
  end
  364. ##
  365. # If the comment block contains a section that looks like:
  366. #
  367. # call-seq:
  368. # Array.new
  369. # Array.new(10)
  370. #
  371. # use it for the parameters.
  def find_modifiers(comment, meth_obj)
    # A :nodoc: marker is cut out of the comment and hides the method.
    hidden = comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
             comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
    meth_obj.document_self = false if hidden

    # A call-seq section is cut out as well; whichever pattern matched
    # leaves the section text in its first capture group.
    extracted = comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
                comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
    return unless extracted

    call_seq = Regexp.last_match(1)
    call_seq.gsub!(/^\s*\*\s*/, '') # drop the comment's leading asterisks
    meth_obj.call_seq = call_seq
  end
  ##
  # Looks for a Document-method directive naming +meth_name+, preferring one
  # qualified with +class_name+ over an unqualified one. Returns the text
  # from the line after the directive up to the end of that comment, or nil
  # when there is no such directive.
  def find_override_comment(class_name, meth_name)
    escaped = Regexp.escape meth_name

    found =
      %r{Document-method:\s+#{class_name}(?:\.|::|#)#{escaped}\s*?\n((?>.*?\*/))}m.match(@content) ||
      %r{Document-method:\s#{escaped}\s*?\n((?>.*?\*/))}m.match(@content)

    found[1] if found
  end
  ##
  # Adds attribute +attr_name+ to the class behind the C variable
  # +var_name+. +reader+ and +writer+ select the 'R' and 'W' access flags.
  # Nothing happens when +var_name+ is not a known class.
  def handle_attr(var_name, attr_name, reader, writer)
    rw = ''
    rw << 'R' if reader
    rw << 'W' if writer

    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class var_name, class_name
    return unless class_obj

    comment = find_attr_comment attr_name
    comment = mangle_comment comment unless comment.empty?

    class_obj.add_attribute RDoc::Attr.new('', attr_name, rw, comment)
  end
  ##
  # Registers the class or module +class_name+ held in the C variable
  # +var_name+. +class_mod+ is "class" or "module", +parent+ is the C
  # variable of the superclass (classes only) and +in_module+ the C variable
  # of the enclosing class or module, or nil for a top-level definition.
  def handle_class_module(var_name, class_mod, class_name, parent, in_module)
    parent_name = @known_classes[parent] || parent

    if in_module then
      enclosure = @classes[in_module] || @@enclosure_classes[in_module]

      # The enclosure may be a well-known class that has not been
      # registered yet; register it first and then look it up again.
      if !enclosure and (known_name = @known_classes[in_module]) then
        enclosure_kind = /^rb_m/ =~ in_module ? "module" : "class"
        handle_class_module in_module, enclosure_kind, known_name, nil, nil
        enclosure = @classes[in_module]
      end

      unless enclosure then
        warn("Enclosing class/module '#{in_module}' for " +
             "#{class_mod} #{class_name} not known")
        return
      end
    else
      enclosure = @top_level
    end

    if class_mod == "class" then
      full_name = class_name
      if RDoc::ClassModule === enclosure then
        full_name = enclosure.full_name + "::#{class_name}"
      end

      # A "Document-class: Name < Parent" directive overrides the parent
      # taken from the C call.
      if @content =~ %r{Document-class:\s+#{full_name}\s*<\s+([:,\w]+)} then
        parent_name = $1
      end

      cm = enclosure.add_class RDoc::NormalClass, class_name, parent_name
      @stats.add_class cm
    else
      cm = enclosure.add_module RDoc::NormalModule, class_name
      @stats.add_module cm
    end

    cm.record_location enclosure.top_level
    find_class_comment cm.full_name, cm

    # Remember the container under its C variable name, both for this file
    # and in the class-wide enclosure table.
    @classes[var_name] = cm
    @@enclosure_classes[var_name] = cm
    @known_classes[var_name] = cm.full_name
  end
  454. ##
  455. # Adds constant comments. By providing some_value: at the start of the
  456. # comment you can override the C value of the comment to give a friendly
  457. # definition.
  458. #
  459. # /* 300: The perfect score in bowling */
  460. # rb_define_const(cFoo, "PERFECT", INT2FIX(300));
  461. #
  462. # Will override +INT2FIX(300)+ with the value +300+ in the output RDoc.
  463. # Values may include quotes and escaped colons (\:).
  def handle_constants(type, var_name, const_name, definition)
    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class(var_name, class_name)

    unless class_obj
      warn("Enclosing class/module '#{var_name}' for " +
           "constant #{const_name} not known")
      return
    end

    comment = find_const_comment(type, const_name)

    # In the case of rb_define_const, the definition and comment are in
    # "/* definition: comment */" form. The literal ':' and '\' characters
    # can be escaped with a backslash.
    if type.downcase == 'const' then
      elements = mangle_comment(comment).split(':')

      if elements.nil? or elements.empty? then
        con = RDoc::Constant.new(const_name, definition,
                                 mangle_comment(comment))
      else
        # Everything before the last ':' is the friendly definition.
        new_definition = elements[0..-2].join(':')

        if new_definition.empty? then # Default to literal C definition
          new_definition = definition
        else
          # Undo the escapes in one pass: "\:" becomes ":" and "\\" becomes
          # "\". The pattern must be a Regexp (or single-quoted String); in
          # a double-quoted String "\:" is just ":" and nothing is replaced.
          new_definition.gsub!(/\\([\\:])/) { Regexp.last_match(1) }
        end

        # Move leading whitespace from the definition onto the comment so
        # the comment text keeps its original indentation.
        leading = new_definition[/\A\s+/]
        new_definition.sub!(/\A\s+/, '')

        new_comment = if leading then
                        "#{leading}#{elements.last.lstrip}"
                      else
                        elements.last
                      end

        con = RDoc::Constant.new(const_name, new_definition,
                                 mangle_comment(new_comment))
      end
    else
      con = RDoc::Constant.new const_name, definition, mangle_comment(comment)
    end

    class_obj.add_constant(con)
  end
  500. ##
  501. # Removes #ifdefs that would otherwise confuse us
  def handle_ifdefs_in(body)
    prototype_guard = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m

    # Each guarded region is replaced by the contents of its #else branch.
    body.gsub(prototype_guard) { Regexp.last_match(1) }
  end
  ##
  # Registers the Ruby method +meth_name+ on the class behind the C variable
  # +var_name+. +type+ is the suffix of the rb_define_ call that created it,
  # +meth_body+ the name of the implementing C function, +param_count+ its
  # arity as written in the C source and +source_file+ the file named in a
  # trailing "in file.c" comment, if any. Nothing happens when +var_name+ is
  # not a known class.
  def handle_method(type, var_name, meth_name, meth_body, param_count,
                    source_file = nil)
    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class var_name, class_name
    return unless class_obj

    # "initialize" is documented as the singleton method "new".
    if meth_name == "initialize" then
      meth_name = "new"
      type = "singleton_method"
    end

    meth_obj = RDoc::AnyMethod.new '', meth_name
    meth_obj.singleton = %w[singleton_method module_function].include? type

    # Negative or non-numeric arities are shown as "(...)".
    p_count = (Integer(param_count) rescue -1)

    meth_obj.params = if p_count < 0 then
                        "(...)"
                      elsif p_count == 0 then
                        "()"
                      else
                        "(" + (1..p_count).map { |i| "p#{i}" }.join(", ") + ")"
                      end

    body = if source_file then
             file_name = File.join @file_dir, source_file

             if File.exist? file_name then
               # Cache by full path so that same-named files in different
               # directories do not share a body.
               @@known_bodies[file_name] ||= File.read(file_name)
             else
               # A stale "in file.c" comment must not abort the whole run;
               # find_body can still pick up a Document-method comment.
               warn "unknown source #{source_file} for #{meth_name} in #{@file_name}"
               nil
             end
           else
             @content
           end

    if find_body(class_name, meth_body, meth_obj, body) and meth_obj.document_self then
      class_obj.add_method meth_obj
      @stats.add_method meth_obj
      meth_obj.visibility = :private if 'private_method' == type
    end
  end
  ##
  # Expands the tabs in +body+ to spaces using the tab width from the
  # options. Returns +body+ itself when it contains no tab; otherwise
  # returns the expanded lines joined with "\n".
  def handle_tab_width(body)
    return body unless /\t/ =~ body

    tab_width = @options.tab_width

    expanded = body.split(/\n/).map do |line|
      # Expand one run of tabs at a time. Each run's column is then measured
      # against the text before it with earlier tabs already expanded, so
      # every run lands on the correct tab stop.
      1 while line.sub!(/\t+/) {
        run = Regexp.last_match
        ' ' * (tab_width * run[0].length - run.pre_match.length % tab_width)
      }
      line
    end

    expanded.join("\n")
  end
  549. ##
  550. # Remove the /*'s and leading asterisks from C comments
  def mangle_comment(comment)
    # Markers are overwritten with spaces of equal length so the remaining
    # text keeps its column positions. +comment+ is modified in place.
    blank_out = proc { |marker| " " * marker.length }

    comment.sub!(%r{/\*+}, &blank_out)       # first comment opener
    comment.sub!(%r{\*+/}, &blank_out)       # first comment closer
    comment.gsub!(/^[ \t]*\*/m, &blank_out)  # leading asterisk of each line
    comment
  end
  557. ##
  558. # Removes lines that are commented out that might otherwise get picked up
  559. # when scanning for classes and methods
  def remove_commented_out_lines
    # Everything from "//" up to the last "rb_define_" on the same line is
    # collapsed to "//", so the scanners no longer see the call.
    @content.gsub!(%r{//.*rb_define_}) { '//' }
  end
  ##
  # Removes private sections from +comment+ in place. A section starts at a
  # line ending in "*--" and runs to the next "*++", or to the end of the
  # comment when it is never closed.
  def remove_private_comments(comment)
    closed_section = %r{/?\*--\n(.*?)/?\*\+\+}m
    open_section   = %r{/?\*--\n.*}m

    comment.gsub! closed_section, ''
    comment.sub! open_section, ''
  end
  567. ##
  568. # Extract the classes/modules and methods from a C file and return the
  569. # corresponding top-level object
  def scan
    # The passes run in this fixed order: commented-out definitions are
    # stripped first, and classes are registered before the passes that
    # look them up.
    passes = [
      :remove_commented_out_lines,
      :do_classes,
      :do_constants,
      :do_methods,
      :do_includes,
      :do_aliases
    ]

    passes.each { |pass| __send__ pass }

    @top_level
  end
  ##
  # Writes a blank line followed by +msg+ to $stderr and flushes it.
  def warn(msg)
    $stderr.puts "", msg
    $stderr.flush
  end
  584. end