PageRenderTime 246ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/sources/ruby-1.8.5/lib/rdoc/parsers/parse_c.rb

http://rubyworks.googlecode.com/
Ruby | 697 lines | 447 code | 108 blank | 142 comment | 54 complexity | 97e6c0830dc0870be816e747ab19510d MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, LGPL-2.1, AGPL-3.0, 0BSD, Unlicense
  1. # We attempt to parse C extension files. Basically we look for
  2. # the standard patterns that you find in extensions: <tt>rb_define_class,
  3. # rb_define_method</tt> and so on. We also try to find the corresponding
  4. # C source for the methods and extract comments, but if we fail
  5. # we don't worry too much.
  6. #
  7. # The comments associated with a Ruby method are extracted from the C
  8. # comment block associated with the routine that _implements_ that
  9. # method, that is to say the method whose name is given in the
  10. # <tt>rb_define_method</tt> call. For example, you might write:
  11. #
  12. # /*
  13. # * Returns a new array that is a one-dimensional flattening of this
  14. # * array (recursively). That is, for every element that is an array,
  15. # * extract its elements into the new array.
  16. # *
  17. # * s = [ 1, 2, 3 ] #=> [1, 2, 3]
  18. # * t = [ 4, 5, 6, [7, 8] ] #=> [4, 5, 6, [7, 8]]
  19. # * a = [ s, t, 9, 10 ] #=> [[1, 2, 3], [4, 5, 6, [7, 8]], 9, 10]
  20. # * a.flatten #=> [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  21. # */
  22. # static VALUE
  23. # rb_ary_flatten(ary)
  24. # VALUE ary;
  25. # {
  26. # ary = rb_obj_dup(ary);
  27. # rb_ary_flatten_bang(ary);
  28. # return ary;
  29. # }
  30. #
  31. # ...
  32. #
  33. # void
  34. # Init_Array()
  35. # {
  36. # ...
  37. # rb_define_method(rb_cArray, "flatten", rb_ary_flatten, 0);
  38. #
  39. # Here RDoc will determine from the rb_define_method line that there's a
  40. # method called "flatten" in class Array, and will look for the implementation
  41. # in the method rb_ary_flatten. It will then use the comment from that
  42. # method in the HTML output. This method must be in the same source file
  43. # as the rb_define_method.
  44. #
  45. # C classes can be diagramed (see /tc/dl/ruby/ruby/error.c), and RDoc
  46. # integrates C and Ruby source into one tree
  47. #
  48. # The comment blocks may include special directives:
  49. #
  50. # [Document-class: <i>name</i>]
  51. # This comment block is documentation for the given class. Use this
  52. # when the <tt>Init_xxx</tt> method is not named after the class.
  53. #
  54. # [Document-method: <i>name</i>]
  55. # This comment documents the named method. Use when RDoc cannot automatically
  56. # find the method from its declaration
  57. #
  58. # [call-seq: <i>text up to an empty line</i>]
  59. # Because C source doesn't give descriptive names to Ruby-level parameters,
  60. # you need to document the calling sequence explicitly
  61. #
  62. # In addition, RDoc assumes by default that the C method implementing a
  63. # Ruby function is in the same source file as the rb_define_method call.
  64. # If this isn't the case, add the comment
  65. #
  66. # rb_define_method(....); // in: filename
  67. #
  68. # As an example, we might have an extension that defines multiple classes
  69. # in its Init_xxx method. We could document them using
  70. #
  71. #
  72. # /*
  73. # * Document-class: MyClass
  74. # *
  75. # * Encapsulate the writing and reading of the configuration
  76. # * file. ...
  77. # */
  78. #
  79. # /*
  80. # * Document-method: read_value
  81. # *
  82. # * call-seq:
  83. # * cfg.read_value(key) -> value
  84. # * cfg.read_value(key) { |key| } -> value
  85. # *
  86. # * Return the value corresponding to +key+ from the configuration.
  87. # * In the second form, if the key isn't found, invoke the
  88. # * block and return its value.
  89. # */
  90. #
  91. # Classes and modules built in to the interpreter. We need
  92. # these to define superclasses of user objects
  93. require "rdoc/code_objects"
  94. require "rdoc/parsers/parserfactory"
  95. module RDoc
  # Maps the C-level global that holds a built-in class, exception or module
  # (for example +rb_cArray+) to the Ruby-level name of that object. The
  # parser uses the table to name superclasses and to resolve the receiver
  # of rb_define_* calls that target interpreter built-ins.
  #
  # The values must be the names the interpreter really uses, so the
  # abbreviated C identifiers are expanded (rb_eArgError is ArgumentError,
  # not "ArgError"), and rb_eFatal maps to the lower-case class name "fatal".
  KNOWN_CLASSES = {
    "rb_cObject" => "Object",
    "rb_cArray" => "Array",
    "rb_cBignum" => "Bignum",
    "rb_cClass" => "Class",
    "rb_cDir" => "Dir",
    "rb_cData" => "Data",
    "rb_cFalseClass" => "FalseClass",
    "rb_cFile" => "File",
    "rb_cFixnum" => "Fixnum",
    "rb_cFloat" => "Float",
    "rb_cHash" => "Hash",
    "rb_cInteger" => "Integer",
    "rb_cIO" => "IO",
    "rb_cModule" => "Module",
    "rb_cNilClass" => "NilClass",
    "rb_cNumeric" => "Numeric",
    "rb_cProc" => "Proc",
    "rb_cRange" => "Range",
    "rb_cRegexp" => "Regexp",
    "rb_cString" => "String",
    "rb_cSymbol" => "Symbol",
    "rb_cThread" => "Thread",
    "rb_cTime" => "Time",
    "rb_cTrueClass" => "TrueClass",
    "rb_cStruct" => "Struct",

    "rb_eException" => "Exception",
    "rb_eStandardError" => "StandardError",
    "rb_eSystemExit" => "SystemExit",
    "rb_eInterrupt" => "Interrupt",
    "rb_eSignal" => "SignalException",
    "rb_eFatal" => "fatal",
    "rb_eArgError" => "ArgumentError",
    "rb_eEOFError" => "EOFError",
    "rb_eIndexError" => "IndexError",
    "rb_eRangeError" => "RangeError",
    "rb_eIOError" => "IOError",
    "rb_eRuntimeError" => "RuntimeError",
    "rb_eSecurityError" => "SecurityError",
    "rb_eSystemCallError" => "SystemCallError",
    "rb_eTypeError" => "TypeError",
    "rb_eZeroDivError" => "ZeroDivisionError",
    "rb_eNotImpError" => "NotImplementedError",
    "rb_eNoMemError" => "NoMemoryError",
    "rb_eFloatDomainError" => "FloatDomainError",
    "rb_eScriptError" => "ScriptError",
    "rb_eNameError" => "NameError",
    "rb_eSyntaxError" => "SyntaxError",
    "rb_eLoadError" => "LoadError",

    "rb_mKernel" => "Kernel",
    "rb_mComparable" => "Comparable",
    "rb_mEnumerable" => "Enumerable",
    "rb_mPrecision" => "Precision",
    "rb_mErrno" => "Errno",
    "rb_mFileTest" => "FileTest",
    "rb_mGC" => "GC",
    "rb_mMath" => "Math",
    "rb_mProcess" => "Process"
  }
  155. # See rdoc/c_parse.rb
  156. class C_Parser
  # Source text of files that were read because a definition carried an
  # "in: filename" comment. Held in a class variable, so every parser
  # instance sees the same cache.
  @@known_bodies = {}

  extend ParserFactory

  # File names this parser registers for: .c/.cc, .h/.hh (either case),
  # .c++, .cxx, .cpp and .y
  parse_files_matching(/\.(?:([CcHh])\1?|c([+xp])\2|y)\z/)
  160. # prepare to parse a C file
  # +top_level+ is the object returned by #scan, +file_name+ is only used
  # for its directory (where "in: file" references are looked up), +body+
  # is the C source text, +options+ must respond to +quiet+ and +stats+
  # receives the class/module/method counters.
  def initialize(top_level, file_name, body, options, stats)
    @top_level = top_level
    @options = options
    @stats = stats
    @file_dir = File.dirname(file_name)

    # Per-file copy, so classes discovered here do not leak into the
    # shared table.
    @known_classes = KNOWN_CLASSES.dup
    @classes = {}

    # Drop the HAVE_PROTOTYPES alternative first, then expand tabs.
    without_ifdefs = handle_ifdefs_in(body)
    @body = handle_tab_width(without_ifdefs)

    @progress = $stderr unless options.quiet
  end
  171. # Extract the classes/modules and methods from a C file
  172. # and return the corresponding top-level object
  # The passes run in a fixed order: commented-out definitions are removed
  # first, classes are collected before the passes that attach constants,
  # methods, includes and aliases to them.
  def scan
    passes = [:remove_commented_out_lines, :do_classes, :do_constants,
              :do_methods, :do_includes, :do_aliases]
    passes.each { |pass| send(pass) }
    @top_level
  end
  182. #######
  183. private
  184. #######
  # Writes one progress character to the progress stream and flushes it.
  # Does nothing when the options ask for quiet operation.
  def progress(char)
    return if @options.quiet

    @progress.print(char)
    @progress.flush
  end
  # Prints +msg+ on $stderr, preceded by an empty line, and flushes the
  # stream.
  def warn(msg)
    err = $stderr
    err.puts
    err.puts msg
    err.flush
  end
  # Strips private sections from a C comment, in place. A private section
  # starts at a "*--" marker and runs to the next "*++" marker, or to the
  # end of the comment when no "*++" follows.
  #
  # Always returns +comment+ (the same, mutated object). The previous
  # version returned whatever the final sub! produced, i.e. nil whenever no
  # unterminated "*--" section was present.
  def remove_private_comments(comment)
    comment.gsub!(/\/?\*--(.*?)\/?\*\+\+/m, '')
    comment.sub!(/\/?\*--.*/m, '')
    comment
  end
  200. # remove lines that are commented out that might otherwise get
  201. # picked up when scanning for classes and methods
  # Works on @body in place: on a line with a "//" comment, everything
  # from the "//" up to and including the last "rb_define_" is reduced to
  # a bare "//".
  def remove_commented_out_lines
    commented_define = %r{//.*rb_define_}
    @body.gsub!(commented_define, '//')
  end
  # Registers the class or module that the C variable +var_name+ refers to.
  #
  # +class_mod+ is "class" or "module"; +class_name+ is the Ruby name;
  # +parent+ is the C variable (or name) of the superclass and +in_module+
  # the C variable of the enclosing namespace, or nil for the top level.
  #
  # An enclosing namespace that has not been seen yet but is listed in
  # @known_classes is created on the fly. If it cannot be resolved at all a
  # warning is printed and nothing is registered.
  def handle_class_module(var_name, class_mod, class_name, parent, in_module)
    progress(class_mod[0, 1])

    parent_name = @known_classes[parent] || parent

    container = @top_level
    if in_module
      container = @classes[in_module]

      if !container && (known_name = @known_classes[in_module])
        # Variables starting with rb_m hold modules, everything else is
        # treated as a class.
        kind = in_module =~ /^rb_m/ ? "module" : "class"
        handle_class_module(in_module, kind, known_name, nil, nil)
        container = @classes[in_module]
      end

      unless container
        warn("Enclosing class/module '#{in_module}' for " +
             "#{class_mod} #{class_name} not known")
        return
      end
    end

    cm =
      if class_mod == "class"
        @stats.num_classes += 1
        container.add_class(NormalClass, class_name, parent_name)
      else
        @stats.num_modules += 1
        container.add_module(NormalModule, class_name)
      end

    cm.record_location(container.toplevel)
    find_class_comment(cm.full_name, cm)

    @classes[var_name] = cm
    @known_classes[var_name] = cm.full_name
  end
  237. ############################################################
  # Finds the documentation for +class_name+ in @body and stores it,
  # mangled, in +class_meth.comment+. Two places are tried in order: the C
  # comment directly before "void Init_<class_name>(...)", then a comment
  # carrying a "Document-class:" / "Document-module:" line for the name.
  # The comment is left untouched when neither is found.
  def find_class_comment(class_name, class_meth)
    found = @body[%r{((?>/\*.*?\*/\s+))
        (static\s+)?void\s+Init_#{class_name}\s*(?:_\(\s*)?\(\s*(?:void\s*)?\)}xmi, 1]
    found ||= @body[%r{Document-(class|module):\s#{class_name}\s*?\n((?>.*?\*/))}m, 2]
    return unless found

    class_meth.comment = mangle_comment(found)
  end
  248. ############################################################
  # Scans @body for assignments of the form
  #   var = rb_define_module("Name")
  #   var = rb_define_class("Name", parent)
  #   var = boot_defclass("Name", parent)
  #   var = rb_define_module_under(outer, "Name")
  #   var = rb_define_class_under(outer, "Name", parent)
  # and hands each one to handle_class_module.
  def do_classes
    top_module = /(\w+)\s* = \s*rb_define_module\s*\(\s*"(\w+)"\s*\)/mx
    @body.scan(top_module) do |var, name|
      handle_class_module(var, "module", name, nil, nil)
    end

    # The '.' lets us handle SWIG-generated files
    top_class = /([\w\.]+)\s* = \s*rb_define_class\s*
                 \(
                   \s*"(\w+)",
                   \s*(\w+)\s*
                 \)/mx
    @body.scan(top_class) do |var, name, superclass|
      handle_class_module(var, "class", name, superclass, nil)
    end

    boot_class = /(\w+)\s*=\s*boot_defclass\s*\(\s*"(\w+?)",\s*(\w+?)\s*\)/
    @body.scan(boot_class) do |var, name, superclass|
      # A literal 0 in the parent position means "no superclass".
      superclass = nil if superclass == "0"
      handle_class_module(var, "class", name, superclass, nil)
    end

    nested_module = /(\w+)\s* = \s*rb_define_module_under\s*
                     \(
                       \s*(\w+),
                       \s*"(\w+)"
                     \s*\)/mx
    @body.scan(nested_module) do |var, outer, name|
      handle_class_module(var, "module", name, nil, outer)
    end

    nested_class = /([\w\.]+)\s* = \s*rb_define_class_under\s*
                    \(
                      \s*(\w+),
                      \s*"(\w+)",
                      \s*(\w+)\s*
                    \s*\)/mx
    @body.scan(nested_class) do |var, outer, name, superclass|
      handle_class_module(var, "class", name, superclass, outer)
    end
  end
  286. ###########################################################
  # Scans @body for rb_define_variable, rb_define_readonly_variable,
  # rb_define_const and rb_define_global_const calls and passes each one to
  # handle_constants. Definitions without a receiver, and those made on
  # rb_mKernel, are attributed to rb_cObject.
  def do_constants
    definition_call = %r{\Wrb_define_
                           (
                              variable          |
                              readonly_variable |
                              const             |
                              global_const      |
                           )
                       \s*\(
                         (?:\s*(\w+),)?
                         \s*"(\w+)",
                         \s*(.*?)\s*\)\s*;
                       }xm

    @body.scan(definition_call) do |kind, receiver, name, value|
      receiver = "rb_cObject" if receiver.nil? || receiver == "rb_mKernel"
      handle_constants(kind, receiver, name, value)
    end
  end
  305. ############################################################
  # Scans @body for method-like definitions and dispatches them:
  # rb_define_{singleton_method,method,module_function,private_method} and
  # rb_define_global_function go to handle_method, rb_define_attr goes to
  # handle_attr, and define_filetest_function is recorded twice (as a
  # FileTest method and as a File singleton method).
  #
  # A trailing "/* in file.c */" or "// in file.c" comment after the call
  # is captured and forwarded as the source file.
  def do_methods
    # Receivers that are skipped: the top-level object and the dynamic
    # struct.c / ENV / ARGF objects.
    skipped = %w[ruby_top_self nstr envtbl argf] # it'd be nice to handle argf

    method_call = %r{rb_define_
                   (
                      singleton_method |
                      method           |
                      module_function  |
                      private_method
                   )
                   \s*\(\s*([\w\.]+),
                     \s*"([^"]+)",
                     \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                     \s*(-?\w+)\s*\)
                   (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                 }xm
    @body.scan(method_call) do |kind, receiver, name, c_func, arity, in_file|
      next if skipped.include?(receiver)

      receiver = "rb_cObject" if receiver == "rb_mKernel"
      handle_method(kind, receiver, name, c_func, arity, in_file)
    end

    attr_call = %r{rb_define_attr\(
                               \s*([\w\.]+),
                               \s*"([^"]+)",
                               \s*(\d+),
                               \s*(\d+)\s*\);
                }xm
    @body.scan(attr_call) do |receiver, name, readable, writable|
      handle_attr(receiver, name, readable.to_i != 0, writable.to_i != 0)
    end

    global_call = %r{rb_define_global_function\s*\(
                             \s*"([^"]+)",
                             \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                             \s*(-?\w+)\s*\)
                (?:;\s*/[*/]\s+in\s+(\w+?\.[cy]))?
                }xm
    @body.scan(global_call) do |name, c_func, arity, in_file|
      handle_method("method", "rb_mKernel", name, c_func, arity, in_file)
    end

    filetest_call = /define_filetest_function\s*\(
                             \s*"([^"]+)",
                             \s*(?:RUBY_METHOD_FUNC\(|VALUEFUNC\()?(\w+)\)?,
                             \s*(-?\w+)\s*\)/xm
    @body.scan(filetest_call) do |name, c_func, arity|
      handle_method("method", "rb_mFileTest", name, c_func, arity)
      handle_method("singleton_method", "rb_cFile", name, c_func, arity)
    end
  end
  362. ############################################################
  # Scans @body for rb_define_alias(var, "new", "old") calls and adds an
  # Alias to the owning class for each. Every alias is counted as a method
  # in @stats.
  def do_aliases
    alias_call = %r{rb_define_alias\s*\(\s*(\w+),\s*"([^"]+)",\s*"([^"]+)"\s*\)}m

    @body.scan(alias_call) do |var_name, new_name, old_name|
      @stats.num_methods += 1

      # Fall back to the raw C variable name when the class is unknown.
      owner = find_class(var_name, @known_classes[var_name] || var_name)
      owner.add_alias(Alias.new("", old_name, new_name, ""))
    end
  end
  372. ############################################################
  # Records the constant +const_name+ (with the C expression +definition+
  # as its value) on the class or module held in the C variable +var_name+.
  # +type+ is the rb_define_* suffix that introduced it and is only used to
  # locate the comment.
  #
  # Constants whose receiver is not in @known_classes are silently skipped.
  # If the receiver cannot be resolved to a class object a warning naming
  # both the receiver and the constant is printed. (The earlier message
  # printed the constant name in the receiver's place and had no subject.)
  def handle_constants(type, var_name, const_name, definition)
    #@stats.num_constants += 1
    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class(var_name, class_name)
    unless class_obj
      warn("Enclosing class/module '#{var_name}' for " +
           "constant #{const_name} not known")
      return
    end

    comment = find_const_comment(type, const_name)
    con = Constant.new(const_name, definition, mangle_comment(comment))
    class_obj.add_constant(con)
  end
  386. ###########################################################
  # Returns the raw comment text for the constant +const_name+: either the
  # C comment directly before its rb_define_<type>(...) call, or the rest
  # of a comment carrying a "Document-const:", "Document-global:" or
  # "Document-variable:" line for that name. Returns an empty string when
  # neither exists.
  def find_const_comment(type, const_name)
    preceding = @body[%r{((?>/\*.*?\*/\s+))
        rb_define_#{type}\((?:\s*(\w+),)?\s*"#{const_name}"\s*,.*?\)\s*;}xmi, 1]
    return preceding if preceding

    @body[%r{Document-(?:const|global|variable):\s#{const_name}\s*?\n((?>.*?\*/))}m, 1] || ''
  end
  397. ###########################################################
  # Adds an attribute named +attr_name+ to the class held in the C variable
  # +var_name+. +reader+ / +writer+ select the access flags ("R", "W" or
  # "RW"). Attributes on receivers missing from @known_classes are
  # skipped. A non-empty comment found for the attribute is mangled before
  # it is attached.
  def handle_attr(var_name, attr_name, reader, writer)
    rw = [reader ? 'R' : nil, writer ? 'W' : nil].compact.join

    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class(var_name, class_name)
    return unless class_obj

    comment = find_attr_comment(attr_name)
    comment = mangle_comment(comment) unless comment.empty?

    class_obj.add_attribute(Attr.new('', attr_name, rw, comment))
  end
  420. ###########################################################
  # Returns the raw comment text for the attribute +attr_name+: either the
  # C comment directly before its rb_define_attr(...) call, or the rest of
  # a comment carrying a "Document-attr:" line for that name. Returns an
  # empty string when neither exists.
  def find_attr_comment(attr_name)
    preceding = @body[%r{((?>/\*.*?\*/\s+))
        rb_define_attr\((?:\s*(\w+),)?\s*"#{attr_name}"\s*,.*?\)\s*;}xmi, 1]
    return preceding if preceding

    @body[%r{Document-attr:\s#{attr_name}\s*?\n((?>.*?\*/))}m, 1] || ''
  end
  431. ###########################################################
  # Records one method found in a rb_define_* call.
  #
  # +type+ is the rb_define_ suffix ("method", "singleton_method", ...),
  # +var_name+ the C variable of the receiver, +meth_name+ the Ruby name,
  # +meth_body+ the C function implementing it, +param_count+ the arity as
  # written in the C source and +source_file+ the optional file named in an
  # "in: file" comment.
  #
  # The method is counted in @stats even when its receiver is unknown, in
  # which case nothing else happens. "initialize" is documented as the
  # singleton method "new". The method is only added to the class when
  # find_body located documentation for it and it is not marked :nodoc:.
  def handle_method(type, var_name, meth_name,
                    meth_body, param_count, source_file = nil)
    progress(".")
    @stats.num_methods += 1

    class_name = @known_classes[var_name]
    return unless class_name

    class_obj = find_class(var_name, class_name)
    return unless class_obj

    if meth_name == "initialize"
      meth_name = "new"
      type = "singleton_method"
    end

    meth_obj = AnyMethod.new("", meth_name)
    meth_obj.singleton =
      %w{singleton_method module_function}.include?(type)

    # A non-numeric arity (a macro, say) is treated like a variable
    # argument list.
    p_count = (Integer(param_count) rescue -1)

    if p_count < 0
      meth_obj.params = "(...)"
    elsif p_count == 0
      meth_obj.params = "()"
    else
      meth_obj.params = "(" +
                        (1..p_count).map { |i| "p#{i}" }.join(", ") +
                        ")"
    end

    if source_file
      file_name = File.join(@file_dir, source_file)
      # The cache is shared by all parser instances, so it has to be keyed
      # by the full path: keying it by the bare file name made same-named
      # files in different directories return each other's contents.
      body = (@@known_bodies[file_name] ||= File.read(file_name))
    else
      body = @body
    end

    if find_body(meth_body, meth_obj, body) && meth_obj.document_self
      class_obj.add_method(meth_obj)
    end
  end
  468. ############################################################
  469. # Find the C code corresponding to a Ruby method
  # Looks in +body+ for the C function +meth_name+ and fills in
  # +meth_obj+ (comment, call-seq / :nodoc: modifiers and source text).
  #
  # Three forms are recognised, in this order:
  # * a "VALUE meth_name(...)" definition preceded by a C comment;
  # * a commented "#define meth_name other" line, in which case the search
  #   continues with +other+ and the two comments are concatenated;
  # * an uncommented "#define meth_name other" line.
  # When none matches, a "Document-method:" comment for the Ruby name is
  # used if one exists.
  #
  # Returns true when documentation was attached, false otherwise. A
  # "No definition" warning is printed on failure unless +quiet+ is set.
  def find_body(meth_name, meth_obj, body, quiet = false)
    case body
    when %r{((?>/\*.*?\*/\s*))(?:static\s+)?VALUE\s+#{meth_name}
            \s*(\(.*?\)).*?^}xm
      comment = $1
      body_text = $&

      remove_private_comments(comment) if comment

      # see if we can find the whole body
      re = Regexp.escape(body_text) + '[^(]*^\{.*?^\}'
      if Regexp.new(re, Regexp::MULTILINE).match(body)
        body_text = $&
      end

      # The comment block may have been overridden with a
      # 'Document-method' block. This happens in the interpreter
      # when multiple methods are vectored through to the same
      # C method but those methods are logically distinct (for
      # example Kernel.hash and Kernel.object_id share the same
      # implementation)
      override_comment = find_override_comment(meth_obj.name)
      comment = override_comment if override_comment

      find_modifiers(comment, meth_obj) if comment

      meth_obj.start_collecting_tokens
      meth_obj.add_token(RubyToken::Token.new(1,1).set_text(body_text))
      meth_obj.comment = mangle_comment(comment)
    when %r{((?>/\*.*?\*/\s*))^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
      comment = $1
      find_body($2, meth_obj, body, true)
      find_modifiers(comment, meth_obj)
      # The aliased function may have no body or comment of its own, which
      # leaves meth_obj.comment nil; to_s keeps the concatenation from
      # raising a TypeError in that case.
      meth_obj.comment = mangle_comment(comment) + meth_obj.comment.to_s
    when %r{^\s*\#\s*define\s+#{meth_name}\s+(\w+)}m
      unless find_body($1, meth_obj, body, true)
        warn "No definition for #{meth_name}" unless quiet
        return false
      end
    else
      # No body, but might still have an override comment
      comment = find_override_comment(meth_obj.name)

      if comment
        find_modifiers(comment, meth_obj)
        meth_obj.comment = mangle_comment(comment)
      else
        warn "No definition for #{meth_name}" unless quiet
        return false
      end
    end
    true
  end
  518. ##################################################
  519. #
  520. # If the comment block contains a section that looks like
  521. # call-seq:
  522. # Array.new
  523. # Array.new(10)
  524. # use it for the parameters
  # Both modifiers are cut out of +comment+ in place. A ":nodoc:" marker
  # switches off documentation of +meth_obj+; the text following
  # "call-seq:" (up to an empty comment line, or to the end of a one-line
  # comment) becomes its call sequence with the leading " * " decoration
  # removed.
  def find_modifiers(comment, meth_obj)
    nodoc = comment.sub!(/:nodoc:\s*^\s*\*?\s*$/m, '') ||
            comment.sub!(/\A\/\*\s*:nodoc:\s*\*\/\Z/, '')
    meth_obj.document_self = false if nodoc

    has_call_seq = comment.sub!(/call-seq:(.*?)^\s*\*?\s*$/m, '') ||
                   comment.sub!(/\A\/\*\s*call-seq:(.*?)\*\/\Z/, '')
    return unless has_call_seq

    # $1 belongs to whichever of the two substitutions succeeded.
    seq = $1
    seq.gsub!(/^\s*\*\s*/, '')
    meth_obj.call_seq = seq
  end
  537. ############################################################
  # Returns the remainder of the comment that carries a
  # "Document-method: <meth_name>" line in @body (from the following line
  # up to and including the closing "*/"), or nil when there is none. The
  # method name is regexp-escaped, so names such as "eql?" are matched
  # literally.
  def find_override_comment(meth_name)
    escaped = Regexp.escape(meth_name)
    @body[%r{Document-method:\s#{escaped}\s*?\n((?>.*?\*/))}m, 1]
  end
  544. ############################################################
  545. # Look for includes of the form
  546. # rb_include_module(rb_cArray, rb_mEnumerable);
  # Includes are only recorded for classes this parser has already
  # registered in @classes. The included module is named through
  # @known_classes when possible, otherwise by its C variable name.
  def do_includes
    @body.scan(/rb_include_module\s*\(\s*(\w+?),\s*(\w+?)\s*\)/) do |target_var, module_var|
      target = @classes[target_var]
      next unless target

      module_name = @known_classes[module_var] || module_var
      target.add_include(Include.new(module_name, ""))
    end
  end
  555. ############################################################
  556. # Remove the /*'s and leading asterisks from C comments
  # Works in place and returns +comment+. The first comment opener, the
  # first comment closer and the leading asterisk of every line are
  # replaced by the same number of spaces, so the remaining text keeps its
  # column positions.
  def mangle_comment(comment)
    blank_out = lambda { |marker| " " * marker.length }

    comment.sub!(%r{/\*+}, &blank_out)
    comment.sub!(%r{\*+/}, &blank_out)
    comment.gsub!(/^[ \t]*\*/m, &blank_out)
    comment
  end
  # Returns the class or module object registered for the C variable
  # +raw_name+. An unregistered variable gets a new top-level entry named
  # +name+: a module when the variable starts with "rb_m", otherwise a
  # class without a superclass.
  def find_class(raw_name, name)
    @classes[raw_name] ||=
      if raw_name =~ /^rb_m/
        @top_level.add_module(NormalModule, name)
      else
        @top_level.add_class(NormalClass, name, nil)
      end
  end
  # Expands tabs in +body+ to spaces, using the tab width from
  # Options.instance, so that text keeps the column it had with real tab
  # stops. A body without tabs is returned unchanged.
  #
  # Each line is expanded one run of tabs at a time: after a run has been
  # replaced, the text in front of the next run already has its final
  # width, so the distance to the next tab stop is computed from the real
  # column. (Replacing all runs in a single gsub! measured every run
  # against the unexpanded line, which misplaced everything after the first
  # run.)
  #
  # NOTE(review): when tabs are present the body is rebuilt with
  # split/join, which drops trailing newlines - unchanged from before.
  def handle_tab_width(body)
    return body unless /\t/ =~ body

    tab_width = Options.instance.tab_width
    body.split(/\n/).map do |line|
      1 while line.sub!(/\t+/) { ' ' * (tab_width * $&.length - $`.length % tab_width) }
      line
    end.join("\n")
  end
  584. # Remove #ifdefs that would otherwise confuse us
  # Every "#ifdef HAVE_PROTOTYPES ... #else ... #endif" group is replaced
  # by the text between its #else line and the #endif, i.e. only the
  # non-prototype alternative is kept. Returns a new string.
  def handle_ifdefs_in(body)
    prototype_guard = /^#ifdef HAVE_PROTOTYPES.*?#else.*?\n(.*?)#endif.*?\n/m
    body.gsub(prototype_guard) { Regexp.last_match(1) }
  end
  588. end
  589. end