PageRenderTime 27ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/langscan/ocaml.rb

https://github.com/nakao/langscan
Ruby | 298 lines | 240 code | 40 blank | 18 comment | 71 complexity | 25749a9972bf424c3166f98d514a7a6e MD5 | raw file
  1. #
  2. # ocaml.rb - an OCaml module of LangScan
  3. #
  4. # Copyright (C) 2005 Soutaro Matsumoto <matsumoto@soutaro.com>
  5. # All rights reserved.
  6. # This is free software with ABSOLUTELY NO WARRANTY.
  7. #
  8. # You can redistribute it and/or modify it under the terms of
  9. # the GNU General Public License version 2.
  10. #
  11. require 'langscan/_common'
  12. module LangScan
  13. module OCaml
  14. CAMLEXER_PATH = $LOAD_PATH.map{|path|
  15. File.join(path, "langscan/ocaml/camlexer")
  16. }.find {|path| File.file?(path) }
  17. class Eof < Exception
  18. end
  19. class Tokenizer
  20. SYMBOL_TBL = {
  21. "text" => :text,
  22. "ident" => :ident,
  23. "punct" => :punct,
  24. "keyword" => :keyword,
  25. "comment" => :comment,
  26. "integer" => :integer,
  27. "float" => :float,
  28. "string" => :string,
  29. "character" => :character,
  30. "funcdef" => :funcdef # not implemented yet
  31. }
  32. def initialize(input)
  33. @io = IO.popen(CAMLEXER_PATH, "r+")
  34. @tin = Thread.start {
  35. input.each {|l|
  36. @io.puts(l)
  37. }
  38. @io.close_write()
  39. }
  40. end
  41. def dispose()
  42. @tin.join()
  43. @io.close()
  44. end
  45. def denormalize(str)
  46. str.gsub(/([^\\])\\o/,'\1'+"\n")
  47. end
  48. def get_token()
  49. if @io.eof?
  50. nil
  51. else
  52. lno, cno, tp, wd = @io.gets().chomp().split(":",4)
  53. Fragment.new(SYMBOL_TBL[tp], denormalize(wd), lno.to_i(), cno.to_i())
  54. end
  55. end
  56. end
  57. module_function
  58. def name
  59. "Objective Caml"
  60. end
  61. def abbrev
  62. "ocaml"
  63. end
  64. def extnames
  65. [".ml", ".mli", ".mll", ".mly"]
  66. end
  67. def check_token(tkns, index, type, name = nil)
  68. t = tkns[index]
  69. raise Eof.new if !t
  70. return t.type == type && (!name || t.text == name)
  71. end
  72. def go_next(tkns, index, step)
  73. for i in 0...step
  74. index += 1
  75. index += 1 while (check_token(tkns, index, :comment))
  76. end
  77. index
  78. end
  79. def check_token_next(tkns, index, step, type, name = nil)
  80. index = go_next(tkns, index, step)
  81. check_token(tkns, index, type, name)
  82. end
  83. def go_prev(tkns, index, step)
  84. for i in 0...step
  85. index -= 1
  86. index -= 1 while (check_token(tkns, index, :comment))
  87. end
  88. index
  89. end
  90. def check_token_prev(tkns, index, step, type, name = nil)
  91. index = go_prev(tkns, index, step)
  92. check_token(tkns, index, type, name)
  93. end
  94. def skip_type(tkns, i)
  95. while (check_token(tkns, i, :punct, '->') ||
  96. check_token(tkns, i, :punct, '.') ||
  97. check_token(tkns, i, :punct, ':') ||
  98. check_token(tkns, i, :punct, '(') ||
  99. check_token(tkns, i, :punct, ')') ||
  100. check_token(tkns, i, :punct, '*') ||
  101. check_token(tkns, i, :comment) ||
  102. check_token(tkns, i, :ident))
  103. i += 1
  104. end
  105. i
  106. end
  107. def skip_parameter(tkns, i)
  108. t = tkns[i]
  109. return i if !t
  110. if (t.type == :punct && (t.text == '(' || t.text =~ /^\[\|?/))
  111. i = go_next(tkns, i, 1)
  112. first = i
  113. del = { '(' => ')', '[' => ']', '[|' => '|]' }[t.text]
  114. while (!check_token(tkns, i, :punct, del))
  115. i = go_next(tkns, i, 1)
  116. end
  117. convert_fun(tkns, first, i)
  118. end
  119. i = go_next(tkns, i, 1)
  120. end
  121. def is_first_parameter?(tkns, index)
  122. t = tkns[index]
  123. return false if !t
  124. if (t.type == :string || t.type == :character)
  125. return true
  126. end
  127. if (t.type == :keyword)
  128. return (t.text == '()')
  129. end
  130. if (t.type == :integer || t.type == :float)
  131. return (t.text !~ /^-/)
  132. end
  133. if (t.type == :ident)
  134. return (t.text != 'array' && t.text != 'list' && t.text != 'option')
  135. end
  136. if (t.type == :punct)
  137. return (t.text == '(' || t.text =~ /^\[\|?/)
  138. end
  139. return false
  140. end
  141. def is_method?(tkns, i)
  142. if (check_token_prev(tkns, i, 1, :keyword, 'virtual'))
  143. i = go_prev(tkns, i, 1)
  144. end
  145. if (check_token_prev(tkns, i, 1, :keyword, 'private'))
  146. i = go_prev(tkns, i, 1)
  147. end
  148. check_token_prev(tkns, i, 1, :keyword, 'method')
  149. end
  150. def is_parameter?(tkns, index)
  151. return true if (is_first_parameter?(tkns, index))
  152. t = tkns[index]
  153. return false if !t
  154. if (t.type == :punct)
  155. if (t.text == '.' || t.text == '#')
  156. return true
  157. end
  158. end
  159. return false
  160. end
# Walks the tokens tkns[i] .. tkns[max - 1] and retags identifier tokens
# in place (by assigning their type) using the surrounding tokens:
#   :fundef  - name after "let" / "let rec" that is not directly followed
#              by "=", or a method name not followed by ":"
#   :fundecl - name after "val" / "val mutable" / "external" whose ":"
#              annotation contains "->" before the next keyword token, or
#              a method name followed by ":"
#   :funcall - identifier preceded by "#" or followed by a token accepted
#              by is_first_parameter?
# "Previous" and "next" always skip comment tokens (go_prev / go_next).
# Running off the token list raises Eof inside the helpers; that ends the
# walk silently. The return value is not used by the callers in this file.
def convert_fun(tkns, i, max)
begin
while (i < max)
t = tkns[i]
if (t.type == :ident)
if (check_token_prev(tkns, i, 1, :keyword, 'fun'))
# Identifier right after "fun": move forward to the "->" token.
while (!check_token(tkns, i, :punct, '->'))
i = go_next(tkns, i, 1)
end
elsif (check_token_prev(tkns, i, 1, :keyword, 'let') ||
(check_token_prev(tkns, i, 1, :keyword, 'rec') &&
check_token_prev(tkns, i, 2, :keyword, 'let')))
# Name bound by "let" or "let rec". If "=" does not follow immediately
# the name is tagged as a function definition and i moves to the "=".
if (!check_token_next(tkns, i, 1, :punct, '='))
t.type = :fundef
i += 1 while (!check_token(tkns, i, :punct, '='))
end
elsif (check_token_prev(tkns, i, 1, :keyword, 'val') ||
(check_token_prev(tkns, i, 1, :keyword, 'mutable') &&
check_token_prev(tkns, i, 2, :keyword, 'val')) ||
check_token_prev(tkns, i, 1, :keyword, 'external'))
# Name introduced by "val", "val mutable" or "external".
if (check_token_next(tkns, i, 1, :punct, ':'))
# Not strict: look through the tokens after the ":" up to the next
# keyword token; an "->" among them tags the name as a function
# declaration, after which the rest of the type is skipped.
i = go_next(tkns, i, 2)
while (!check_token(tkns, i, :keyword))
if (check_token(tkns, i, :punct, '->'))
t.type = :fundecl
i = skip_type(tkns, i+1)
break
end
i = go_next(tkns, i, 1)
end
else
# No ":" after the name: nothing is retagged.
end
elsif (is_method?(tkns, i))
# Method name: followed by ":" it is a declaration and its type is
# skipped; otherwise it is a definition and i moves to the "=".
if (check_token_next(tkns, i, 1, :punct, ':'))
t.type = :fundecl
i = go_next(tkns, i, 2)
i = skip_type(tkns, i)
else
t.type = :fundef
i += 1 while (!check_token(tkns, i, :punct, '='))
end
elsif (!check_token_prev(tkns, i, 1, :punct, ':') &&
!check_token_prev(tkns, i, 1, :punct, '*') &&
!check_token_prev(tkns, i, 1, :punct, '\'') &&
!check_token_prev(tkns, i, 1, :punct, '~') &&
!check_token_prev(tkns, i, 1, :punct, '?') &&
!check_token_prev(tkns, i, 1, :punct, '|') &&
!check_token_prev(tkns, i, 1, :keyword, 'with'))
# Possible call. i first moves to the token after the identifier, so the
# two-steps-back check below looks at the token before the identifier.
# NOTE(review): the unconditional "i += 1" at the bottom of the loop also
# runs after this branch, so the token at the resulting i is never
# examined by this loop.
i = go_next(tkns, i, 1)
if (check_token_prev(tkns, i, 2, :punct, '#') ||
is_first_parameter?(tkns, i))
t.type = :funcall
# Step over the first argument and every following token that
# is_parameter? accepts.
i = skip_parameter(tkns, i)
while (is_parameter?(tkns, i))
i = skip_parameter(tkns, i)
end
end
end
elsif (check_token(tkns, i, :keyword, 'of') ||
# check_token(tkns, i, :punct, '|') || # overrun ->
check_token(tkns, i, :punct, ':>'))
# Skip the type expression after "of" or ":>", then step back one token
# so that the "i += 1" below lands on the first token after the type.
# (The original author was unsure whether this branch is needed.)
i = skip_type(tkns, i+1)
i = go_prev(tkns, i, 1)
elsif (check_token(tkns, i, :keyword, 'class'))
# After "class": move forward to the next "=" token.
i = go_next(tkns, i, 1) while (!check_token(tkns, i, :punct, '='))
end
i += 1
end
rescue Eof
# The helpers ran past the last token: stop converting.
end
end
  236. # LangScan::OCaml.scan iterates over Objective Caml program.
  237. # It yields for each Fragment.
  238. def scan(input, &block)
  239. tokenizer = Tokenizer.new(input)
  240. tkns = Array.new
  241. while (tkn = tokenizer.get_token())
  242. # is it ok?
  243. if (tkn.type == :ident && tkn.text =~ /^\W/)
  244. tkn.type = :punct
  245. end
  246. tkns << tkn
  247. end
  248. convert_fun(tkns, 0, tkns.size)
  249. tkns.each do |tkn|
  250. yield tkn
  251. end
  252. tokenizer.dispose()
  253. end
  254. if CAMLEXER_PATH
  255. LangScan.register(self)
  256. end
  257. end
  258. end