PageRenderTime 212ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/coderay/scanners/ruby.rb

https://github.com/benbasson/coderay
Ruby | 470 lines | 398 code | 54 blank | 18 comment | 74 complexity | 034a4d14bfdededa9ea8fe2e92ef5e89 MD5 | raw file
  module CodeRay
  module Scanners

    # This scanner is really complex, since Ruby _is_ a complex language!
    #
    # It tries to highlight 100% of all common code,
    # and 90% of strange codes.
    #
    # It is optimized for HTML highlighting, and is not very useful for
    # parsing or pretty printing.
    class Ruby < Scanner

      register_for :ruby
      file_extension 'rb'

      autoload :Patterns, CodeRay.coderay_path('scanners', 'ruby', 'patterns')
      autoload :StringState, CodeRay.coderay_path('scanners', 'ruby', 'string_state')

      # Returns a new StringState of type :string with '"' as its delimiter.
      # The second argument (+true+) is presumably the "interpreted" flag that
      # scan_tokens later reads through +state.interpreted+ - confirm against
      # StringState.
      def interpreted_string_state
        StringState.new :string, true, '"'
      end

    protected

      # Resets the scanner state so that scanning starts in the :initial state.
      def setup
        @state = :initial
      end

      # Scans the input until +eos?+ and reports every token to +encoder+ via
      # +text_token+, +begin_group+ and +end_group+.
      #
      # encoder:: receives the tokens; it is also the return value.
      # options:: a Hash. Two keys are read here:
      #           * <tt>:state</tt> - starting state; overrides <tt>@state</tt>.
      #             It may be a single state or a <tt>[state, heredocs]</tt>
      #             pair, as stored by <tt>:keep_state</tt>.
      #           * <tt>:keep_state</tt> - when truthy, the final
      #             <tt>[state, heredocs]</tt> pair is stored in <tt>@state</tt>.
      #
      # Local bookkeeping:
      # state::                a Symbol (:initial, :def_expected, :dot_expected,
      #                        :module_expected, :undef_expected,
      #                        :undef_comma_expected, :alias_expected) or a
      #                        StringState while inside a string-like literal.
      # heredocs::             queue of StringStates for heredocs that were opened
      #                        but whose bodies have not been scanned yet, or nil.
      # value_expected::       true, false or :colon_expected; gates the branches
      #                        for regexps, heredocs, fancy strings, character
      #                        literals and signed numbers.
      # method_call_expected:: the matched '.' or '::' right after such an
      #                        operator, otherwise false/nil.
      # inline_block_stack::   stack of <tt>[state, curly depth, heredocs]</tt>
      #                        entries, one per open <tt>#{</tt> block, or nil.
      # last_state::           state to return to after the next :initial token.
      #
      # Unknown states and unexpected characters inside a string are reported
      # through +raise_inspect+, which is defined outside this file.
      def scan_tokens encoder, options
        state, heredocs = options[:state] || @state
        # Work on a copy so that the caller's / stored heredoc queue is not mutated.
        heredocs = heredocs.dup if heredocs.is_a?(Array)

        # Resuming inside a string: reopen its token group.
        if state && state.instance_of?(StringState)
          encoder.begin_group state.type
        end

        last_state = nil

        method_call_expected = false
        value_expected = true

        inline_block_stack = nil
        inline_block_curly_depth = 0

        # Resuming with queued heredocs: continue with the first one.
        if heredocs
          state = heredocs.shift
          encoder.begin_group state.type
          heredocs = nil if heredocs.empty?
        end

        # def_object_stack = nil
        # def_object_paren_depth = 0

        patterns = Patterns  # avoid constant lookup

        unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'

        until eos?

          if state.instance_of? ::Symbol

            if match = scan(/[ \t\f\v]+/)
              encoder.text_token match, :space

            elsif match = scan(/\n/)
              if heredocs
                unscan  # heredoc scanning needs \n at start
                state = heredocs.shift
                encoder.begin_group state.type
                heredocs = nil if heredocs.empty?
              else
                state = :initial if state == :undef_comma_expected
                encoder.text_token match, :space
                value_expected = true
              end

            elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
              # Group 1 is only set for a "#!" line at the beginning of a line.
              encoder.text_token match, self[1] ? :doctype : :comment

            elsif match = scan(/\\\n/)
              if heredocs
                unscan  # heredoc scanning needs \n at start
                encoder.text_token scan(/\\/), :space
                state = heredocs.shift
                encoder.begin_group state.type
                heredocs = nil if heredocs.empty?
              else
                encoder.text_token match, :space
              end

            elsif state == :initial

              # IDENTS #
              if !method_call_expected &&
                 match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
                                        /#{patterns::METHOD_NAME}/o)
                kind = patterns::IDENT_KIND[match]
                if value_expected != :colon_expected && scan(/:(?!:)/)
                  # "name:" - a key followed by a single colon.
                  value_expected = true
                  encoder.text_token match, :key
                  encoder.text_token ':', :operator
                else
                  value_expected = false
                  if kind == :ident
                    # Capitalized, no trailing !/? and not followed by "(".
                    if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
                      kind = :constant
                    end
                  elsif kind == :keyword
                    state = patterns::KEYWORD_NEW_STATE[match]
                    if patterns::KEYWORDS_EXPECTING_VALUE[match]
                      value_expected = match == 'when' ? :colon_expected : true
                    end
                  end
                  value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
                  encoder.text_token match, kind
                end

              elsif method_call_expected &&
                 match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
                                        /#{patterns::METHOD_AFTER_DOT}/o)
                if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
                  encoder.text_token match, :constant
                else
                  encoder.text_token match, :ident
                end
                method_call_expected = false
                value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)

              # OPERATORS #
              elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
                # Group 1: '.' or '::'. Group 2: operators after which a value follows.
                method_call_expected = self[1]
                value_expected = !method_call_expected && !!self[2]
                if inline_block_stack
                  case match
                  when '{'
                    inline_block_curly_depth += 1
                  when '}'
                    inline_block_curly_depth -= 1
                    if inline_block_curly_depth == 0  # closing brace of inline block reached
                      state, inline_block_curly_depth, heredocs = inline_block_stack.pop
                      inline_block_stack = nil if inline_block_stack.empty?
                      heredocs = nil if heredocs && heredocs.empty?
                      encoder.text_token match, :inline_delimiter
                      encoder.end_group :inline
                      next
                    end
                  end
                end
                encoder.text_token match, :operator

              elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
                                           /#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  # Quoted symbol: continue in a string state of type :symbol.
                  encoder.begin_group :symbol
                  encoder.text_token ':', :symbol
                  match = delim.chr
                  encoder.text_token match, :delimiter
                  state = self.class::StringState.new :symbol, delim == ?", match
                else
                  encoder.text_token match, :symbol
                  value_expected = false
                end

              elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
                encoder.begin_group :string
                if match.size == 1
                  # Only the opening quote matched: scan the rest in a string state.
                  encoder.text_token match, :delimiter
                  state = self.class::StringState.new :string, match == '"', match  # important for streaming
                else
                  # A complete simple string was matched in one go.
                  encoder.text_token match[0,1], :delimiter
                  encoder.text_token match[1..-2], :content if match.size > 2
                  encoder.text_token match[-1,1], :delimiter
                  encoder.end_group :string
                  value_expected = false
                end

              elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
                                           /#{patterns::INSTANCE_VARIABLE}/o)
                value_expected = false
                encoder.text_token match, :instance_variable

              elsif value_expected && match = scan(/\//)
                encoder.begin_group :regexp
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :regexp, true, '/'

              elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
                if method_call_expected
                  encoder.text_token match, :error
                  method_call_expected = false
                else
                  encoder.text_token match, self[1] ? :float : :integer  # TODO: send :hex/:octal/:binary
                end
                value_expected = false

              elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
                value_expected = true
                encoder.text_token match, :operator

              elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
                quote = self[3]
                delim = self[quote ? 4 : 2]
                kind = patterns::QUOTE_TO_TYPE[quote]
                # The opener is emitted as its own group; the body is queued and
                # scanned once the next newline is reached.
                encoder.begin_group kind
                encoder.text_token match, :delimiter
                encoder.end_group kind
                heredocs ||= []  # create heredocs if empty
                heredocs << self.class::StringState.new(kind, quote != "'", delim,
                  self[1] == '-' ? :indented : :linestart)
                value_expected = false

              elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
                kind = patterns::FANCY_STRING_KIND[self[1]]
                encoder.begin_group kind
                state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
                encoder.text_token match, :delimiter

              elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
                value_expected = false
                encoder.text_token match, :integer

              elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
                value_expected = match == '?' ? :colon_expected : true
                encoder.text_token match, :operator

              elsif match = scan(/`/)
                encoder.begin_group :shell
                encoder.text_token match, :delimiter
                state = self.class::StringState.new :shell, true, match

              elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
                                           /#{patterns::GLOBAL_VARIABLE}/o)
                encoder.text_token match, :global_variable
                value_expected = false

              elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
                                           /#{patterns::CLASS_VARIABLE}/o)
                encoder.text_token match, :class_variable
                value_expected = false

              elsif match = scan(/\\\z/)
                encoder.text_token match, :space

              else
                # Nothing matched. If a method name was expected, drop that
                # expectation and retry the same position.
                if method_call_expected
                  method_call_expected = false
                  next
                end
                unless unicode
                  # check for unicode
                  # $DEBUG is switched off around the probe and restored afterwards;
                  # the saved value lives in a local, not in a global variable.
                  debug_before, $DEBUG = $DEBUG, false
                  begin
                    if check(/./mu).size > 1
                      # seems like we should try again with unicode
                      unicode = true
                    end
                  rescue
                    # bad unicode char; use getch
                  ensure
                    $DEBUG = debug_before
                  end
                  next if unicode
                end

                encoder.text_token getch, :error

              end

              if last_state
                state = last_state unless state.is_a?(StringState)  # otherwise, a simple 'def"' results in unclosed tokens
                last_state = nil
              end

            elsif state == :def_expected
              if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                        /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                encoder.text_token match, :method
                state = :initial
              else
                # Scan one token in :initial, then look for '.' or '::'.
                last_state = :dot_expected
                state = :initial
              end

            elsif state == :dot_expected
              if match = scan(/\.|::/)
                # invalid definition
                state = :def_expected
                encoder.text_token match, :operator
              else
                state = :initial
              end

            elsif state == :module_expected
              if match = scan(/<</)
                encoder.text_token match, :operator
              else
                state = :initial
                if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
                                          / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                  encoder.text_token match, :class
                end
              end

            elsif state == :undef_expected
              state = :undef_comma_expected
              if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
                                        /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                encoder.text_token match, :method
              elsif match = scan(/#{patterns::SYMBOL}/o)
                case delim = match[1]
                when ?', ?"
                  encoder.begin_group :symbol
                  encoder.text_token ':', :symbol
                  match = delim.chr
                  encoder.text_token match, :delimiter
                  state = self.class::StringState.new :symbol, delim == ?", match
                  # After the quoted symbol ends, go on expecting a comma.
                  state.next_state = :undef_comma_expected
                else
                  encoder.text_token match, :symbol
                end
              else
                state = :initial
              end

            elsif state == :undef_comma_expected
              if match = scan(/,/)
                encoder.text_token match, :operator
                state = :undef_expected
              else
                state = :initial
              end

            elsif state == :alias_expected
              match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
                                     /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)

              if match
                # Each name is a :symbol when it starts with ':', else a :method.
                encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
                encoder.text_token self[2], :space
                encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
              end
              state = :initial

            else
              #:nocov:
              raise_inspect 'Unknown state: %p' % [state], encoder
              #:nocov:
            end

          else  # StringState

            # Plain content up to the next position matched by the state's
            # pattern, or everything that is left.
            match = scan_until(state.pattern) || scan_rest
            unless match.empty?
              encoder.text_token match, :content
              break if eos?
            end

            if state.heredoc && self[1]  # end of heredoc
              match = getch
              match << scan_until(/$/) unless eos?
              encoder.text_token match, :delimiter unless match.empty?
              encoder.end_group state.type
              state = state.next_state
              next
            end

            case match = getch

            when state.delim
              if state.paren_depth
                state.paren_depth -= 1
                if state.paren_depth > 0
                  # Closes a nested pair only; still inside the literal.
                  encoder.text_token match, :content
                  next
                end
              end
              encoder.text_token match, :delimiter
              if state.type == :regexp && !eos?
                match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
                encoder.text_token match, :modifier unless match.empty?
              end
              encoder.end_group state.type
              value_expected = false
              state = state.next_state

            when '\\'
              if state.interpreted
                if esc = scan(/#{patterns::ESCAPE}/o)
                  encoder.text_token match + esc, :char
                else
                  encoder.text_token match, :error
                end
              else
                case esc = getch
                when nil
                  encoder.text_token match, :content
                when state.delim, '\\'
                  encoder.text_token match + esc, :char
                else
                  encoder.text_token match + esc, :content
                end
              end

            when '#'
              case peek(1)
              when '{'
                # Start of an inline block: remember where to return to and
                # scan its contents as ordinary code.
                inline_block_stack ||= []
                inline_block_stack << [state, inline_block_curly_depth, heredocs]
                value_expected = true
                state = :initial
                inline_block_curly_depth = 1
                encoder.begin_group :inline
                encoder.text_token match + getch, :inline_delimiter
              when '$', '@'
                # Scan one token in :initial, then return to this string state.
                encoder.text_token match, :escape
                last_state = state
                state = :initial
              else
                #:nocov:
                raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
                #:nocov:
              end

            when state.opening_paren
              state.paren_depth += 1
              encoder.text_token match, :content

            else
              #:nocov:
              raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
              #:nocov:

            end

          end

        end

        # cleaning up
        if state.is_a? StringState
          encoder.end_group state.type
        end

        if options[:keep_state]
          if state.is_a?(StringState) && state.heredoc
            # Put an unfinished heredoc back at the front of the queue.
            (heredocs ||= []).unshift state
            state = :initial
          elsif heredocs && heredocs.empty?
            heredocs = nil
          end
          @state = state, heredocs
        end

        # Close every inline block that is still open, together with the
        # string group it was opened in.
        if inline_block_stack
          until inline_block_stack.empty?
            state, = *inline_block_stack.pop
            encoder.end_group :inline
            encoder.end_group state.type
          end
        end

        encoder
      end

    end

  end
  end