PageRenderTime 50ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/coderay/lib/coderay/scanners/ruby.rb

https://gitlab.com/xilinus/prototypeui
Ruby | 367 lines | 315 code | 34 blank | 18 comment | 38 complexity | 3e286e0584b802f2d46bcd12d76c3365 MD5 | raw file
  module CodeRay
    module Scanners
      # This scanner is really complex, since Ruby _is_ a complex language!
      #
      # It tries to highlight 100% of all common code,
      # and 90% of strange codes.
      #
      # It is optimized for HTML highlighting, and is not very useful for
      # parsing or pretty printing.
      #
      # For now, I think it's better than the scanners in VIM or Syntax, or
      # any highlighter I was able to find, except Caleb's RubyLexer.
      #
      # I hope it's also better than the rdoc/irb lexer.
      class Ruby < Scanner
        include Streamable

        register_for :ruby

        helper :patterns

        private

        # Tokenizes the remaining input and appends the result to +tokens+.
        #
        # Two shapes of entries are pushed:
        # * <tt>[text, kind]</tt> for a piece of scanned text, and
        # * <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt> markers around
        #   strings, regexps, symbols, shell commands, heredocs and inline
        #   (<tt>#{...}</tt>) blocks.
        #
        # tokens::  collection that receives the entries via <tt><<</tt>.
        # options:: accepted for interface compatibility; not read here.
        #
        # Returns +tokens+.
        #
        # The loop is a state machine. +state+ is either a symbol
        # (+:initial+, +:def_expected+, +:undef_expected+,
        # +:undef_comma_expected+, +:module_expected+) or a
        # <tt>Patterns::StringState</tt> instance while inside a string-like
        # literal. The scanning primitives used (+scan+, +scan_until+, +getch+,
        # +peek+, +check+, +match?+, +unscan+, +matched+, <tt>self[n]</tt>,
        # +eos?+, +bol?+) are inherited -- presumably StringScanner's API via
        # Scanner; confirm against the superclass.
        def scan_tokens tokens, options
          # Both flags are assigned :set while a token is scanned and are
          # normalized to true/false after each code token, so they only
          # survive for exactly one following token.
          last_token_dot = false
          value_expected = true
          heredocs = nil            # queue of StringStates for opened heredocs
          last_state = nil          # string state to resume after one code token
          state = :initial
          depth = nil               # brace depth inside the current inline block
          inline_block_stack = []   # saved [state, depth, heredocs] per #{ ... }

          patterns = Patterns  # avoid constant lookup

          until eos?
            match = nil
            kind = nil

            if state.instance_of? patterns::StringState
              # {{{
              # Inside a string-like literal: everything up to the next
              # character of interest is plain content.
              match = scan_until(state.pattern) || scan_until(/\z/)
              tokens << [match, :content] unless match.empty?
              break if eos?

              if state.heredoc and self[1]  # end of heredoc
                match = getch.to_s
                match << scan_until(/$/) unless eos?
                tokens << [match, :delimiter]
                tokens << [:close, state.type]
                state = state.next_state
                next
              end

              case match = getch

              when state.delim
                if state.paren
                  # Nested closing delimiter, e.g. the inner ")" of %(a(b)c).
                  state.paren_depth -= 1
                  if state.paren_depth > 0
                    tokens << [match, :nesting_delimiter]
                    next
                  end
                end
                tokens << [match, :delimiter]
                if state.type == :regexp and not eos?
                  modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
                  tokens << [modifiers, :modifier] unless modifiers.empty?
                end
                tokens << [:close, state.type]
                value_expected = false
                state = state.next_state

              when '\\'
                if state.interpreted
                  if esc = scan(/ #{patterns::ESCAPE} /ox)
                    tokens << [match + esc, :char]
                  else
                    tokens << [match, :error]
                  end
                else
                  # Non-interpreted literal: only the delimiter and the
                  # backslash itself are treated as escapable.
                  case m = getch
                  when state.delim, '\\'
                    tokens << [match + m, :char]
                  when nil
                    tokens << [match, :error]
                  else
                    tokens << [match + m, :content]
                  end
                end

              when '#'
                case peek(1)[0]
                when ?{
                  # Start of an inline block: remember where we were and scan
                  # the block contents as ordinary code.
                  inline_block_stack << [state, depth, heredocs]
                  value_expected = true
                  state = :initial
                  depth = 1
                  tokens << [:open, :inline]
                  tokens << [match + getch, :inline_delimiter]
                when ?$, ?@
                  tokens << [match, :escape]
                  last_state = state  # scan one token as normal code, then return here
                  state = :initial
                else
                  raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
                end

              when state.paren
                state.paren_depth += 1
                tokens << [match, :nesting_delimiter]

              when /#{patterns::REGEXP_SYMBOLS}/ox
                tokens << [match, :function]

              else
                raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

              end
              next
              # }}}
            else
              # {{{
              if match = scan(/ [ \t\f]+ | \\? \n | \# .* /x) or
                ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
                case m = match[0]
                when ?\s, ?\t, ?\f
                  match << scan(/\s*/) unless eos? or heredocs
                  kind = :space
                when ?\n, ?\\
                  kind = :space
                  if m == ?\n
                    value_expected = true  # FIXME not quite true
                    state = :initial if state == :undef_comma_expected
                  end
                  if heredocs
                    # A heredoc body starts at this line break: put the
                    # newline back and switch to the first queued heredoc.
                    unscan  # heredoc scanning needs \n at start
                    state = heredocs.shift
                    tokens << [:open, state.type]
                    heredocs = nil if heredocs.empty?
                    next
                  else
                    match << scan(/\s*/) unless eos?
                  end
                when ?#, ?=, ?_
                  kind = :comment
                  value_expected = true
                else
                  # The message contains a line break, as in the original
                  # two-line literal.
                  raise_inspect "else-case _ reached, because case %p was\nnot handled" % [matched[0].chr], tokens
                end
                tokens << [match, kind]
                next

              elsif state == :initial

                # IDENTS #
                if match = scan(/#{patterns::METHOD_NAME}/o)
                  if last_token_dot
                    kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
                  else
                    kind = patterns::IDENT_KIND[match]
                    if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
                      kind = :constant
                    elsif kind == :reserved
                      # NOTE(review): DEF_NEW_STATE presumably maps keywords
                      # to one of the *_expected states handled below and to
                      # :initial otherwise -- confirm in Patterns.
                      state = patterns::DEF_NEW_STATE[match]
                    end
                  end
                  ## experimental!
                  value_expected = :set if
                    patterns::REGEXP_ALLOWED[match] or check(/#{patterns::VALUE_FOLLOWS}/o)

                elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
                  kind = :ident
                  value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

                # OPERATORS #
                elsif not last_token_dot and match = scan(/ ==?=? | \.\.?\.? | [\(\)\[\]\{\}] | :: | , /x)
                  if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
                    value_expected = :set
                  end
                  last_token_dot = :set if match == '.' or match == '::'
                  kind = :operator
                  unless inline_block_stack.empty?
                    case match
                    when '{'
                      depth += 1
                    when '}'
                      depth -= 1
                      if depth == 0  # closing brace of inline block reached
                        state, depth, heredocs = inline_block_stack.pop
                        tokens << [match, :inline_delimiter]
                        # The common push at the bottom of the loop then
                        # emits [:close, :inline].
                        kind = :inline
                        match = :close
                      end
                    end
                  end

                elsif match = scan(/ ['"] /mx)
                  tokens << [:open, :string]
                  kind = :delimiter
                  state = patterns::StringState.new :string, match == '"', match  # important for streaming

                elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
                  kind = :instance_variable

                elsif value_expected and match = scan(/\//)
                  tokens << [:open, :regexp]
                  kind = :delimiter
                  interpreted = true
                  state = patterns::StringState.new :regexp, interpreted, match

                elsif match = scan(/#{patterns::NUMERIC}/o)
                  kind = if self[1] then :float else :integer end

                elsif match = scan(/#{patterns::SYMBOL}/o)
                  case delim = match[1]
                  when ?', ?"
                    # Quoted symbol (:"..." or :'...'): emit the colon, then
                    # scan the rest like a string of type :symbol.
                    tokens << [:open, :symbol]
                    tokens << [':', :symbol]
                    match = delim.chr
                    kind = :delimiter
                    state = patterns::StringState.new :symbol, delim == ?", match
                  else
                    kind = :symbol
                  end

                elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
                  value_expected = :set
                  kind = :operator

                elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
                  indented = self[1] == '-'
                  quote = self[3]
                  delim = self[quote ? 4 : 2]
                  kind = patterns::QUOTE_TO_TYPE[quote]
                  tokens << [:open, kind]
                  tokens << [match, :delimiter]
                  # The opener is closed right away (the common push below
                  # emits [:close, kind]); the body is queued and scanned
                  # once the next line break is reached.
                  match = :close
                  heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
                  heredocs ||= []  # create heredocs if empty
                  heredocs << heredoc

                elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
                  # fetch yields the missing key to the block, so the error
                  # message can name the unknown %-literal type.
                  # (FancyStringType is presumably a Hash -- confirm.)
                  kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
                    raise_inspect 'Unknown fancy string: %%%p' % [k], tokens
                  end
                  tokens << [:open, kind]
                  state = patterns::StringState.new kind, interpreted, self[2]
                  kind = :delimiter

                elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
                  kind = :integer

                elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
                  value_expected = :set
                  kind = :operator

                elsif match = scan(/`/)
                  if last_token_dot
                    kind = :operator
                  else
                    tokens << [:open, :shell]
                    kind = :delimiter
                    state = patterns::StringState.new :shell, true, match
                  end

                elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
                  kind = :global_variable

                elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
                  kind = :class_variable

                else
                  # Nothing matched: consume one character as an error token
                  # so the loop always makes progress.
                  kind = :error
                  match = getch

                end

              elsif state == :def_expected
                state = :initial
                if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
                  kind = :method
                else
                  next
                end

              elsif state == :undef_expected
                state = :undef_comma_expected
                if match = scan(/#{patterns::METHOD_NAME_EX}/o)
                  kind = :method
                elsif match = scan(/#{patterns::SYMBOL}/o)
                  case delim = match[1]
                  when ?', ?"
                    tokens << [:open, :symbol]
                    tokens << [':', :symbol]
                    match = delim.chr
                    kind = :delimiter
                    state = patterns::StringState.new :symbol, delim == ?", match
                    # After the quoted symbol, continue looking for a comma.
                    state.next_state = :undef_comma_expected
                  else
                    kind = :symbol
                  end
                else
                  state = :initial
                  next
                end

              elsif state == :undef_comma_expected
                if match = scan(/,/)
                  kind = :operator
                  state = :undef_expected
                else
                  state = :initial
                  next
                end

              elsif state == :module_expected
                if match = scan(/<</)
                  kind = :operator
                else
                  state = :initial
                  if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
                    kind = :class
                  else
                    next
                  end
                end

              end
              # }}}

              # Collapse the :set markers to booleans: the flags are true only
              # if they were set while scanning this very token.
              value_expected = value_expected == :set
              last_token_dot = last_token_dot == :set

              if $DEBUG and not kind
                raise_inspect 'Error token %p in line %d' %
                  [[match, kind], line], tokens, state
              end
              raise_inspect 'Empty token', tokens unless match

              tokens << [match, kind]

              # Return to the string that was interrupted by #$var / #@var.
              if last_state
                state = last_state
                last_state = nil
              end
            end
          end

          # End of input: close every literal and inline block still open so
          # that :open and :close markers stay balanced.
          inline_block_stack << [state] if state.is_a? patterns::StringState
          until inline_block_stack.empty?
            this_block = inline_block_stack.pop
            # Entries saved for #{ ... } have three elements; the one-element
            # entry pushed just above has no inline block to close.
            tokens << [:close, :inline] if this_block.size > 1
            state = this_block.first
            tokens << [:close, state.type]
          end

          tokens
        end
      end
    end
  end
  # vim:fdm=marker