/lib/coderay/scanners/ruby.rb
Ruby | 470 lines | 398 code | 54 blank | 18 comment | 74 complexity | 034a4d14bfdededa9ea8fe2e92ef5e89 MD5 | raw file
- module CodeRay
- module Scanners
-
- # This scanner is really complex, since Ruby _is_ a complex language!
- #
- # It tries to highlight 100% of all common code,
- # and 90% of strange codes.
- #
- # It is optimized for HTML highlighting, and is not very useful for
- # parsing or pretty printing.
- class Ruby < Scanner
-
- register_for :ruby
- file_extension 'rb'
-
- autoload :Patterns, CodeRay.coderay_path('scanners', 'ruby', 'patterns')
- autoload :StringState, CodeRay.coderay_path('scanners', 'ruby', 'string_state')
-
- def interpreted_string_state
- StringState.new :string, true, '"'
- end
-
- protected
-
- def setup
- @state = :initial
- end
-
- def scan_tokens encoder, options
- state, heredocs = options[:state] || @state
- heredocs = heredocs.dup if heredocs.is_a?(Array)
-
- if state && state.instance_of?(StringState)
- encoder.begin_group state.type
- end
-
- last_state = nil
-
- method_call_expected = false
- value_expected = true
-
- inline_block_stack = nil
- inline_block_curly_depth = 0
-
- if heredocs
- state = heredocs.shift
- encoder.begin_group state.type
- heredocs = nil if heredocs.empty?
- end
-
- # def_object_stack = nil
- # def_object_paren_depth = 0
-
- patterns = Patterns # avoid constant lookup
-
- unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
-
- until eos?
-
- if state.instance_of? ::Symbol
-
- if match = scan(/[ \t\f\v]+/)
- encoder.text_token match, :space
-
- elsif match = scan(/\n/)
- if heredocs
- unscan # heredoc scanning needs \n at start
- state = heredocs.shift
- encoder.begin_group state.type
- heredocs = nil if heredocs.empty?
- else
- state = :initial if state == :undef_comma_expected
- encoder.text_token match, :space
- value_expected = true
- end
-
- elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
- encoder.text_token match, self[1] ? :doctype : :comment
-
- elsif match = scan(/\\\n/)
- if heredocs
- unscan # heredoc scanning needs \n at start
- encoder.text_token scan(/\\/), :space
- state = heredocs.shift
- encoder.begin_group state.type
- heredocs = nil if heredocs.empty?
- else
- encoder.text_token match, :space
- end
-
- elsif state == :initial
-
- # IDENTS #
- if !method_call_expected &&
- match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
- /#{patterns::METHOD_NAME}/o)
-
- kind = patterns::IDENT_KIND[match]
- if value_expected != :colon_expected && scan(/:(?!:)/)
- value_expected = true
- encoder.text_token match, :key
- encoder.text_token ':', :operator
- else
- value_expected = false
- if kind == :ident
- if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
- kind = :constant
- end
- elsif kind == :keyword
- state = patterns::KEYWORD_NEW_STATE[match]
- if patterns::KEYWORDS_EXPECTING_VALUE[match]
- value_expected = match == 'when' ? :colon_expected : true
- end
- end
- value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
- encoder.text_token match, kind
- end
-
- elsif method_call_expected &&
- match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
- /#{patterns::METHOD_AFTER_DOT}/o)
- if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
- encoder.text_token match, :constant
- else
- encoder.text_token match, :ident
- end
- method_call_expected = false
- value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
-
- # OPERATORS #
- elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | ( \.\.\.? | ==?=? | [,\(\[\{] ) | [\)\]\}] /x)
- method_call_expected = self[1]
- value_expected = !method_call_expected && !!self[2]
- if inline_block_stack
- case match
- when '{'
- inline_block_curly_depth += 1
- when '}'
- inline_block_curly_depth -= 1
- if inline_block_curly_depth == 0 # closing brace of inline block reached
- state, inline_block_curly_depth, heredocs = inline_block_stack.pop
- inline_block_stack = nil if inline_block_stack.empty?
- heredocs = nil if heredocs && heredocs.empty?
- encoder.text_token match, :inline_delimiter
- encoder.end_group :inline
- next
- end
- end
- end
- encoder.text_token match, :operator
-
- elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
- /#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- encoder.begin_group :symbol
- encoder.text_token ':', :symbol
- match = delim.chr
- encoder.text_token match, :delimiter
- state = self.class::StringState.new :symbol, delim == ?", match
- else
- encoder.text_token match, :symbol
- value_expected = false
- end
-
- elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
- encoder.begin_group :string
- if match.size == 1
- encoder.text_token match, :delimiter
- state = self.class::StringState.new :string, match == '"', match # important for streaming
- else
- encoder.text_token match[0,1], :delimiter
- encoder.text_token match[1..-2], :content if match.size > 2
- encoder.text_token match[-1,1], :delimiter
- encoder.end_group :string
- value_expected = false
- end
-
- elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
- /#{patterns::INSTANCE_VARIABLE}/o)
- value_expected = false
- encoder.text_token match, :instance_variable
-
- elsif value_expected && match = scan(/\//)
- encoder.begin_group :regexp
- encoder.text_token match, :delimiter
- state = self.class::StringState.new :regexp, true, '/'
-
- elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
- if method_call_expected
- encoder.text_token match, :error
- method_call_expected = false
- else
- encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
- end
- value_expected = false
-
- elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
- value_expected = true
- encoder.text_token match, :operator
-
- elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
- quote = self[3]
- delim = self[quote ? 4 : 2]
- kind = patterns::QUOTE_TO_TYPE[quote]
- encoder.begin_group kind
- encoder.text_token match, :delimiter
- encoder.end_group kind
- heredocs ||= [] # create heredocs if empty
- heredocs << self.class::StringState.new(kind, quote != "'", delim,
- self[1] == '-' ? :indented : :linestart)
- value_expected = false
-
- elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
- kind = patterns::FANCY_STRING_KIND[self[1]]
- encoder.begin_group kind
- state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
- encoder.text_token match, :delimiter
-
- elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
- value_expected = false
- encoder.text_token match, :integer
-
- elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
- value_expected = match == '?' ? :colon_expected : true
- encoder.text_token match, :operator
-
- elsif match = scan(/`/)
- encoder.begin_group :shell
- encoder.text_token match, :delimiter
- state = self.class::StringState.new :shell, true, match
-
- elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
- /#{patterns::GLOBAL_VARIABLE}/o)
- encoder.text_token match, :global_variable
- value_expected = false
-
- elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
- /#{patterns::CLASS_VARIABLE}/o)
- encoder.text_token match, :class_variable
- value_expected = false
-
- elsif match = scan(/\\\z/)
- encoder.text_token match, :space
-
- else
- if method_call_expected
- method_call_expected = false
- next
- end
- unless unicode
- # check for unicode
- $DEBUG_BEFORE, $DEBUG = $DEBUG, false
- begin
- if check(/./mu).size > 1
- # seems like we should try again with unicode
- unicode = true
- end
- rescue
- # bad unicode char; use getch
- ensure
- $DEBUG = $DEBUG_BEFORE
- end
- next if unicode
- end
-
- encoder.text_token getch, :error
-
- end
-
- if last_state
- state = last_state unless state.is_a?(StringState) # otherwise, a simple 'def"' results in unclosed tokens
- last_state = nil
- end
-
- elsif state == :def_expected
- if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
- /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- encoder.text_token match, :method
- state = :initial
- else
- last_state = :dot_expected
- state = :initial
- end
-
- elsif state == :dot_expected
- if match = scan(/\.|::/)
- # invalid definition
- state = :def_expected
- encoder.text_token match, :operator
- else
- state = :initial
- end
-
- elsif state == :module_expected
- if match = scan(/<</)
- encoder.text_token match, :operator
- else
- state = :initial
- if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
- / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
- encoder.text_token match, :class
- end
- end
-
- elsif state == :undef_expected
- state = :undef_comma_expected
- if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
- /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
- encoder.text_token match, :method
- elsif match = scan(/#{patterns::SYMBOL}/o)
- case delim = match[1]
- when ?', ?"
- encoder.begin_group :symbol
- encoder.text_token ':', :symbol
- match = delim.chr
- encoder.text_token match, :delimiter
- state = self.class::StringState.new :symbol, delim == ?", match
- state.next_state = :undef_comma_expected
- else
- encoder.text_token match, :symbol
- end
- else
- state = :initial
- end
-
- elsif state == :undef_comma_expected
- if match = scan(/,/)
- encoder.text_token match, :operator
- state = :undef_expected
- else
- state = :initial
- end
-
- elsif state == :alias_expected
- match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
- /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
-
- if match
- encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
- encoder.text_token self[2], :space
- encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
- end
- state = :initial
-
- else
- #:nocov:
- raise_inspect 'Unknown state: %p' % [state], encoder
- #:nocov:
- end
-
- else # StringState
-
- match = scan_until(state.pattern) || scan_rest
- unless match.empty?
- encoder.text_token match, :content
- break if eos?
- end
-
- if state.heredoc && self[1] # end of heredoc
- match = getch
- match << scan_until(/$/) unless eos?
- encoder.text_token match, :delimiter unless match.empty?
- encoder.end_group state.type
- state = state.next_state
- next
- end
-
- case match = getch
-
- when state.delim
- if state.paren_depth
- state.paren_depth -= 1
- if state.paren_depth > 0
- encoder.text_token match, :content
- next
- end
- end
- encoder.text_token match, :delimiter
- if state.type == :regexp && !eos?
- match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
- encoder.text_token match, :modifier unless match.empty?
- end
- encoder.end_group state.type
- value_expected = false
- state = state.next_state
-
- when '\\'
- if state.interpreted
- if esc = scan(/#{patterns::ESCAPE}/o)
- encoder.text_token match + esc, :char
- else
- encoder.text_token match, :error
- end
- else
- case esc = getch
- when nil
- encoder.text_token match, :content
- when state.delim, '\\'
- encoder.text_token match + esc, :char
- else
- encoder.text_token match + esc, :content
- end
- end
-
- when '#'
- case peek(1)
- when '{'
- inline_block_stack ||= []
- inline_block_stack << [state, inline_block_curly_depth, heredocs]
- value_expected = true
- state = :initial
- inline_block_curly_depth = 1
- encoder.begin_group :inline
- encoder.text_token match + getch, :inline_delimiter
- when '$', '@'
- encoder.text_token match, :escape
- last_state = state
- state = :initial
- else
- #:nocov:
- raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
- #:nocov:
- end
-
- when state.opening_paren
- state.paren_depth += 1
- encoder.text_token match, :content
-
- else
- #:nocov
- raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
- #:nocov:
-
- end
-
- end
-
- end
-
- # cleaning up
- if state.is_a? StringState
- encoder.end_group state.type
- end
-
- if options[:keep_state]
- if state.is_a?(StringState) && state.heredoc
- (heredocs ||= []).unshift state
- state = :initial
- elsif heredocs && heredocs.empty?
- heredocs = nil
- end
- @state = state, heredocs
- end
-
- if inline_block_stack
- until inline_block_stack.empty?
- state, = *inline_block_stack.pop
- encoder.end_group :inline
- encoder.end_group state.type
- end
- end
-
- encoder
- end
-
- end
-
- end
- end