PageRenderTime 79ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/bundler/ruby/1.9.1/gems/coderay-1.0.9/lib/coderay/scanners/python.rb

https://bitbucket.org/toihrk/fusuma
Ruby | 287 lines | 267 code | 15 blank | 5 comment | 1 complexity | c4f76e2ae1afe6bc1db288fed5b9da8b MD5 | raw file
  1. module CodeRay
  2. module Scanners
  3. # Scanner for Python. Supports Python 3.
  4. #
  5. # Based on pygments' PythonLexer, see
  6. # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
  7. class Python < Scanner
  8. register_for :python
  9. file_extension 'py'
  10. KEYWORDS = [
  11. 'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
  12. 'del', 'elif', 'else', 'except', 'finally', 'for',
  13. 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
  14. 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
  15. 'nonlocal', # new in Python 3
  16. ] # :nodoc:
  17. OLD_KEYWORDS = [
  18. 'exec', 'print', # gone in Python 3
  19. ] # :nodoc:
  20. PREDEFINED_METHODS_AND_TYPES = %w[
  21. __import__ abs all any apply basestring bin bool buffer
  22. bytearray bytes callable chr classmethod cmp coerce compile
  23. complex delattr dict dir divmod enumerate eval execfile exit
  24. file filter float frozenset getattr globals hasattr hash hex id
  25. input int intern isinstance issubclass iter len list locals
  26. long map max min next object oct open ord pow property range
  27. raw_input reduce reload repr reversed round set setattr slice
  28. sorted staticmethod str sum super tuple type unichr unicode
  29. vars xrange zip
  30. ] # :nodoc:
  31. PREDEFINED_EXCEPTIONS = %w[
  32. ArithmeticError AssertionError AttributeError
  33. BaseException DeprecationWarning EOFError EnvironmentError
  34. Exception FloatingPointError FutureWarning GeneratorExit IOError
  35. ImportError ImportWarning IndentationError IndexError KeyError
  36. KeyboardInterrupt LookupError MemoryError NameError
  37. NotImplemented NotImplementedError OSError OverflowError
  38. OverflowWarning PendingDeprecationWarning ReferenceError
  39. RuntimeError RuntimeWarning StandardError StopIteration
  40. SyntaxError SyntaxWarning SystemError SystemExit TabError
  41. TypeError UnboundLocalError UnicodeDecodeError
  42. UnicodeEncodeError UnicodeError UnicodeTranslateError
  43. UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
  44. ] # :nodoc:
  45. PREDEFINED_VARIABLES_AND_CONSTANTS = [
  46. 'False', 'True', 'None', # "keywords" since Python 3
  47. 'self', 'Ellipsis', 'NotImplemented',
  48. ] # :nodoc:
  49. IDENT_KIND = WordList.new(:ident).
  50. add(KEYWORDS, :keyword).
  51. add(OLD_KEYWORDS, :old_keyword).
  52. add(PREDEFINED_METHODS_AND_TYPES, :predefined).
  53. add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
  54. add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
  55. NAME = / [[:alpha:]_] \w* /x # :nodoc:
  56. ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
  57. UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
  58. OPERATOR = /
  59. \.\.\. | # ellipsis
  60. \.(?!\d) | # dot but not decimal point
  61. [,;:()\[\]{}] | # simple delimiters
  62. \/\/=? | \*\*=? | # special math
  63. [-+*\/%&|^]=? | # ordinary math and binary logic
  64. [~`] | # binary complement and inspection
  65. <<=? | >>=? | [<>=]=? | != # comparison and assignment
  66. /x # :nodoc:
  67. STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
  68. h[delimiter] = Regexp.union delimiter # :nodoc:
  69. }
  70. STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
  71. h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
  72. }
  73. DEF_NEW_STATE = WordList.new(:initial).
  74. add(%w(def), :def_expected).
  75. add(%w(import from), :include_expected).
  76. add(%w(class), :class_expected) # :nodoc:
  77. DESCRIPTOR = /
  78. #{NAME}
  79. (?: \. #{NAME} )*
  80. | \*
  81. /x # :nodoc:
  82. DOCSTRING_COMING = /
  83. [ \t]* u?r? ("""|''')
  84. /x # :nodoc:
  85. protected
  86. def scan_tokens encoder, options
  87. state = :initial
  88. string_delimiter = nil
  89. string_raw = false
  90. string_type = nil
  91. docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  92. last_token_dot = false
  93. unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  94. from_import_state = []
  95. until eos?
  96. if state == :string
  97. if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
  98. encoder.text_token match, :delimiter
  99. encoder.end_group string_type
  100. string_type = nil
  101. state = :initial
  102. next
  103. elsif string_delimiter.size == 3 && match = scan(/\n/)
  104. encoder.text_token match, :content
  105. elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
  106. encoder.text_token match, :content
  107. elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
  108. encoder.text_token match, :char
  109. elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
  110. encoder.text_token match, :char
  111. elsif match = scan(/ \\ . /x)
  112. encoder.text_token match, :content
  113. elsif match = scan(/ \\ | $ /x)
  114. encoder.end_group string_type
  115. string_type = nil
  116. encoder.text_token match, :error
  117. state = :initial
  118. else
  119. raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
  120. end
  121. elsif match = scan(/ [ \t]+ | \\?\n /x)
  122. encoder.text_token match, :space
  123. if match == "\n"
  124. state = :initial if state == :include_expected
  125. docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
  126. end
  127. next
  128. elsif match = scan(/ \# [^\n]* /mx)
  129. encoder.text_token match, :comment
  130. next
  131. elsif state == :initial
  132. if match = scan(/#{OPERATOR}/o)
  133. encoder.text_token match, :operator
  134. elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
  135. string_delimiter = self[2]
  136. string_type = docstring_coming ? :docstring : :string
  137. docstring_coming = false if docstring_coming
  138. encoder.begin_group string_type
  139. string_raw = false
  140. modifiers = self[1]
  141. unless modifiers.empty?
  142. string_raw = !!modifiers.index(?r)
  143. encoder.text_token modifiers, :modifier
  144. match = string_delimiter
  145. end
  146. state = :string
  147. encoder.text_token match, :delimiter
  148. # TODO: backticks
  149. elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
  150. kind = IDENT_KIND[match]
  151. # TODO: keyword arguments
  152. kind = :ident if last_token_dot
  153. if kind == :old_keyword
  154. kind = check(/\(/) ? :ident : :keyword
  155. elsif kind == :predefined && check(/ *=/)
  156. kind = :ident
  157. elsif kind == :keyword
  158. state = DEF_NEW_STATE[match]
  159. from_import_state << match.to_sym if state == :include_expected
  160. end
  161. encoder.text_token match, kind
  162. elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
  163. encoder.text_token match, :decorator
  164. elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
  165. encoder.text_token match, :hex
  166. elsif match = scan(/0[bB][01]+[lL]?/)
  167. encoder.text_token match, :binary
  168. elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
  169. if scan(/[jJ]/)
  170. match << matched
  171. encoder.text_token match, :imaginary
  172. else
  173. encoder.text_token match, :float
  174. end
  175. elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
  176. encoder.text_token match, :octal
  177. elsif match = scan(/\d+([lL])?/)
  178. if self[1] == nil && scan(/[jJ]/)
  179. match << matched
  180. encoder.text_token match, :imaginary
  181. else
  182. encoder.text_token match, :integer
  183. end
  184. else
  185. encoder.text_token getch, :error
  186. end
  187. elsif state == :def_expected
  188. state = :initial
  189. if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
  190. encoder.text_token match, :method
  191. else
  192. next
  193. end
  194. elsif state == :class_expected
  195. state = :initial
  196. if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
  197. encoder.text_token match, :class
  198. else
  199. next
  200. end
  201. elsif state == :include_expected
  202. if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
  203. if match == 'as'
  204. encoder.text_token match, :keyword
  205. from_import_state << :as
  206. elsif from_import_state.first == :from && match == 'import'
  207. encoder.text_token match, :keyword
  208. from_import_state << :import
  209. elsif from_import_state.last == :as
  210. # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
  211. encoder.text_token match, :ident
  212. from_import_state.pop
  213. elsif IDENT_KIND[match] == :keyword
  214. unscan
  215. match = nil
  216. state = :initial
  217. next
  218. else
  219. encoder.text_token match, :include
  220. end
  221. elsif match = scan(/,/)
  222. from_import_state.pop if from_import_state.last == :as
  223. encoder.text_token match, :operator
  224. else
  225. from_import_state = []
  226. state = :initial
  227. next
  228. end
  229. else
  230. raise_inspect 'Unknown state', encoder, state
  231. end
  232. last_token_dot = match == '.'
  233. end
  234. if state == :string
  235. encoder.end_group string_type
  236. end
  237. encoder
  238. end
  239. end
  240. end
  241. end