PageRenderTime 58ms CodeModel.GetById 29ms RepoModel.GetById 1ms app.codeStats 0ms

/gluon/highlight.py

https://code.google.com/p/web2py/
Python | 345 lines | 309 code | 13 blank | 23 comment | 22 complexity | 9e11c746054f8cbffd3789758972b32a MD5 | raw file
Possible License(s): LGPL-2.1, BSD-2-Clause, MIT, BSD-3-Clause, Apache-2.0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. | This file is part of the web2py Web Framework
  5. | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
  6. | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
  7. """
  8. import re
  9. import cgi
  10. __all__ = ['highlight']
  11. class Highlighter(object):
  12. """Does syntax highlighting.
  13. """
  14. def __init__(
  15. self,
  16. mode,
  17. link=None,
  18. styles=None,
  19. ):
  20. """
  21. Initialize highlighter:
  22. mode = language (PYTHON, WEB2PY,C, CPP, HTML, HTML_PLAIN)
  23. """
  24. styles = styles or {}
  25. mode = mode.upper()
  26. if link and link[-1] != '/':
  27. link = link + '/'
  28. self.link = link
  29. self.styles = styles
  30. self.output = []
  31. self.span_style = None
  32. if mode == 'WEB2PY':
  33. (mode, self.suppress_tokens) = ('PYTHON', [])
  34. elif mode == 'PYTHON':
  35. self.suppress_tokens = ['GOTOHTML']
  36. elif mode == 'CPP':
  37. (mode, self.suppress_tokens) = ('C', [])
  38. elif mode == 'C':
  39. self.suppress_tokens = ['CPPKEYWORD']
  40. elif mode == 'HTML_PLAIN':
  41. (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON'])
  42. elif mode == 'HTML':
  43. self.suppress_tokens = []
  44. else:
  45. raise SyntaxError('Unknown mode: %s' % mode)
  46. self.mode = mode
  47. def c_tokenizer(
  48. self,
  49. token,
  50. match,
  51. style,
  52. ):
  53. """
  54. Callback for C specific highlighting.
  55. """
  56. value = cgi.escape(match.group())
  57. self.change_style(token, style)
  58. self.output.append(value)
  59. def python_tokenizer(
  60. self,
  61. token,
  62. match,
  63. style,
  64. ):
  65. """
  66. Callback for python specific highlighting.
  67. """
  68. value = cgi.escape(match.group())
  69. if token == 'MULTILINESTRING':
  70. self.change_style(token, style)
  71. self.output.append(value)
  72. self.strMultilineString = match.group(1)
  73. return 'PYTHONMultilineString'
  74. elif token == 'ENDMULTILINESTRING':
  75. if match.group(1) == self.strMultilineString:
  76. self.output.append(value)
  77. self.strMultilineString = ''
  78. return 'PYTHON'
  79. if style and style[:5] == 'link:':
  80. self.change_style(None, None)
  81. (url, style) = style[5:].split(';', 1)
  82. if url == 'None' or url == '':
  83. self.output.append('<span style="%s">%s</span>'
  84. % (style, value))
  85. else:
  86. self.output.append('<a href="%s%s" style="%s">%s</a>'
  87. % (url, value, style, value))
  88. else:
  89. self.change_style(token, style)
  90. self.output.append(value)
  91. if token == 'GOTOHTML':
  92. return 'HTML'
  93. return None
  94. def html_tokenizer(
  95. self,
  96. token,
  97. match,
  98. style,
  99. ):
  100. """
  101. Callback for HTML specific highlighting.
  102. """
  103. value = cgi.escape(match.group())
  104. self.change_style(token, style)
  105. self.output.append(value)
  106. if token == 'GOTOPYTHON':
  107. return 'PYTHON'
  108. return None
  109. all_styles = {
  110. 'C': (c_tokenizer, (
  111. ('COMMENT', re.compile(r'//.*\r?\n'),
  112. 'color: green; font-style: italic'),
  113. ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
  114. 'color: green; font-style: italic'),
  115. ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
  116. re.DOTALL), 'color: magenta; font-style: italic'),
  117. ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
  118. 'font-weight: bold'),
  119. ('NUMBER',
  120. re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
  121. 'color: red'),
  122. ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
  123. + r'signed|unsigned|float|double|'
  124. + r'goto|break|return|continue|asm|'
  125. + r'case|default|if|else|switch|while|for|do|'
  126. + r'struct|union|enum|typedef|'
  127. + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
  128. 'color:#185369; font-weight: bold'),
  129. ('CPPKEYWORD',
  130. re.compile(r'(class|private|protected|public|template|new|delete|'
  131. + r'this|friend|using|inline|export|bool|throw|try|catch|'
  132. + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
  133. 'color: blue; font-weight: bold'),
  134. ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
  135. 'color: #FF9966'),
  136. ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
  137. None),
  138. ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
  139. )),
  140. 'PYTHON': (python_tokenizer, (
  141. ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
  142. ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
  143. 'font-weight: bold'),
  144. ('NUMBER',
  145. re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'
  146. ), 'color: red'),
  147. ('KEYWORD',
  148. re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
  149. + r'print|raise|return|try|except|global|assert|lambda|'
  150. + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
  151. + r'from|True|False)(?![a-zA-Z0-9_])'),
  152. 'color:#185369; font-weight: bold'),
  153. ('WEB2PY',
  154. re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'
  155. ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
  156. ('MAGIC', re.compile(r'self|None'),
  157. 'color:#185369; font-weight: bold'),
  158. ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
  159. 'color: #FF9966'),
  160. ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'
  161. ), 'color: #FF9966'),
  162. ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
  163. None),
  164. ('COMMENT', re.compile(r'\#.*\r?\n'),
  165. 'color: green; font-style: italic'),
  166. ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
  167. )),
  168. 'PYTHONMultilineString': (python_tokenizer,
  169. (('ENDMULTILINESTRING',
  170. re.compile(r'.*?("""|\'\'\')',
  171. re.DOTALL), 'color: darkred'), )),
  172. 'HTML': (html_tokenizer, (
  173. ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
  174. ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
  175. 'color: green; font-style: italic'),
  176. ('XMLCRAP', re.compile(r'<![^>]*>'),
  177. 'color: blue; font-style: italic'),
  178. ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
  179. + re.DOTALL), 'color: black'),
  180. ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
  181. 'color: darkred; font-weight: bold'),
  182. ('ENDTAG', re.compile(r'/?>'),
  183. 'color: darkred; font-weight: bold'),
  184. )),
  185. }
  186. def highlight(self, data):
  187. """
  188. Syntax highlight some python code.
  189. Returns html version of code.
  190. """
  191. i = 0
  192. mode = self.mode
  193. while i < len(data):
  194. for (token, o_re, style) in Highlighter.all_styles[mode][1]:
  195. if not token in self.suppress_tokens:
  196. match = o_re.match(data, i)
  197. if match:
  198. if style:
  199. new_mode = \
  200. Highlighter.all_styles[mode][0](self,
  201. token, match, style
  202. % dict(link=self.link))
  203. else:
  204. new_mode = \
  205. Highlighter.all_styles[mode][0](self,
  206. token, match, style)
  207. if not new_mode is None:
  208. mode = new_mode
  209. i += max(1, len(match.group()))
  210. break
  211. else:
  212. self.change_style(None, None)
  213. self.output.append(data[i])
  214. i += 1
  215. self.change_style(None, None)
  216. return ''.join(self.output).expandtabs(4)
  217. def change_style(self, token, style):
  218. """
  219. Generate output to change from existing style to another style only.
  220. """
  221. if token in self.styles:
  222. style = self.styles[token]
  223. if self.span_style != style:
  224. if style != 'Keep':
  225. if not self.span_style is None:
  226. self.output.append('</span>')
  227. if not style is None:
  228. self.output.append('<span style="%s">' % style)
  229. self.span_style = style
  230. def highlight(
  231. code,
  232. language,
  233. link='/examples/globals/vars/',
  234. counter=1,
  235. styles=None,
  236. highlight_line=None,
  237. context_lines=None,
  238. attributes=None,
  239. ):
  240. styles = styles or {}
  241. attributes = attributes or {}
  242. if not 'CODE' in styles:
  243. code_style = """
  244. font-size: 11px;
  245. font-family: Bitstream Vera Sans Mono,monospace;
  246. background-color: transparent;
  247. margin: 0;
  248. padding: 5px;
  249. border: none;
  250. overflow: auto;
  251. white-space: pre !important;\n"""
  252. else:
  253. code_style = styles['CODE']
  254. if not 'LINENUMBERS' in styles:
  255. linenumbers_style = """
  256. font-size: 11px;
  257. font-family: Bitstream Vera Sans Mono,monospace;
  258. background-color: transparent;
  259. margin: 0;
  260. padding: 5px;
  261. border: none;
  262. color: #A0A0A0;\n"""
  263. else:
  264. linenumbers_style = styles['LINENUMBERS']
  265. if not 'LINEHIGHLIGHT' in styles:
  266. linehighlight_style = "background-color: #EBDDE2;"
  267. else:
  268. linehighlight_style = styles['LINEHIGHLIGHT']
  269. if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
  270. 'WEB2PY']:
  271. code = Highlighter(language, link, styles).highlight(code)
  272. else:
  273. code = cgi.escape(code)
  274. lines = code.split('\n')
  275. if counter is None:
  276. linenumbers = [''] * len(lines)
  277. elif isinstance(counter, str):
  278. linenumbers = [cgi.escape(counter)] * len(lines)
  279. else:
  280. linenumbers = [str(i + counter) + '.' for i in
  281. xrange(len(lines))]
  282. if highlight_line:
  283. if counter and not isinstance(counter, str):
  284. lineno = highlight_line - counter
  285. else:
  286. lineno = highlight_line
  287. if lineno < len(lines):
  288. lines[lineno] = '<div style="%s">%s</div>' % (
  289. linehighlight_style, lines[lineno])
  290. linenumbers[lineno] = '<div style="%s">%s</div>' % (
  291. linehighlight_style, linenumbers[lineno])
  292. if context_lines:
  293. if lineno + context_lines < len(lines):
  294. del lines[lineno + context_lines:]
  295. del linenumbers[lineno + context_lines:]
  296. if lineno - context_lines > 0:
  297. del lines[0:lineno - context_lines]
  298. del linenumbers[0:lineno - context_lines]
  299. code = '<br/>'.join(lines)
  300. numbers = '<br/>'.join(linenumbers)
  301. items = attributes.items()
  302. fa = ' '.join([key[1:].lower() for (key, value) in items if key[:1]
  303. == '_' and value is None] + ['%s="%s"'
  304. % (key[1:].lower(), str(value).replace('"', "'"))
  305. for (key, value) in attributes.items() if key[:1]
  306. == '_' and value])
  307. if fa:
  308. fa = ' ' + fa
  309. return '<table%s><tr style="vertical-align:top;"><td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td><td><pre style="%s">%s</pre></td></tr></table>'\
  310. % (fa, linenumbers_style, numbers, code_style, code)
  311. if __name__ == '__main__':
  312. import sys
  313. argfp = open(sys.argv[1])
  314. data = argfp.read()
  315. argfp.close()
  316. print '<html><body>' + highlight(data, sys.argv[2])\
  317. + '</body></html>'