PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/gluon/highlight.py

http://github.com/web2py/web2py
Python | 363 lines | 317 code | 18 blank | 28 comment | 20 complexity | 64ff5203a40bfd20c8484cd9a758f074 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, BSD-2-Clause, MPL-2.0-no-copyleft-exception, Apache-2.0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. | This file is part of the web2py Web Framework
  5. | Copyrighted by Massimo Di Pierro <mdipierro@cs.depaul.edu>
  6. | License: LGPLv3 (http://www.gnu.org/licenses/lgpl.html)
  7. """
  8. from pydal._compat import xrange
  9. from yatl.sanitizer import xmlescape
  10. import re
  11. __all__ = ['highlight']
  12. class all_styles(object):
  13. """
  14. Custom non-data descriptor for lazy initialization of
  15. Highlighter.all_styles class attribute.
  16. see:
  17. https://docs.python.org/2/reference/datamodel.html#implementing-descriptors
  18. or
  19. https://docs.python.org/3/reference/datamodel.html#implementing-descriptors
  20. """
  21. def __get__(self, instance, owner):
  22. val = _get_all_styles(owner)
  23. setattr(owner, 'all_styles', val)
  24. return val
  25. class Highlighter(object):
  26. """Does syntax highlighting.
  27. """
  28. def __init__(
  29. self,
  30. mode,
  31. link=None,
  32. styles=None,
  33. ):
  34. """
  35. Initialize highlighter:
  36. mode = language (PYTHON, WEB2PY, C, CPP, HTML, HTML_PLAIN)
  37. """
  38. styles = styles or {}
  39. mode = mode.upper()
  40. if link and link[-1] != '/':
  41. link = link + '/'
  42. self.link = link
  43. self.styles = styles
  44. self.output = []
  45. self.span_style = None
  46. if mode == 'WEB2PY':
  47. (mode, self.suppress_tokens) = ('PYTHON', [])
  48. elif mode == 'PYTHON':
  49. self.suppress_tokens = ['GOTOHTML']
  50. elif mode == 'CPP':
  51. (mode, self.suppress_tokens) = ('C', [])
  52. elif mode == 'C':
  53. self.suppress_tokens = ['CPPKEYWORD']
  54. elif mode == 'HTML_PLAIN':
  55. (mode, self.suppress_tokens) = ('HTML', ['GOTOPYTHON'])
  56. elif mode == 'HTML':
  57. self.suppress_tokens = []
  58. else:
  59. raise SyntaxError('Unknown mode: %s' % mode)
  60. self.mode = mode
  61. def c_tokenizer(
  62. self,
  63. token,
  64. match,
  65. style,
  66. ):
  67. """
  68. Callback for C specific highlighting.
  69. """
  70. value = xmlescape(match.group(), quote=False)
  71. self.change_style(token, style)
  72. self.output.append(value)
  73. def python_tokenizer(
  74. self,
  75. token,
  76. match,
  77. style,
  78. ):
  79. """
  80. Callback for python specific highlighting.
  81. """
  82. value = xmlescape(match.group(), quote=False)
  83. if token == 'MULTILINESTRING':
  84. self.change_style(token, style)
  85. self.output.append(value)
  86. self.strMultilineString = match.group(1)
  87. return 'PYTHONMultilineString'
  88. elif token == 'ENDMULTILINESTRING':
  89. if match.group(1) == self.strMultilineString:
  90. self.output.append(value)
  91. self.strMultilineString = ''
  92. return 'PYTHON'
  93. if style and style[:5] == 'link:':
  94. self.change_style(None, None)
  95. (url, style) = style[5:].split(';', 1)
  96. if url == 'None' or url == '':
  97. self.output.append('<span style="%s">%s</span>'
  98. % (style, value))
  99. else:
  100. self.output.append('<a href="%s%s" style="%s">%s</a>'
  101. % (url, value, style, value))
  102. else:
  103. self.change_style(token, style)
  104. self.output.append(value)
  105. if token == 'GOTOHTML':
  106. return 'HTML'
  107. return None
  108. def html_tokenizer(
  109. self,
  110. token,
  111. match,
  112. style,
  113. ):
  114. """
  115. Callback for HTML specific highlighting.
  116. """
  117. value = xmlescape(match.group(), quote=False)
  118. self.change_style(token, style)
  119. self.output.append(value)
  120. if token == 'GOTOPYTHON':
  121. return 'PYTHON'
  122. return None
  123. all_styles = all_styles()
  124. def highlight(self, data):
  125. """
  126. Syntax highlight some python code.
  127. Returns html version of code.
  128. """
  129. i = 0
  130. mode = self.mode
  131. while i < len(data):
  132. for (token, o_re, style) in Highlighter.all_styles[mode][1]:
  133. if token not in self.suppress_tokens:
  134. match = o_re.match(data, i)
  135. if match:
  136. if style:
  137. new_mode = \
  138. Highlighter.all_styles[mode][0](self,
  139. token, match, style
  140. % dict(link=self.link))
  141. else:
  142. new_mode = \
  143. Highlighter.all_styles[mode][0](self,
  144. token, match, style)
  145. if new_mode is not None:
  146. mode = new_mode
  147. i += max(1, len(match.group()))
  148. break
  149. else:
  150. self.change_style(None, None)
  151. self.output.append(data[i])
  152. i += 1
  153. self.change_style(None, None)
  154. return ''.join(self.output).expandtabs(4)
  155. def change_style(self, token, style):
  156. """
  157. Generate output to change from existing style to another style only.
  158. """
  159. if token in self.styles:
  160. style = self.styles[token]
  161. if self.span_style != style:
  162. if style != 'Keep':
  163. if self.span_style is not None:
  164. self.output.append('</span>')
  165. if style is not None:
  166. self.output.append('<span style="%s">' % style)
  167. self.span_style = style
  168. def _get_all_styles(cls):
  169. return {
  170. 'C': (cls.c_tokenizer, (
  171. ('COMMENT', re.compile(r'//.*\r?\n'),
  172. 'color: green; font-style: italic'),
  173. ('MULTILINECOMMENT', re.compile(r'/\*.*?\*/', re.DOTALL),
  174. 'color: green; font-style: italic'),
  175. ('PREPROCESSOR', re.compile(r'\s*#.*?[^\\]\s*\n',
  176. re.DOTALL), 'color: magenta; font-style: italic'),
  177. ('PUNC', re.compile(r'[-+*!&|^~/%\=<>\[\]{}(),.:]'),
  178. 'font-weight: bold'),
  179. ('NUMBER',
  180. re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'),
  181. 'color: red'),
  182. ('KEYWORD', re.compile(r'(sizeof|int|long|short|char|void|'
  183. + r'signed|unsigned|float|double|'
  184. + r'goto|break|return|continue|asm|'
  185. + r'case|default|if|else|switch|while|for|do|'
  186. + r'struct|union|enum|typedef|'
  187. + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'),
  188. 'color:#185369; font-weight: bold'),
  189. ('CPPKEYWORD',
  190. re.compile(r'(class|private|protected|public|template|new|delete|'
  191. + r'this|friend|using|inline|export|bool|throw|try|catch|'
  192. + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'),
  193. 'color: blue; font-weight: bold'),
  194. ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'),
  195. 'color: #FF9966'),
  196. ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
  197. None),
  198. ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
  199. )),
  200. 'PYTHON': (cls.python_tokenizer, (
  201. ('GOTOHTML', re.compile(r'\}\}'), 'color: red'),
  202. ('PUNC', re.compile(r'[-+*!|&^~/%\=<>\[\]{}(),.:]'),
  203. 'font-weight: bold'),
  204. ('NUMBER',
  205. re.compile(r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'
  206. ), 'color: red'),
  207. ('KEYWORD',
  208. re.compile(r'(def|class|break|continue|del|exec|finally|pass|'
  209. + r'print|raise|return|try|except|global|assert|lambda|'
  210. + r'yield|for|while|if|elif|else|and|in|is|not|or|import|'
  211. + r'from|True|False)(?![a-zA-Z0-9_])'),
  212. 'color:#185369; font-weight: bold'),
  213. ('WEB2PY',
  214. re.compile(r'(request|response|session|cache|redirect|local_import|HTTP|TR|XML|URL|BEAUTIFY|A|BODY|BR|B|CAT|CENTER|CODE|COL|COLGROUP|DIV|EM|EMBED|FIELDSET|LEGEND|FORM|H1|H2|H3|H4|H5|H6|IFRAME|HEAD|HR|HTML|I|IMG|INPUT|LABEL|LI|LINK|MARKMIN|MENU|META|OBJECT|OL|ON|OPTION|P|PRE|SCRIPT|SELECT|SPAN|STYLE|TABLE|THEAD|TBODY|TFOOT|TAG|TD|TEXTAREA|TH|TITLE|TT|T|UL|XHTML|IS_SLUG|IS_STRONG|IS_LOWER|IS_UPPER|IS_ALPHANUMERIC|IS_DATETIME|IS_DATETIME_IN_RANGE|IS_DATE|IS_DATE_IN_RANGE|IS_DECIMAL_IN_RANGE|IS_EMAIL|IS_EXPR|IS_FILE|IS_FLOAT_IN_RANGE|IS_IMAGE|IS_INT_IN_RANGE|IS_IN_SET|IS_IPV4|IS_LIST_OF|IS_LENGTH|IS_MATCH|IS_EQUAL_TO|IS_EMPTY_OR|IS_NULL_OR|IS_NOT_EMPTY|IS_TIME|IS_UPLOAD_FILENAME|IS_URL|CLEANUP|CRYPT|IS_IN_DB|IS_NOT_IN_DB|DAL|Field|SQLFORM|SQLTABLE|xmlescape|embed64)(?![a-zA-Z0-9_])'
  215. ), 'link:%(link)s;text-decoration:None;color:#FF5C1F;'),
  216. ('MAGIC', re.compile(r'self|None'),
  217. 'color:#185369; font-weight: bold'),
  218. ('MULTILINESTRING', re.compile(r'r?u?(\'\'\'|""")'),
  219. 'color: #FF9966'),
  220. ('STRING', re.compile(r'r?u?\'(.*?)(?<!\\)\'|"(.*?)(?<!\\)"'
  221. ), 'color: #FF9966'),
  222. ('IDENTIFIER', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*'),
  223. None),
  224. ('COMMENT', re.compile(r'\#.*\r?\n'),
  225. 'color: green; font-style: italic'),
  226. ('WHITESPACE', re.compile(r'[ \r\n]+'), 'Keep'),
  227. )),
  228. 'PYTHONMultilineString': (cls.python_tokenizer,
  229. (('ENDMULTILINESTRING',
  230. re.compile(r'.*?("""|\'\'\')',
  231. re.DOTALL), 'color: darkred'), )),
  232. 'HTML': (cls.html_tokenizer, (
  233. ('GOTOPYTHON', re.compile(r'\{\{'), 'color: red'),
  234. ('COMMENT', re.compile(r'<!--[^>]*-->|<!>'),
  235. 'color: green; font-style: italic'),
  236. ('XMLCRAP', re.compile(r'<![^>]*>'),
  237. 'color: blue; font-style: italic'),
  238. ('SCRIPT', re.compile(r'<script .*?</script>', re.IGNORECASE
  239. + re.DOTALL), 'color: black'),
  240. ('TAG', re.compile(r'</?\s*[a-zA-Z0-9]+'),
  241. 'color: darkred; font-weight: bold'),
  242. ('ENDTAG', re.compile(r'/?>'),
  243. 'color: darkred; font-weight: bold'),
  244. )),
  245. }
  246. def highlight(
  247. code,
  248. language,
  249. link='/examples/globals/vars/',
  250. counter=1,
  251. styles=None,
  252. highlight_line=None,
  253. context_lines=None,
  254. attributes=None,
  255. ):
  256. styles = styles or {}
  257. attributes = attributes or {}
  258. code_style = styles.get('CODE', None) or '''
  259. font-size: 11px;
  260. font-family: Bitstream Vera Sans Mono,monospace;
  261. background-color: transparent;
  262. margin: 0;
  263. padding: 5px;
  264. border: none;
  265. overflow: auto;
  266. white-space: pre !important;
  267. '''
  268. linenumbers_style = styles.get('LINENUMBERS', None) or '''
  269. font-size: 11px;
  270. font-family: Bitstream Vera Sans Mono,monospace;
  271. background-color: transparent;
  272. margin: 0;
  273. padding: 5px;
  274. border: none;
  275. color: #A0A0A0;
  276. '''
  277. linehighlight_style = styles.get('LINEHIGHLIGHT', None) or \
  278. 'background-color: #EBDDE2;'
  279. if language and language.upper() in ['PYTHON', 'C', 'CPP', 'HTML',
  280. 'WEB2PY']:
  281. code = Highlighter(language, link, styles).highlight(code)
  282. else:
  283. code = xmlescape(code, quote=False)
  284. lines = code.split('\n')
  285. if counter is None:
  286. linenumbers = [''] * len(lines)
  287. elif isinstance(counter, str):
  288. linenumbers = [xmlescape(counter, quote=False)] * len(lines)
  289. else:
  290. linenumbers = [str(i + counter) + '.' for i in
  291. xrange(len(lines))]
  292. if highlight_line:
  293. if counter and not isinstance(counter, str):
  294. lineno = highlight_line - counter
  295. else:
  296. lineno = highlight_line
  297. if lineno < len(lines):
  298. lines[lineno] = '<span style="%s">%s</span>' % (
  299. linehighlight_style, lines[lineno])
  300. linenumbers[lineno] = '<span style="%s">%s</span>' % (
  301. linehighlight_style, linenumbers[lineno])
  302. if context_lines:
  303. if lineno + context_lines < len(lines):
  304. delslice = slice(lineno + context_lines + 1, len(lines))
  305. del lines[delslice]
  306. del linenumbers[delslice]
  307. if lineno - context_lines > 0:
  308. delslice = slice(0, lineno - context_lines)
  309. del lines[delslice]
  310. del linenumbers[delslice]
  311. code = '<br/>'.join(lines)
  312. numbers = '<br/>'.join(linenumbers)
  313. items = attributes.items()
  314. fa = ' '.join([key[1:].lower() for (key, value) in items if key[:1]
  315. == '_' and value is None] + ['%s="%s"'
  316. % (key[1:].lower(), str(value).replace('"', "'"))
  317. for (key, value) in items if key[:1]
  318. == '_' and value])
  319. if fa:
  320. fa = ' ' + fa
  321. return '<table%s><tr style="vertical-align:top;">' \
  322. '<td style="min-width:40px; text-align: right;"><pre style="%s">%s</pre></td>' \
  323. '<td><pre style="%s">%s</pre></td></tr></table>' % (fa, linenumbers_style, numbers, code_style, code)
  324. if __name__ == '__main__':
  325. import sys
  326. argfp = open(sys.argv[1])
  327. data = argfp.read()
  328. argfp.close()
  329. print('<html><body>' + highlight(data, sys.argv[2]) + '</body></html>')