PageRenderTime 58ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/webassets/filter/rjsmin/rjsmin.py

https://github.com/mcfletch/webassets
Python | 354 lines | 314 code | 4 blank | 36 comment | 3 complexity | 8687e46c0c23dd7f31d9fae6c4aa1b5a MD5 | raw file
Possible License(s): BSD-2-Clause
#!/usr/bin/env python
# -*- coding: ascii -*-
#
# Copyright 2011
# Andr\xe9 Malo or his licensors, as applicable
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  18. r"""
  19. =====================
  20. Javascript Minifier
  21. =====================
  22. Javascript Minifier based on `jsmin.c by Douglas Crockford`_\.
  23. This module is a re-implementation based on the semantics of jsmin.c. Usually
  24. it produces the same results. It differs in the following ways:
  25. - there is no error detection: unterminated string, regex and comment
  26. literals are treated as regular javascript code and minified as such.
  27. - Control characters inside string and regex literals are left untouched; they
  28. are not converted to spaces (nor to \n)
  29. - Newline characters are not allowed inside string and regex literals, except
  30. for line continuations in string literals (ECMA-5).
  31. - rjsmin does not handle streams, but only complete strings. (However, the
  32. module provides a "streamy" interface).
  33. Besides the list above it differs from direct python ports of jsmin.c in
  34. speed. Since most parts of the logic are handled by the regex engine it's way
  35. faster than the original python port by Baruch Even. The speed factor varies
  36. between about 6 and 55 depending on input and python version (it gets faster
  37. the more compressed the input already is). Compared to the speed-refactored
  38. python port by Dave St.Germain the performance gain is less dramatic but still
  39. between 1.2 and 7. See the docs/BENCHMARKS file for details.
  40. rjsmin.c is a reimplementation of rjsmin.py in C and speeds it up even more.
  41. Both python 2 and python 3 are supported.
  42. .. _jsmin.c by Douglas Crockford:
  43. http://www.crockford.com/javascript/jsmin.c
  44. """
  45. __author__ = "Andr\xe9 Malo"
  46. __author__ = getattr(__author__, 'decode', lambda x: __author__)('latin-1')
  47. __docformat__ = "restructuredtext en"
  48. __license__ = "Apache License, Version 2.0"
  49. __version__ = '1.0.1'
  50. __all__ = ['jsmin', 'jsmin_for_posers']
import re as _re

from webassets.six.moves import map
from webassets.six.moves import zip
  54. def _make_jsmin(extended=True, python_only=False):
  55. """
  56. Generate JS minifier based on `jsmin.c by Douglas Crockford`_
  57. .. _jsmin.c by Douglas Crockford:
  58. http://www.crockford.com/javascript/jsmin.c
  59. :Parameters:
  60. `extended` : ``bool``
  61. Extended Regexps? (using lookahead and lookbehind). This is faster,
  62. because it can be optimized way more. The regexps used with `extended`
  63. being false are only left here to allow easier porting to platforms
  64. without extended regex features (and for my own reference...)
  65. `python_only` : ``bool``
  66. Use only the python variant. If true, the c extension is not even
  67. tried to be loaded.
  68. :Return: Minifier
  69. :Rtype: ``callable``
  70. """
  71. # pylint: disable = R0912, R0914, W0612
  72. if not python_only:
  73. try:
  74. import _rjsmin
  75. except ImportError:
  76. pass
  77. else:
  78. return _rjsmin.jsmin
  79. try:
  80. xrange
  81. except NameError:
  82. xrange = range # pylint: disable = W0622
  83. space_chars = r'[\000-\011\013\014\016-\040]'
  84. line_comment = r'(?://[^\r\n]*)'
  85. space_comment = r'(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)'
  86. string1 = \
  87. r'(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^\047\\\r\n]*)*\047)'
  88. string2 = r'(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|\r)[^"\\\r\n]*)*")'
  89. strings = r'(?:%s|%s)' % (string1, string2)
  90. charclass = r'(?:\[[^\\\]\r\n]*(?:\\[^\r\n][^\\\]\r\n]*)*\])'
  91. nospecial = r'[^/\\\[\r\n]'
  92. if extended:
  93. regex = r'(?:/(?![\r\n/*])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)' % (
  94. nospecial, charclass, nospecial
  95. )
  96. else:
  97. regex = (
  98. r'(?:/(?:[^*/\\\r\n\[]|%s|\\[^\r\n])%s*(?:(?:\\[^\r\n]|%s)%s*)*/)'
  99. )
  100. regex = regex % (charclass, nospecial, charclass, nospecial)
  101. pre_regex = r'[(,=:\[!&|?{};\r\n]'
  102. space = r'(?:%s|%s)' % (space_chars, space_comment)
  103. newline = r'(?:%s?[\r\n])' % line_comment
  104. def fix_charclass(result):
  105. """ Fixup string of chars to fit into a regex char class """
  106. pos = result.find('-')
  107. if pos >= 0:
  108. result = r'%s%s-' % (result[:pos], result[pos + 1:])
  109. def sequentize(string):
  110. """
  111. Notate consecutive characters as sequence
  112. (1-4 instead of 1234)
  113. """
  114. first, last, result = None, None, []
  115. for char in map(ord, string):
  116. if last is None:
  117. first = last = char
  118. elif last + 1 == char:
  119. last = char
  120. else:
  121. result.append((first, last))
  122. first = last = char
  123. if last is not None:
  124. result.append((first, last))
  125. return ''.join(['%s%s%s' % (
  126. chr(first),
  127. last > first + 1 and '-' or '',
  128. last != first and chr(last) or ''
  129. ) for first, last in result])
  130. return _re.sub(r'([\000-\040\047])', # for better portability
  131. lambda m: '\\%03o' % ord(m.group(1)), (sequentize(result)
  132. .replace('\\', '\\\\')
  133. .replace('[', '\\[')
  134. .replace(']', '\\]')
  135. )
  136. )
  137. def id_literal_(what):
  138. """ Make id_literal like char class """
  139. match = _re.compile(what).match
  140. result = ''.join([
  141. chr(c) for c in range(127) if not match(chr(c))
  142. ])
  143. return '[^%s]' % fix_charclass(result)
  144. def not_id_literal_(keep):
  145. """ Make negated id_literal like char class """
  146. match = _re.compile(id_literal_(keep)).match
  147. result = ''.join([
  148. chr(c) for c in range(127) if not match(chr(c))
  149. ])
  150. return r'[%s]' % fix_charclass(result)
  151. if extended:
  152. id_literal = id_literal_(r'[a-zA-Z0-9_$]')
  153. id_literal_open = id_literal_(r'[a-zA-Z0-9_${\[(+-]')
  154. id_literal_close = id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
  155. space_sub = _re.compile((
  156. r'([^\047"/\000-\040]+)'
  157. r'|(%(strings)s[^\047"/\000-\040]*)'
  158. r'|(?:(?<=%(pre_regex)s)%(space)s*(%(regex)s[^\047"/\000-\040]*))'
  159. r'|(?<=%(id_literal_close)s)'
  160. r'%(space)s*(?:(%(newline)s)%(space)s*)+'
  161. r'(?=%(id_literal_open)s)'
  162. r'|(?<=%(id_literal)s)(%(space)s)+(?=%(id_literal)s)'
  163. r'|%(space)s+'
  164. r'|(?:%(newline)s%(space)s*)+'
  165. ) % locals()).sub
  166. def space_subber(match):
  167. """ Substitution callback """
  168. # pylint: disable = C0321
  169. groups = match.groups()
  170. if groups[0]: return groups[0]
  171. elif groups[1]: return groups[1]
  172. elif groups[2]: return groups[2]
  173. elif groups[3]: return '\n'
  174. elif groups[4]: return ' '
  175. return ''
  176. def jsmin(script): # pylint: disable = W0621
  177. r"""
  178. Minify javascript based on `jsmin.c by Douglas Crockford`_\.
  179. Instead of parsing the stream char by char, it uses a regular
  180. expression approach which minifies the whole script with one big
  181. substitution regex.
  182. .. _jsmin.c by Douglas Crockford:
  183. http://www.crockford.com/javascript/jsmin.c
  184. :Parameters:
  185. `script` : ``str``
  186. Script to minify
  187. :Return: Minified script
  188. :Rtype: ``str``
  189. """
  190. return space_sub(space_subber, '\n%s\n' % script).strip()
  191. else:
  192. not_id_literal = not_id_literal_(r'[a-zA-Z0-9_$]')
  193. not_id_literal_open = not_id_literal_(r'[a-zA-Z0-9_${\[(+-]')
  194. not_id_literal_close = not_id_literal_(r'[a-zA-Z0-9_$}\])"\047+-]')
  195. space_norm_sub = _re.compile((
  196. r'(%(strings)s)'
  197. r'|(?:(%(pre_regex)s)%(space)s*(%(regex)s))'
  198. r'|(%(space)s)+'
  199. r'|(?:(%(newline)s)%(space)s*)+'
  200. ) % locals()).sub
  201. def space_norm_subber(match):
  202. """ Substitution callback """
  203. # pylint: disable = C0321
  204. groups = match.groups()
  205. if groups[0]: return groups[0]
  206. elif groups[1]: return groups[1].replace('\r', '\n') + groups[2]
  207. elif groups[3]: return ' '
  208. elif groups[4]: return '\n'
  209. space_sub1 = _re.compile((
  210. r'[\040\n]?(%(strings)s|%(pre_regex)s%(regex)s)'
  211. r'|\040(%(not_id_literal)s)'
  212. r'|\n(%(not_id_literal_open)s)'
  213. ) % locals()).sub
  214. def space_subber1(match):
  215. """ Substitution callback """
  216. groups = match.groups()
  217. return groups[0] or groups[1] or groups[2]
  218. space_sub2 = _re.compile((
  219. r'(%(strings)s)\040?'
  220. r'|(%(pre_regex)s%(regex)s)[\040\n]?'
  221. r'|(%(not_id_literal)s)\040'
  222. r'|(%(not_id_literal_close)s)\n'
  223. ) % locals()).sub
  224. def space_subber2(match):
  225. """ Substitution callback """
  226. groups = match.groups()
  227. return groups[0] or groups[1] or groups[2] or groups[3]
  228. def jsmin(script):
  229. r"""
  230. Minify javascript based on `jsmin.c by Douglas Crockford`_\.
  231. Instead of parsing the stream char by char, it uses a regular
  232. expression approach. The script is minified with three passes:
  233. normalization
  234. Control character are mapped to spaces, spaces and newlines
  235. are squeezed and comments are stripped.
  236. space removal 1
  237. Spaces before certain tokens are removed
  238. space removal 2
  239. Spaces after certain tokens are remove
  240. .. _jsmin.c by Douglas Crockford:
  241. http://www.crockford.com/javascript/jsmin.c
  242. :Parameters:
  243. `script` : ``str``
  244. Script to minify
  245. :Return: Minified script
  246. :Rtype: ``str``
  247. """
  248. return space_sub2(space_subber2,
  249. space_sub1(space_subber1,
  250. space_norm_sub(space_norm_subber, '\n%s\n' % script)
  251. )
  252. ).strip()
  253. return jsmin
  254. jsmin = _make_jsmin()
  255. def jsmin_for_posers(script):
  256. r"""
  257. Minify javascript based on `jsmin.c by Douglas Crockford`_\.
  258. Instead of parsing the stream char by char, it uses a regular
  259. expression approach which minifies the whole script with one big
  260. substitution regex.
  261. .. _jsmin.c by Douglas Crockford:
  262. http://www.crockford.com/javascript/jsmin.c
  263. :Warning: This function is the digest of a _make_jsmin() call. It just
  264. utilizes the resulting regex. It's just for fun here and may
  265. vanish any time. Use the `jsmin` function instead.
  266. :Parameters:
  267. `script` : ``str``
  268. Script to minify
  269. :Return: Minified script
  270. :Rtype: ``str``
  271. """
  272. def subber(match):
  273. """ Substitution callback """
  274. groups = match.groups()
  275. return (
  276. groups[0] or
  277. groups[1] or
  278. groups[2] or
  279. (groups[3] and '\n') or
  280. (groups[4] and ' ') or
  281. ''
  282. )
  283. return _re.sub(
  284. r'([^\047"/\000-\040]+)|((?:(?:\047[^\047\\\r\n]*(?:\\(?:[^\r\n]|\r?'
  285. r'\n|\r)[^\047\\\r\n]*)*\047)|(?:"[^"\\\r\n]*(?:\\(?:[^\r\n]|\r?\n|'
  286. r'\r)[^"\\\r\n]*)*"))[^\047"/\000-\040]*)|(?:(?<=[(,=:\[!&|?{};\r\n]'
  287. r')(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
  288. r'))*((?:/(?![\r\n/*])[^/\\\[\r\n]*(?:(?:\\[^\r\n]|(?:\[[^\\\]\r\n]*'
  289. r'(?:\\[^\r\n][^\\\]\r\n]*)*\]))[^/\\\[\r\n]*)*/)[^\047"/\000-\040]*'
  290. r'))|(?<=[^\000-!#%&(*,./:-@\[\\^`{|~])(?:[\000-\011\013\014\016-\04'
  291. r'0]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))*(?:((?:(?://[^\r\n]*)?[\r\n'
  292. r']))(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)'
  293. r'*/))*)+(?=[^\000-#%-\047)*,./:-@\\-^`|-~])|(?<=[^\000-#%-,./:-@\[-'
  294. r'^`{-~-])((?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*'
  295. r']*\*+)*/)))+(?=[^\000-#%-,./:-@\[-^`{-~-])|(?:[\000-\011\013\014\0'
  296. r'16-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]*\*+)*/))+|(?:(?:(?://[^\r\n]*)'
  297. r'?[\r\n])(?:[\000-\011\013\014\016-\040]|(?:/\*[^*]*\*+(?:[^/*][^*]'
  298. r'*\*+)*/))*)+', subber, '\n%s\n' % script
  299. ).strip()
  300. if __name__ == '__main__':
  301. import sys as _sys
  302. _sys.stdout.write(jsmin(_sys.stdin.read()))