# /src/webassets/filter/jspacker/jspacker.py
# Python | 571 lines | 552 code | 7 blank | 12 comment | 5 complexity | 9e3705bdb175effdb8ceef8ae924b999 MD5 | raw file
# Possible License(s): BSD-2-Clause
## ParseMaster, version 1.0 (pre-release) (2005/05/12) x6
## Copyright 2005, Dean Edwards
## Web: http://dean.edwards.name/
##
## This software is licensed under the CC-GNU LGPL
## Web: http://creativecommons.org/licenses/LGPL/2.1/
##
## Ported to Python by Florian Schulze
import os
import re
# a multi-pattern parser


class Pattern:
    """A single rule of the multi-pattern parser.

    Holds the regular-expression source, the replacement registered for it
    and ``length``, the number of capture groups the rule occupies once it
    is wrapped in its own group.
    """

    def __init__(self, expression, replacement, length):
        self.expression, self.replacement, self.length = (
            expression, replacement, length)

    def __str__(self):
        # render the expression wrapped in its own capture group
        return "".join(("(", self.expression, ")"))
class Patterns(list):
    """List of patterns whose string form is one big alternation."""

    def __str__(self):
        # str() of every element, joined with the regex "or" operator
        return '|'.join(map(str, self))
class ParseMaster:
    """Multi-pattern search-and-replace engine.

    Patterns registered with :meth:`add` are joined into one alternation.
    :meth:`execute` then rewrites a string in a single pass and hands every
    match to the replacement of the pattern that produced it.

    Public attributes:
        ignoreCase -- when true the combined expression is compiled with
                      ``re.I``.
        escapeChar -- when not ``None``, every ``escapeChar`` + character
                      pair is hidden from the patterns while
                      :meth:`execute` runs and restored afterwards.
    """

    # constants
    EXPRESSION = 0
    REPLACEMENT = 1
    LENGTH = 2

    GROUPS = re.compile(r"""\(""", re.M)  # g
    SUB_REPLACE = re.compile(r"""\$\d""", re.M)
    INDEXED = re.compile(r"""^\$\d+$""", re.M)
    TRIM = re.compile(r"""(['"])\1\+(.*)\+\1\1$""", re.M)
    ESCAPE = re.compile(r"""\\.""", re.M)  # g
    DELETED = re.compile("""\x01[^\x01]*\x01""", re.M)  # g

    # Types treated as a stored group index by _replacement.  ``long`` only
    # exists on Python 2; naming it unconditionally raises NameError on
    # Python 3.
    try:
        _INTEGER_TYPES = (int, long)
    except NameError:
        _INTEGER_TYPES = (int,)

    def __init__(self):
        # private
        self._patterns = Patterns()  # patterns stored by index
        self._escaped = []  # characters removed by _escape, in order
        self.ignoreCase = False
        self.escapeChar = None

    def DELETE(self, match, offset):
        """Default replacement: mark the matched text for deletion.

        The text is wrapped in ``\\x01`` markers instead of being dropped
        immediately; :meth:`execute` strips the marked spans only after
        escaped characters have been restored.
        """
        return "\x01" + match.group(offset) + "\x01"

    def _repl(self, a, o, r, i):
        """Expand the ``$n`` placeholders of template ``r``.

        ``a`` is the match object, ``o`` the group index of the pattern's
        own wrapper group and ``i`` the number of groups of the pattern.
        ``$1`` maps to group ``o``, ``$2`` to group ``o + 1`` and so on;
        groups that did not participate expand to the empty string.
        """
        # Count downwards so that "$12" is substituted before "$1".
        while i:
            value = a.group(o + i - 1)
            r = r.replace("$" + str(i), "" if value is None else value)
            i -= 1
        # NOTE(review): kept from the original port.  In Python "$1" is
        # literal replacement text (not a group reference), so expanded text
        # that happens to match TRIM is replaced by the two characters "$1".
        return ParseMaster.TRIM.sub("$1", r)

    # public
    def add(self, expression="^$", replacement=None):
        """Register a pattern.

        ``expression`` is regular-expression source.  ``replacement`` is
        one of:

        * ``None`` -- the match is deleted (see :meth:`DELETE`);
        * a callable taking ``(match, offset)`` and returning the new text;
        * a string of the form ``"$n"`` -- the match is replaced by that
          group, where ``$1`` is the whole match and ``$2`` the first group
          of ``expression``;
        * a string containing ``$n`` placeholders -- expanded by
          :meth:`_repl`;
        * any other string -- inserted literally.
        """
        if replacement is None:
            replacement = self.DELETE
        # count the number of sub-expressions
        #  - add one because each pattern is itself a sub-expression
        stripped = self._internalEscape(str(expression))
        length = len(ParseMaster.GROUPS.findall(stripped)) + 1
        # does the pattern deal with sub-expressions?  ``search`` rather
        # than ``match``: a placeholder may appear anywhere in the template
        # (e.g. "Hello $2 $1"), not only at its start.
        if (isinstance(replacement, str)
                and ParseMaster.SUB_REPLACE.search(replacement)):
            if ParseMaster.INDEXED.match(replacement):
                # a simple lookup (e.g. "$2"): store the zero-based index,
                # used for fast retrieval of matched strings
                replacement = int(replacement[1:]) - 1
            else:
                # a complicated lookup (e.g. "Hello $2 $1"):
                # build a function to do the lookup
                template = replacement
                group_count = length
                replacement = (lambda match, offset:
                               self._repl(match, offset, template, group_count))
        # pass the modified arguments
        self._patterns.append(Pattern(expression, replacement, length))

    # execute the global replacement
    def execute(self, string):
        """Apply every registered pattern to ``string``; return the result."""
        flags = re.M
        if self.ignoreCase:
            flags |= re.I
        combined = re.compile(str(self._patterns), flags)
        string = self._escape(string, self.escapeChar)
        string = combined.sub(self._replacement, string)
        string = self._unescape(string, self.escapeChar)
        # spans marked by DELETE are removed last
        return ParseMaster.DELETED.sub("", string)

    # clear the patterns collections so that this object may be re-used
    def reset(self):
        """Forget all registered patterns."""
        self._patterns = Patterns()

    # this is the global replace function (it's quite complicated)
    def _replacement(self, match):
        """Find the pattern behind ``match`` and apply its replacement.

        Walks the patterns in registration order, tracking the group index
        of each pattern's wrapper group; the first wrapper that matched
        identifies the pattern.  Returns ``None`` when no wrapper matched.
        """
        group = 1
        for pattern in self._patterns:
            if match.group(group) is None:
                # skip this pattern's wrapper and all of its sub-groups
                group += pattern.length
                continue
            replacement = pattern.replacement
            if callable(replacement):
                return replacement(match, group)
            if isinstance(replacement, ParseMaster._INTEGER_TYPES):
                # stored index of a "$n" lookup, relative to the wrapper
                return match.group(replacement + group)
            return replacement
        return None

    # encode escaped characters
    def _escape(self, string, escapeChar=None):
        """Hide escaped characters from the patterns.

        Every ``escapeChar`` + character pair is reduced to a bare
        ``escapeChar``; the removed character is queued on
        ``self._escaped`` for :meth:`_unescape`.
        """
        if escapeChar is None:
            return string

        def stash(match):
            self._escaped.append(match.group(1))
            return escapeChar

        pair = re.compile(re.escape(escapeChar) + "(.)", re.M)
        return pair.sub(stash, string)

    # decode escaped characters
    def _unescape(self, string, escapeChar=None):
        """Restore the characters removed by :meth:`_escape`.

        Each ``escapeChar`` in ``string`` gets the oldest queued character
        appended again; once the queue is empty the ``escapeChar`` is left
        on its own.
        """
        if escapeChar is None:
            return string

        def restore(match):
            try:
                return escapeChar + self._escaped.pop(0)
            except IndexError:
                return escapeChar

        return re.compile(re.escape(escapeChar), re.M).sub(restore, string)

    def _internalEscape(self, string):
        """Return ``string`` with backslash-escaped characters removed."""
        return ParseMaster.ESCAPE.sub("", string)
## packer, version 2.0 (2005/04/20)
## Copyright 2004-2005, Dean Edwards
## License: http://creativecommons.org/licenses/LGPL/2.1/
## Ported to Python by Florian Schulze
## http://dean.edwards.name/packer/
class JavaScriptPacker:
    """Compress JavaScript source and optionally encode its words.

    :meth:`pack` is the entry point.  Compression (comment and white-space
    removal) is done by two pre-built ``ParseMaster`` instances; keyword
    encoding replaces every word by a short code and wraps the script in a
    JavaScript boot function that decodes it again.
    """

    def __init__(self):
        self._basicCompressionParseMaster = self.getCompressionParseMaster(False)
        self._specialCompressionParseMaster = self.getCompressionParseMaster(True)

    def basicCompression(self, script):
        """Strip comments and redundant white-space from ``script``."""
        return self._basicCompressionParseMaster.execute(script)

    def specialCompression(self, script):
        """Like :meth:`basicCompression`, but also drop ``;;;`` lines."""
        return self._specialCompressionParseMaster.execute(script)

    def getCompressionParseMaster(self, specialChars):
        """Build the parser used for compression.

        With ``specialChars`` true the parser additionally removes lines of
        the form ``;;; doSomething();``.
        """
        IGNORE = "$1"
        parser = ParseMaster()
        parser.escapeChar = '\\'
        # protect strings
        parser.add(r"""'[^']*?'""", IGNORE)
        parser.add(r'"[^"]*?"', IGNORE)
        # remove comments
        parser.add(r"""//[^\n\r]*?[\n\r]""")
        parser.add(r"""/\*[^*]*?\*+([^/][^*]*?\*+)*?/""")
        # protect regular expressions
        parser.add(r"""\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)""", "$2")
        parser.add(r"""[^\w\$\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?""", IGNORE)
        # remove: ;;; doSomething();
        if specialChars:
            parser.add(""";;;[^\n\r]+[\n\r]""")
        # remove redundant semi-colons
        parser.add(r""";+\s*([};])""", "$2")
        # remove white-space
        parser.add(r"""(\b|\$)\s+(\b|\$)""", "$2 $3")
        parser.add(r"""([+\-])\s+([+\-])""", "$2 $3")
        parser.add(r"""\s+""", "")
        return parser

    def getEncoder(self, ascii):
        """Return a function turning a word index into its code string.

        ``ascii`` selects the alphabet: up to 10 decimal digits, up to 36
        digits plus lower case, up to 62 additionally upper case, anything
        larger the 95 characters starting at code point 161.
        """
        mapping = {}
        # (first digit value, first character code, number of characters)
        for start, base, size in ((0, ord('0'), 10), (10, ord('a'), 26),
                                  (36, ord('A'), 26), (62, 161, 95)):
            for i in range(size):
                mapping[start + i] = chr(i + base)

        # zero encoding
        # characters: 0123456789
        def encode10(charCode):
            return str(charCode)

        def positional(base, shift=0):
            # Positional notation in ``base``; ``shift`` selects where the
            # digit characters start inside ``mapping``.
            def encode(charCode):
                digits = []
                remainder = charCode
                while True:
                    quotient, remainder = divmod(remainder, base)
                    digits.append(mapping[remainder + shift])
                    if not quotient:
                        break
                    remainder = quotient
                return "".join(reversed(digits))
            return encode

        if ascii <= 10:
            return encode10
        if ascii <= 36:
            # characters: 0123456789abcdefghijklmnopqrstuvwxyz
            return positional(36)
        if ascii <= 62:
            # base36 plus the upper case alpha characters
            return positional(62)
        # use high-ascii values
        return positional(95, 62)

    def escape(self, script):
        """Escape ``script`` for use inside a single-quoted JS string."""
        # backslashes first, otherwise the escapes added below get doubled
        for raw, quoted in (("\\", "\\\\"), ("'", "\\'"), ('\n', '\\n')):
            script = script.replace(raw, quoted)
        return script

    def escape95(self, script):
        """Replace high-ascii characters by ``\\xNN`` escapes.

        Characters ``\\xa1`` to ``\\xff`` are the ones used as codes by the
        95-character encoding, so occurrences already present in the script
        must be escaped.  The lower bound is inclusive: ``\\xa1`` itself is
        the code for index 0.  Characters above ``\\xff`` are left alone,
        because a ``\\x`` escape can only hold two hex digits.
        """
        result = []
        for x in script:
            if 0xa1 <= ord(x) <= 0xff:
                x = "\\x%0x" % ord(x)
            result.append(x)
        return "".join(result)

    def encodeKeywords(self, script, encoding, fastDecode):
        """Replace the words of ``script`` by codes and add the decoder."""
        high_ascii = encoding > 62
        # escape high-ascii values already in the script (i.e. in strings)
        if high_ascii:
            script = self.escape95(script)
        # create the parser
        parser = ParseMaster()
        encode = self.getEncoder(encoding)
        # for high-ascii, don't encode single character low-ascii
        regexp = r"""\w\w+""" if high_ascii else r"""\w+"""
        # build the word list
        keywords = self.analyze(script, regexp, encode)
        encoded = keywords['encoded']

        # encode
        def repl(match, offset):
            return encoded.get(match.group(offset), "")

        parser.add(regexp, repl)
        script = parser.execute(script)
        # wrap the encoded script in a decoding function
        return self.bootStrap(script, keywords, encoding, fastDecode)

    def analyze(self, script, regexp, encode):
        """Collect the words of ``script`` and assign a code to each.

        ``regexp`` (regular-expression source without groups) defines what
        a word is and ``encode`` maps a list index to its code.  Returns a
        dict with three entries:

        * ``'sorted'`` -- the words, positioned so that ``encode(i)`` is the
          code of ``sorted[i]``; more frequent words come first;
        * ``'encoded'`` -- dictionary word -> code;
        * ``'protected'`` -- indexes of words that equal their own code.
        """
        # retrieve all words in the script
        words = re.compile(regexp, re.M).findall(script)
        ordered = []    # list of words sorted by frequency
        encoded = {}    # dictionary of word->encoding
        protected = {}  # instances of "protected" words
        if words:
            unsorted = []
            codes = {}   # "$" + code -> index the code stands for
            values = {}  # index -> code
            count = {}
            words.reverse()
            for word in words:
                word = "$" + word
                if word not in count:
                    count[word] = 0
                    j = len(unsorted)
                    unsorted.append(word)
                    # make a dictionary of all of the protected words in
                    # this script; these are words that might be mistaken
                    # for encoding
                    values[j] = encode(j)
                    codes["$" + values[j]] = j
                count[word] += 1
            # prepare to sort the word list, first we must protect
            # words that are also used as codes. we assign them a code
            # equivalent to the word itself.
            # e.g. if "do" falls within our encoding range
            # then we store keywords["do"] = "do";
            # this avoids problems when decoding
            ordered = [None] * len(unsorted)
            for word in unsorted:
                if word in codes and isinstance(codes[word], int):
                    ordered[codes[word]] = word[1:]
                    protected[codes[word]] = True
                    count[word] = 0
            # Most frequent first.  A key function on a stable sort gives
            # the same order as the former cmp function
            # (count[b] - count[a]) and also works on Python 3.
            unsorted.sort(key=lambda candidate: -count[candidate])
            j = 0
            for i in range(len(ordered)):
                if ordered[i] is None:
                    ordered[i] = unsorted[j][1:]
                    j += 1
                encoded[ordered[i]] = values[i]
        return {'sorted': ordered, 'encoded': encoded, 'protected': protected}

    def encodePrivate(self, charCode):
        """Return the short name for private variable number ``charCode``."""
        return "_" + str(charCode)

    def encodeSpecialChars(self, script):
        """Shorten ``$name`` and ``_name`` identifiers in ``script``."""
        parser = ParseMaster()

        # replace: $name -> n, $$name -> $$na
        def shorten(match, offset):
            dollars = len(match.group(offset + 2))
            start = min(dollars, len(match.group(offset + 3)))
            name = match.group(offset + 1)
            return name[start:start + dollars] + match.group(offset + 4)

        parser.add(r"""((\$+)([a-zA-Z\$_]+))(\d*)""", shorten)
        # replace: _name -> _0, double-underscore (__name) is ignored
        regexp = r"""\b_[A-Za-z\d]\w*"""
        # build the word list
        keywords = self.analyze(script, regexp, self.encodePrivate)
        # quick ref
        encoded = keywords['encoded']

        def rename(match, offset):
            return encoded.get(match.group(offset), "")

        parser.add(regexp, rename)
        return parser.execute(script)

    # build the boot function used for loading and decoding
    def bootStrap(self, packed, keywords, encoding, fastDecode):
        """Wrap ``packed`` in the JavaScript code that unpacks it.

        ``keywords`` is the dictionary returned by :meth:`analyze`; its
        ``'sorted'`` list is modified in place (protected words are
        blanked).  ``encoding`` must be one of 10, 36, 62 or 95, otherwise
        ``KeyError`` is raised.  Returns an ``eval(...)`` statement as a
        string.
        """
        ENCODE = re.compile(r"""\$encode\(\$count\)""")
        # $packed: the packed script
        packed = "'" + self.escape(packed) + "'"
        # $count: number of words contained in the script
        count = len(keywords['sorted'])
        # $ascii: base for encoding
        ascii = min(count, encoding) or 1
        # $keywords: list of words contained in the script
        for i in keywords['protected']:
            keywords['sorted'][i] = ""
        # convert from a string to an array
        keywords = "'" + "|".join(keywords['sorted']) + "'.split('|')"

        encoding_functions = {
            10: """ function($charCode) {
    return $charCode;
}""",
            36: """ function($charCode) {
    return $charCode.toString(36);
}""",
            62: """ function($charCode) {
    return ($charCode < _encoding ? "" : arguments.callee(parseInt($charCode / _encoding))) +
    (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}""",
            95: """ function($charCode) {
    return ($charCode < _encoding ? "" : arguments.callee($charCode / _encoding)) +
    String.fromCharCode($charCode % _encoding + 161);
}"""
        }
        # $encode: encoding function (used for decoding the script)
        encode = encoding_functions[encoding]
        encode = encode.replace('_encoding', "$ascii")
        encode = encode.replace('arguments.callee', "$encode")
        if ascii > 10:
            inline = "$count.toString($ascii)"
        else:
            inline = "$count"

        # $decode: code snippet to speed up decoding
        if fastDecode:
            # create the decoder
            decode = r"""// does the browser support String.replace where the
    // replacement value is a function?
    if (!''.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
        // global replacement function
        $keywords = [function($encoded){return $decode[$encoded]}];
        // generic match
        $encode = function(){return'\\w+'};
        // reset the loop counter - we are now doing a global replace
        $count = 1;
    }"""
            if encoding > 62:
                decode = decode.replace('\\\\w', "[\\xa1-\\xff]")
            elif ascii < 36:
                # perform the encoding inline for lower ascii values
                decode = ENCODE.sub(inline, decode)
            # special case: when $count==0 there are no keywords.  Keep the
            # basic shape of the unpacking function and make the decoder
            # reset the loop counter to 0 instead of 1.
            if not count:
                decode = re.sub(r"""(\$count)\s*=\s*1""", r"\1=0", decode)

        # boot function
        unpack = r"""function($packed, $ascii, $count, $keywords, $encode, $decode) {
    while ($count--)
        if ($keywords[$count])
            $packed = $packed.replace(new RegExp("\\b" + $encode($count) + "\\b", "g"), $keywords[$count]);
    return $packed;
}"""
        if fastDecode:
            # insert the decoder
            unpack = unpack.replace('{', "{" + decode + ";", 1)
        if encoding > 62:  # high-ascii
            # get rid of the word-boundaries for regexp matches.  The boot
            # function above quotes "\\b" with double quotes, so both quote
            # styles have to be accepted here.
            unpack = re.sub(
                r"""(['"])\\\\b\1\s*\+|\+\s*(['"])\\\\b\2""", "", unpack)
        if ascii > 36 or encoding > 62 or fastDecode:
            # insert the encode function
            unpack = unpack.replace('{', "{$encode=" + encode + ";", 1)
        else:
            # perform the encoding inline
            unpack = ENCODE.sub(inline, unpack)
        # pack the boot function too
        unpack = self.pack(unpack, 0, False, True)

        # arguments
        params = [packed, str(ascii), str(count), keywords]
        if fastDecode:
            # insert placeholders for the decoder
            params.extend(['0', "{}"])
        # the whole thing
        return "eval(" + unpack + "(" + ",".join(params) + "))\n"

    def pack(self, script, encoding=0, fastDecode=False, specialChars=False, compaction=True):
        """Pack ``script`` and return the result.

        ``encoding``     -- 0 for no keyword encoding, otherwise the
                            encoding passed on to :meth:`encodeKeywords`.
        ``fastDecode``   -- include the fast decoder in the boot function.
        ``specialChars`` -- use the special compression and shorten
                            ``$name`` / ``_name`` identifiers; compression
                            is then always applied.
        ``compaction``   -- apply basic compression (only consulted when
                            ``specialChars`` is false).
        """
        script = script + "\n"
        self._encoding = encoding
        self._fastDecode = fastDecode
        if specialChars:
            script = self.specialCompression(script)
            script = self.encodeSpecialChars(script)
        elif compaction:
            script = self.basicCompression(script)
        if encoding:
            script = self.encodeKeywords(script, encoding, fastDecode)
        return script
def run():
    """Pack ``test_plone.js`` and write the result to ``output.js``.

    Both files are taken relative to the current working directory.  The
    script is packed without compaction, with base-62 keyword encoding and
    the fast decoder.
    """
    packer = JavaScriptPacker()
    # context managers make sure both handles are closed again
    with open('test_plone.js') as source:
        script = source.read()
    result = packer.pack(script, compaction=False, encoding=62, fastDecode=True)
    with open('output.js', 'w') as target:
        target.write(result)
def run1():
    """Run the packer over a list of sample scripts and print a report.

    Every entry of ``test_scripts`` is a tuple ``(script, encoding,
    fastDecode, specialChars, expected)``.  ``script`` and ``expected`` are
    either literal text or the name of an existing file whose content is
    used instead.  For each entry the sizes are printed, followed by the
    expected and the actual output when the two differ.
    """
    test_scripts = []
    test_scripts.append(("""// -----------------------------------------------------------------------
// public interface
// -----------------------------------------------------------------------
cssQuery.toString = function() {
    return "function cssQuery() {\n [version " + version + "]\n}";
};""", 0, False, False, """cssQuery.toString=function(){return"function cssQuery() {\n [version "+version+"]\n}"};"""))
    test_scripts.append(("""function test(_localvar) {
    var $name = 'foo';
    var $$dummy = 2;
    return $name + $$dummy;
}""", 0, False, True, """function test(_0){var n='foo';var du=2;return n+du}"""))
    test_scripts.append(("""function _test($localvar) {
    var $name = 1;
    var _dummy = 2;
    var __foo = 3;
    return $name + _dummy + $localvar + __foo;
}""", 0, False, True, """function _1(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
    test_scripts.append(("""function _test($localvar) {
    var $name = 1;
    var _dummy = 2;
    var __foo = 3;
    return $name + _dummy + $localvar + __foo;
}
function _bar(_ocalvar) {
    var $name = 1;
    var _dummy = 2;
    var __foo = 3;
    return $name + _dummy + $localvar + __foo;
}""", 0, False, True, """function _3(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _2(_1){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
    test_scripts.append(("cssQuery1.js", 0, False, False, "cssQuery1-p1.js"))
    test_scripts.append(("cssQuery.js", 0, False, False, "cssQuery-p1.js"))
    test_scripts.append(("pack.js", 0, False, False, "pack-p1.js"))
    test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p2.js"))
    # the following ones are different, because javascript might use an
    # unstable sort algorithm while python uses a stable sort algorithm
    test_scripts.append(("pack.js", 0, False, True, "pack-p2.js"))
    test_scripts.append(("test.js", 0, False, True, """function _4(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _3(_1){var n=1;var _2=2;var __foo=3;return n+_2+l+__foo}"""))
    test_scripts.append(("test.js", 10, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 13($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 11(12){0 $4=1;0 10=2;0 5=3;9 $4+10+$6+5}',10,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
"""))
    test_scripts.append(("test.js", 62, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 d($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 b(c){0 $4=1;0 a=2;0 5=3;9 $4+a+$6+5}',14,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
"""))
    test_scripts.append(("test.js", 95, False, False, "test-p4.js"))
    test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p3.js"))
    test_scripts.append(("cssQuery.js", 62, False, True, "cssQuery-p4.js"))

    def load(name_or_text):
        # An existing file name stands for the content of that file; any
        # other value is used as the text itself.
        if os.path.exists(name_or_text):
            with open(name_or_text) as handle:
                return handle.read()
        return name_or_text

    p = JavaScriptPacker()
    for script, encoding, fastDecode, specialChars, expected in test_scripts:
        _script = load(script)
        _expected = load(expected)
        # single pre-formatted strings print the same on Python 2 and 3
        print("%s %s %s %s %s" % (
            script[:20], encoding, fastDecode, specialChars, expected[:20]))
        print("=" * 40)
        result = p.pack(_script, encoding, fastDecode, specialChars)
        print("%s %s" % (len(result), len(_script)))
        if result != _expected:
            print("ERROR!!!!!!!!!!!!!!!!")
            print(_expected)
            print(result)
# script entry point: pack test_plone.js into output.js
if __name__ == '__main__':
    run()