PageRenderTime 59ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/petri/common/lib/jspacker.py

https://github.com/EricSchles/hackerunion.org
Python | 572 lines | 553 code | 7 blank | 12 comment | 5 complexity | b23f73091e9375df2bc77a40dc8708d8 MD5 | raw file
Possible License(s): GPL-3.0
  1. ## ParseMaster, version 1.0 (pre-release) (2005/05/12) x6
  2. ## Copyright 2005, Dean Edwards
  3. ## Web: http://dean.edwards.name/
  4. ##
  5. ## This software is licensed under the CC-GNU LGPL
  6. ## Web: http://creativecommons.org/licenses/LGPL/2.1/
  7. ##
  8. ## Ported to Python by Florian Schulze
  9. import os, re
  10. # a multi-pattern parser
  11. class Pattern:
  12. def __init__(self, expression, replacement, length):
  13. self.expression = expression
  14. self.replacement = replacement
  15. self.length = length
  16. def __str__(self):
  17. return "(" + self.expression + ")"
  18. class Patterns(list):
  19. def __str__(self):
  20. return '|'.join([str(e) for e in self])
  21. class ParseMaster:
  22. # constants
  23. EXPRESSION = 0
  24. REPLACEMENT = 1
  25. LENGTH = 2
  26. GROUPS = re.compile(r"""\(""", re.M)#g
  27. SUB_REPLACE = re.compile(r"""\$\d""", re.M)
  28. INDEXED = re.compile(r"""^\$\d+$""", re.M)
  29. TRIM = re.compile(r"""(['"])\1\+(.*)\+\1\1$""", re.M)
  30. ESCAPE = re.compile(r"""\\.""", re.M)#g
  31. #QUOTE = re.compile(r"""'""", re.M)
  32. DELETED = re.compile("""\x01[^\x01]*\x01""", re.M)#g
  33. def __init__(self):
  34. # private
  35. self._patterns = Patterns() # patterns stored by index
  36. self._escaped = []
  37. self.ignoreCase = False
  38. self.escapeChar = None
  39. def DELETE(self, match, offset):
  40. return "\x01" + match.group(offset) + "\x01"
  41. def _repl(self, a, o, r, i):
  42. while (i):
  43. m = a.group(o+i-1)
  44. if m is None:
  45. s = ""
  46. else:
  47. s = m
  48. r = r.replace("$" + str(i), s)
  49. i = i - 1
  50. r = ParseMaster.TRIM.sub("$1", r)
  51. return r
  52. # public
  53. def add(self, expression="^$", replacement=None):
  54. if replacement is None:
  55. replacement = self.DELETE
  56. # count the number of sub-expressions
  57. # - add one because each pattern is itself a sub-expression
  58. length = len(ParseMaster.GROUPS.findall(self._internalEscape(str(expression)))) + 1
  59. # does the pattern deal with sub-expressions?
  60. if (isinstance(replacement, str) and ParseMaster.SUB_REPLACE.match(replacement)):
  61. # a simple lookup? (e.g. "$2")
  62. if (ParseMaster.INDEXED.match(replacement)):
  63. # store the index (used for fast retrieval of matched strings)
  64. replacement = int(replacement[1:]) - 1
  65. else: # a complicated lookup (e.g. "Hello $2 $1")
  66. # build a function to do the lookup
  67. i = length
  68. r = replacement
  69. replacement = lambda a,o: self._repl(a,o,r,i)
  70. # pass the modified arguments
  71. self._patterns.append(Pattern(expression, replacement, length))
  72. # execute the global replacement
  73. def execute(self, string):
  74. if self.ignoreCase:
  75. r = re.compile(str(self._patterns), re.I | re.M)
  76. else:
  77. r = re.compile(str(self._patterns), re.M)
  78. string = self._escape(string, self.escapeChar)
  79. string = r.sub(self._replacement, string)
  80. string = self._unescape(string, self.escapeChar)
  81. string = ParseMaster.DELETED.sub("", string)
  82. return string
  83. # clear the patterns collections so that this object may be re-used
  84. def reset(self):
  85. self._patterns = Patterns()
  86. # this is the global replace function (it's quite complicated)
  87. def _replacement(self, match):
  88. i = 1
  89. # loop thpetri the patterns
  90. for pattern in self._patterns:
  91. if match.group(i) is not None:
  92. replacement = pattern.replacement
  93. if callable(replacement):
  94. return replacement(match, i)
  95. elif isinstance(replacement, (int, long)):
  96. return match.group(replacement+i)
  97. else:
  98. return replacement
  99. else:
  100. i = i+pattern.length
  101. # encode escaped characters
  102. def _escape(self, string, escapeChar=None):
  103. def repl(match):
  104. char = match.group(1)
  105. self._escaped.append(char)
  106. return escapeChar
  107. if escapeChar is None:
  108. return string
  109. r = re.compile("\\"+escapeChar+"(.)", re.M)
  110. result = r.sub(repl, string)
  111. return result
  112. # decode escaped characters
  113. def _unescape(self, string, escapeChar=None):
  114. def repl(match):
  115. try:
  116. #result = eval("'"+escapeChar + self._escaped.pop(0)+"'")
  117. result = escapeChar + self._escaped.pop(0)
  118. return result
  119. except IndexError:
  120. return escapeChar
  121. if escapeChar is None:
  122. return string
  123. r = re.compile("\\"+escapeChar, re.M)
  124. result = r.sub(repl, string)
  125. return result
  126. def _internalEscape(self, string):
  127. return ParseMaster.ESCAPE.sub("", string)
  128. ## packer, version 2.0 (2005/04/20)
  129. ## Copyright 2004-2005, Dean Edwards
  130. ## License: http://creativecommons.org/licenses/LGPL/2.1/
  131. ## Ported to Python by Florian Schulze
  132. ## http://dean.edwards.name/packer/
  133. class JavaScriptPacker:
  134. def __init__(self):
  135. self._basicCompressionParseMaster = self.getCompressionParseMaster(False)
  136. self._specialCompressionParseMaster = self.getCompressionParseMaster(True)
  137. def basicCompression(self, script):
  138. return self._basicCompressionParseMaster.execute(script)
  139. def specialCompression(self, script):
  140. return self._specialCompressionParseMaster.execute(script)
  141. def getCompressionParseMaster(self, specialChars):
  142. IGNORE = "$1"
  143. parser = ParseMaster()
  144. parser.escapeChar = '\\'
  145. # protect strings
  146. parser.add(r"""'[^']*?'""", IGNORE)
  147. parser.add(r'"[^"]*?"', IGNORE)
  148. # remove comments
  149. parser.add(r"""//[^\n\r]*?[\n\r]""")
  150. parser.add(r"""/\*[^*]*?\*+([^/][^*]*?\*+)*?/""")
  151. # protect regular expressions
  152. parser.add(r"""\s+(\/[^\/\n\r\*][^\/\n\r]*\/g?i?)""", "$2")
  153. parser.add(r"""[^\w\$\/'"*)\?:]\/[^\/\n\r\*][^\/\n\r]*\/g?i?""", IGNORE)
  154. # remove: ;;; doSomething();
  155. if specialChars:
  156. parser.add(""";;;[^\n\r]+[\n\r]""")
  157. # remove redundant semi-colons
  158. parser.add(r""";+\s*([};])""", "$2")
  159. # remove white-space
  160. parser.add(r"""(\b|\$)\s+(\b|\$)""", "$2 $3")
  161. parser.add(r"""([+\-])\s+([+\-])""", "$2 $3")
  162. parser.add(r"""\s+""", "")
  163. return parser
  164. def getEncoder(self, ascii):
  165. mapping = {}
  166. base = ord('0')
  167. mapping.update(dict([(i, chr(i+base)) for i in range(10)]))
  168. base = ord('a')
  169. mapping.update(dict([(i+10, chr(i+base)) for i in range(26)]))
  170. base = ord('A')
  171. mapping.update(dict([(i+36, chr(i+base)) for i in range(26)]))
  172. base = 161
  173. mapping.update(dict([(i+62, chr(i+base)) for i in range(95)]))
  174. # zero encoding
  175. # characters: 0123456789
  176. def encode10(charCode):
  177. return str(charCode)
  178. # inherent base36 support
  179. # characters: 0123456789abcdefghijklmnopqrstuvwxyz
  180. def encode36(charCode):
  181. l = []
  182. remainder = charCode
  183. while 1:
  184. result, remainder = divmod(remainder, 36)
  185. l.append(mapping[remainder])
  186. if not result:
  187. break
  188. remainder = result
  189. l.reverse()
  190. return "".join(l)
  191. # hitch a ride on base36 and add the upper case alpha characters
  192. # characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
  193. def encode62(charCode):
  194. l = []
  195. remainder = charCode
  196. while 1:
  197. result, remainder = divmod(remainder, 62)
  198. l.append(mapping[remainder])
  199. if not result:
  200. break
  201. remainder = result
  202. l.reverse()
  203. return "".join(l)
  204. # use high-ascii values
  205. def encode95(charCode):
  206. l = []
  207. remainder = charCode
  208. while 1:
  209. result, remainder = divmod(remainder, 95)
  210. l.append(mapping[remainder+62])
  211. if not result:
  212. break
  213. remainder = result
  214. l.reverse()
  215. return "".join(l)
  216. if ascii <= 10:
  217. return encode10
  218. elif ascii <= 36:
  219. return encode36
  220. elif ascii <= 62:
  221. return encode62
  222. return encode95
  223. def escape(self, script):
  224. script = script.replace("\\","\\\\")
  225. script = script.replace("'","\\'")
  226. script = script.replace('\n','\\n')
  227. #return re.sub(r"""([\\'](?!\n))""", "\\$1", script)
  228. return script
  229. def escape95(self, script):
  230. result = []
  231. for x in script:
  232. if x>'\xa1':
  233. x = "\\x%0x" % ord(x)
  234. result.append(x)
  235. return "".join(result)
  236. def encodeKeywords(self, script, encoding, fastDecode):
  237. # escape high-ascii values already in the script (i.e. in strings)
  238. if (encoding > 62):
  239. script = self.escape95(script)
  240. # create the parser
  241. parser = ParseMaster()
  242. encode = self.getEncoder(encoding)
  243. # for high-ascii, don't encode single character low-ascii
  244. if encoding > 62:
  245. regexp = r"""\w\w+"""
  246. else:
  247. regexp = r"""\w+"""
  248. # build the word list
  249. keywords = self.analyze(script, regexp, encode)
  250. encoded = keywords['encoded']
  251. # encode
  252. def repl(match, offset):
  253. return encoded.get(match.group(offset), "")
  254. parser.add(regexp, repl)
  255. # if encoded, wrap the script in a decoding function
  256. script = parser.execute(script)
  257. script = self.bootStrap(script, keywords, encoding, fastDecode)
  258. return script
  259. def analyze(self, script, regexp, encode):
  260. # analyse
  261. # retreive all words in the script
  262. regexp = re.compile(regexp, re.M)
  263. all = regexp.findall(script)
  264. sorted = [] # list of words sorted by frequency
  265. encoded = {} # dictionary of word->encoding
  266. protected = {} # instances of "protected" words
  267. if all:
  268. unsorted = []
  269. _protected = {}
  270. values = {}
  271. count = {}
  272. all.reverse()
  273. for word in all:
  274. word = "$"+word
  275. if word not in count:
  276. count[word] = 0
  277. j = len(unsorted)
  278. unsorted.append(word)
  279. # make a dictionary of all of the protected words in this script
  280. # these are words that might be mistaken for encoding
  281. values[j] = encode(j)
  282. _protected["$"+values[j]] = j
  283. count[word] = count[word] + 1
  284. # prepare to sort the word list, first we must protect
  285. # words that are also used as codes. we assign them a code
  286. # equivalent to the word itself.
  287. # e.g. if "do" falls within our encoding range
  288. # then we store keywords["do"] = "do";
  289. # this avoids problems when decoding
  290. sorted = [None] * len(unsorted)
  291. for word in unsorted:
  292. if word in _protected and isinstance(_protected[word], int):
  293. sorted[_protected[word]] = word[1:]
  294. protected[_protected[word]] = True
  295. count[word] = 0
  296. unsorted.sort(lambda a,b: count[b]-count[a])
  297. j = 0
  298. for i in range(len(sorted)):
  299. if sorted[i] is None:
  300. sorted[i] = unsorted[j][1:]
  301. j = j + 1
  302. encoded[sorted[i]] = values[i]
  303. return {'sorted': sorted, 'encoded': encoded, 'protected': protected}
  304. def encodePrivate(self, charCode):
  305. return "_"+str(charCode)
  306. def encodeSpecialChars(self, script):
  307. parser = ParseMaster()
  308. # replace: $name -> n, $$name -> $$na
  309. def repl(match, offset):
  310. #print offset, match.groups()
  311. length = len(match.group(offset + 2))
  312. start = length - max(length - len(match.group(offset + 3)), 0)
  313. return match.group(offset + 1)[start:start+length] + match.group(offset + 4)
  314. parser.add(r"""((\$+)([a-zA-Z\$_]+))(\d*)""", repl)
  315. # replace: _name -> _0, double-underscore (__name) is ignored
  316. regexp = r"""\b_[A-Za-z\d]\w*"""
  317. # build the word list
  318. keywords = self.analyze(script, regexp, self.encodePrivate)
  319. # quick ref
  320. encoded = keywords['encoded']
  321. def repl(match, offset):
  322. return encoded.get(match.group(offset), "")
  323. parser.add(regexp, repl)
  324. return parser.execute(script)
  325. # build the boot function used for loading and decoding
  326. def bootStrap(self, packed, keywords, encoding, fastDecode):
  327. ENCODE = re.compile(r"""\$encode\(\$count\)""")
  328. # $packed: the packed script
  329. #packed = self.escape(packed)
  330. #packed = [packed[x*10000:(x+1)*10000] for x in range((len(packed)/10000)+1)]
  331. #packed = "'" + "'+\n'".join(packed) + "'\n"
  332. packed = "'" + self.escape(packed) + "'"
  333. # $count: number of words contained in the script
  334. count = len(keywords['sorted'])
  335. # $ascii: base for encoding
  336. ascii = min(count, encoding) or 1
  337. # $keywords: list of words contained in the script
  338. for i in keywords['protected']:
  339. keywords['sorted'][i] = ""
  340. # convert from a string to an array
  341. keywords = "'" + "|".join(keywords['sorted']) + "'.split('|')"
  342. encoding_functions = {
  343. 10: """ function($charCode) {
  344. return $charCode;
  345. }""",
  346. 36: """ function($charCode) {
  347. return $charCode.toString(36);
  348. }""",
  349. 62: """ function($charCode) {
  350. return ($charCode < _encoding ? "" : arguments.callee(parseInt($charCode / _encoding))) +
  351. (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
  352. }""",
  353. 95: """ function($charCode) {
  354. return ($charCode < _encoding ? "" : arguments.callee($charCode / _encoding)) +
  355. String.fromCharCode($charCode % _encoding + 161);
  356. }"""
  357. }
  358. # $encode: encoding function (used for decoding the script)
  359. encode = encoding_functions[encoding]
  360. encode = encode.replace('_encoding',"$ascii")
  361. encode = encode.replace('arguments.callee', "$encode")
  362. if ascii > 10:
  363. inline = "$count.toString($ascii)"
  364. else:
  365. inline = "$count"
  366. # $decode: code snippet to speed up decoding
  367. if fastDecode:
  368. # create the decoder
  369. decode = r"""// does the browser support String.replace where the
  370. // replacement value is a function?
  371. if (!''.replace(/^/, String)) {
  372. // decode all the values we need
  373. while ($count--) $decode[$encode($count)] = $keywords[$count] || $encode($count);
  374. // global replacement function
  375. $keywords = [function($encoded){return $decode[$encoded]}];
  376. // generic match
  377. $encode = function(){return'\\w+'};
  378. // reset the loop counter - we are now doing a global replace
  379. $count = 1;
  380. }"""
  381. if encoding > 62:
  382. decode = decode.replace('\\\\w', "[\\xa1-\\xff]")
  383. else:
  384. # perform the encoding inline for lower ascii values
  385. if ascii < 36:
  386. decode = ENCODE.sub(inline, decode)
  387. # special case: when $count==0 there ar no keywords. i want to keep
  388. # the basic shape of the unpacking funcion so i'll frig the code...
  389. if not count:
  390. raise NotImplemented
  391. #) $decode = $decode.replace(/(\$count)\s*=\s*1/, "$1=0");
  392. # boot function
  393. unpack = r"""function($packed, $ascii, $count, $keywords, $encode, $decode) {
  394. while ($count--)
  395. if ($keywords[$count])
  396. $packed = $packed.replace(new RegExp("\\b" + $encode($count) + "\\b", "g"), $keywords[$count]);
  397. return $packed;
  398. }"""
  399. if fastDecode:
  400. # insert the decoder
  401. #unpack = re.sub(r"""\{""", "{" + decode + ";", unpack)
  402. unpack = unpack.replace('{', "{" + decode + ";", 1)
  403. if encoding > 62: # high-ascii
  404. # get rid of the word-boundaries for regexp matches
  405. unpack = re.sub(r"""'\\\\b'\s*\+|\+\s*'\\\\b'""", "", unpack)
  406. if ascii > 36 or encoding > 62 or fastDecode:
  407. # insert the encode function
  408. #unpack = re.sub(r"""\{""", "{$encode=" + encode + ";", unpack)
  409. unpack = unpack.replace('{', "{$encode=" + encode + ";", 1)
  410. else:
  411. # perform the encoding inline
  412. unpack = ENCODE.sub(inline, unpack)
  413. # pack the boot function too
  414. unpack = self.pack(unpack, 0, False, True)
  415. # arguments
  416. params = [packed, str(ascii), str(count), keywords]
  417. if fastDecode:
  418. # insert placeholders for the decoder
  419. params.extend(['0', "{}"])
  420. # the whole thing
  421. return "eval(" + unpack + "(" + ",".join(params) + "))";
  422. def pack(self, script, encoding=0, fastDecode=False, specialChars=False, compaction=True):
  423. script = script+"\n"
  424. self._encoding = encoding
  425. self._fastDecode = fastDecode
  426. if specialChars:
  427. script = self.specialCompression(script)
  428. script = self.encodeSpecialChars(script)
  429. else:
  430. if compaction:
  431. script = self.basicCompression(script)
  432. if encoding:
  433. script = self.encodeKeywords(script, encoding, fastDecode)
  434. return script
  435. def run():
  436. p = JavaScriptPacker()
  437. script = open('test_plone.js').read()
  438. result = p.pack(script, compaction=False, encoding=62, fastDecode=True)
  439. open('output.js','w').write(result)
  440. def run1():
  441. test_scripts = []
  442. test_scripts.append(("""// -----------------------------------------------------------------------
  443. // public interface
  444. // -----------------------------------------------------------------------
  445. cssQuery.toString = function() {
  446. return "function cssQuery() {\n [version " + version + "]\n}";
  447. };""", 0, False, False, """cssQuery.toString=function(){return"function cssQuery() {\n [version "+version+"]\n}"};"""))
  448. test_scripts.append(("""function test(_localvar) {
  449. var $name = 'foo';
  450. var $$dummy = 2;
  451. return $name + $$dummy;
  452. }""", 0, False, True, """function test(_0){var n='foo';var du=2;return n+du}"""))
  453. test_scripts.append(("""function _test($localvar) {
  454. var $name = 1;
  455. var _dummy = 2;
  456. var __foo = 3;
  457. return $name + _dummy + $localvar + __foo;
  458. }""", 0, False, True, """function _1(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
  459. test_scripts.append(("""function _test($localvar) {
  460. var $name = 1;
  461. var _dummy = 2;
  462. var __foo = 3;
  463. return $name + _dummy + $localvar + __foo;
  464. }
  465. function _bar(_ocalvar) {
  466. var $name = 1;
  467. var _dummy = 2;
  468. var __foo = 3;
  469. return $name + _dummy + $localvar + __foo;
  470. }""", 0, False, True, """function _3(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _2(_1){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}"""))
  471. test_scripts.append(("cssQuery1.js", 0, False, False, "cssQuery1-p1.js"))
  472. test_scripts.append(("cssQuery.js", 0, False, False, "cssQuery-p1.js"))
  473. test_scripts.append(("pack.js", 0, False, False, "pack-p1.js"))
  474. test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p2.js"))
  475. # the following ones are different, because javascript might use an
  476. # unstable sort algorithm while python uses an stable sort algorithm
  477. test_scripts.append(("pack.js", 0, False, True, "pack-p2.js"))
  478. test_scripts.append(("test.js", 0, False, True, """function _4(l){var n=1;var _0=2;var __foo=3;return n+_0+l+__foo}function _3(_1){var n=1;var _2=2;var __foo=3;return n+_2+l+__foo}"""))
  479. test_scripts.append(("test.js", 10, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 13($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 11(12){0 $4=1;0 10=2;0 5=3;9 $4+10+$6+5}',10,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
  480. """))
  481. test_scripts.append(("test.js", 62, False, False, """eval(function(p,a,c,k,e,d){while(c--){if(k[c]){p=p.replace(new RegExp("\\b"+e(c)+"\\b","g"),k[c])}}return p}('8 d($6){0 $4=1;0 7=2;0 5=3;9 $4+7+$6+5}8 b(c){0 $4=1;0 a=2;0 5=3;9 $4+a+$6+5}',14,14,'var||||name|__foo|localvar|_dummy|function|return|_2|_bar|_ocalvar|_test'.split('|')))
  482. """))
  483. test_scripts.append(("test.js", 95, False, False, "test-p4.js"))
  484. test_scripts.append(("cssQuery.js", 0, False, True, "cssQuery-p3.js"))
  485. test_scripts.append(("cssQuery.js", 62, False, True, "cssQuery-p4.js"))
  486. import difflib
  487. p = JavaScriptPacker()
  488. for script, encoding, fastDecode, specialChars, expected in test_scripts:
  489. if os.path.exists(script):
  490. _script = open(script).read()
  491. else:
  492. _script = script
  493. if os.path.exists(expected):
  494. _expected = open(expected).read()
  495. else:
  496. _expected = expected
  497. print script[:20], encoding, fastDecode, specialChars, expected[:20]
  498. print "="*40
  499. result = p.pack(_script, encoding, fastDecode, specialChars)
  500. print len(result), len(_script)
  501. if (result != _expected):
  502. print "ERROR!!!!!!!!!!!!!!!!"
  503. print _expected
  504. print result
  505. #print list(difflib.unified_diff(result, _expected))
  506. if __name__=='__main__':
  507. run()