PageRenderTime 55ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/media/libraries/JavaScriptPacker.php

https://bitbucket.org/tomina/freenetisrest
PHP | 735 lines | 498 code | 55 blank | 182 comment | 41 complexity | 89ef175dfcdcf25bf16834fde07545ee MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, GPL-3.0
  1. <?php
  2. /* 7 December 2006. version 1.0
  3. *
  4. * This is the PHP version of Dean Edwards' JavaScript Packer,
  5. * Based on :
  6. *
  7. * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
  8. * a multi-pattern parser.
  9. * KNOWN BUG: erroneous behavior when using escapeChar with a replacement
  10. * value that is a function
  11. *
  12. * packer, version 2.0.2 (2005-08-19) Copyright 2004-2005, Dean Edwards
  13. *
  14. * License: http://creativecommons.org/licenses/LGPL/2.1/
  15. *
  16. * Ported to PHP by Nicolas Martin.
  17. *
  18. * ----------------------------------------------------------------------
  19. *
  20. * examples of usage :
  21. * $myPacker = new JavaScriptPacker($script, 62, true, false);
  22. * $packed = $myPacker->pack();
  23. *
  24. * or
  25. *
  26. * $myPacker = new JavaScriptPacker($script, 'Normal', true, false);
  27. * $packed = $myPacker->pack();
  28. *
  29. * or (default values)
  30. *
  31. * $myPacker = new JavaScriptPacker($script);
  32. * $packed = $myPacker->pack();
  33. *
  34. *
  35. * params of the constructor :
  36. * $script: the JavaScript to pack, string.
  37. * $encoding: level of encoding, int or string :
  38. * 0,10,62,95 or 'None', 'Numeric', 'Normal', 'High ASCII'.
  39. * default: 62.
  40. * $fastDecode: include the fast decoder in the packed result, boolean.
  41. * default : true.
  42. * $specialChars: whether private and local variables have been flagged
  43. * in the script, boolean.
  44. * default: false.
  45. *
  46. * The pack() method returns the compressed JavaScript, as a string.
  47. *
  48. * see http://dean.edwards.name/packer/usage/ for more information.
  49. *
  50. * Notes :
  51. * # need PHP 5 . Tested with PHP 5.1.2
  52. *
  53. * # The packed result may differ from the one produced by Dean Edwards'
  54. * version, while having the same length. The reason is that the PHP
  55. * function usort does not necessarily preserve the original order of
  56. * two equal members. The JavaScript sort function does in fact preserve
  57. * this order (although that is not required by the ECMAScript
  58. * standard). So the order of the encoded keywords can differ between
  59. * the two results.
  60. *
  61. * # Be careful with the 'High ASCII' Level encoding if you use
  62. * UTF-8 in your files...
  63. */
  /**
   * PHP port of Dean Edwards' JavaScript packer.
   *
   * pack() runs the script through up to three stages, in this order:
   *   1. _basicCompression   - always; strips comments and white-space
   *   2. _encodeSpecialChars - only when $_specialChars is true
   *   3. _encodeKeywords     - only when the encoding level is non-zero
   * Each stage takes the script as a string and returns the transformed string.
   */
  class JavaScriptPacker_Core {
      // replacement value meaning "put the whole match back untouched"
      const IGNORE = '$1';

      // validated constructor parameters
      private $_script = '';
      private $_encoding = 62;
      private $_fastDecode = true;
      private $_specialChars = false;

      // symbolic names accepted for the $_encoding constructor argument
      private $LITERAL_ENCODING = array(
          'None' => 0,
          'Numeric' => 10,
          'Normal' => 62,
          'High ASCII' => 95
      );

      // names of the parsing methods queued by pack()
      private $_parsers = array();

      // '$'-prefixed word => number of occurrences (filled by _analyze)
      private $_count = array();

      // scratch value handed to the preg_replace_callback helpers
      private $buffer;

      /**
       * @param string     $_script       the JavaScript to pack
       * @param int|string $_encoding     0, 10, 62, 95 or a key of LITERAL_ENCODING;
       *                                  values above 95 are capped at 95
       * @param bool       $_fastDecode   include the fast decoder in the result
       * @param bool       $_specialChars enable the _encodeSpecialChars stage
       */
      public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false)
      {
          $this->_script = $_script . "\n";
          // only strings can be symbolic level names; guarding here also keeps
          // array_key_exists() from being handed an unsupported key type
          if (is_string($_encoding) && array_key_exists($_encoding, $this->LITERAL_ENCODING)) {
              $_encoding = $this->LITERAL_ENCODING[$_encoding];
          }
          $this->_encoding = min((int)$_encoding, 95);
          $this->_fastDecode = $_fastDecode;
          $this->_specialChars = $_specialChars;
      }

      /**
       * Packs the script given to the constructor.
       *
       * @return string the packed JavaScript
       */
      public function pack() {
          // start from an empty pipeline so that calling pack() more than once
          // on the same object does not queue (and run) every stage twice
          $this->_parsers = array();
          $this->_addParser('_basicCompression');
          if ($this->_specialChars) {
              $this->_addParser('_encodeSpecialChars');
          }
          if ($this->_encoding) {
              $this->_addParser('_encodeKeywords');
          }
          // go!
          return $this->_pack($this->_script);
      }

      // apply all queued parsing routines, in the order they were added
      private function _pack($script) {
          foreach ($this->_parsers as $parser) {
              $script = call_user_func(array($this, $parser), $script);
          }
          return $script;
      }

      // queue a parsing method (by name); they are executed all at once by _pack
      private function _addParser($parser) {
          $this->_parsers[] = $parser;
      }

      // zero encoding - just removal of white space and comments
      private function _basicCompression($script) {
          $parser = new ParseMaster();
          // make safe
          $parser->escapeChar = '\\';
          // protect strings
          $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE);
          $parser->add('/"[^"\\n\\r]*"/', self::IGNORE);
          // remove comments
          $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
          $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
          // protect regular expressions
          $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
          $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE);
          // remove: ;;; doSomething();
          if ($this->_specialChars) {
              $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
          }
          // remove redundant semi-colons
          $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops
          $parser->add('/;+\\s*([};])/', '$2');
          // apply the above
          $script = $parser->exec($script);
          // remove white-space; the parser is NOT reset here, so the second
          // pass still runs the protecting patterns added above
          $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
          $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
          $parser->add('/\\s+/', '');
          // done
          return $parser->exec($script);
      }

      // shorten flagged identifiers ($name, _name)
      private function _encodeSpecialChars($script) {
          $parser = new ParseMaster();
          // replace: $name -> n, $$name -> na
          $parser->add(
              '/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
              array('fn' => '_replace_name')
          );
          // replace: _name -> _0, double-underscore (__name) is ignored
          $regexp = '/\\b_[A-Za-z\\d]\\w*/';
          // build the word list
          $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
          $parser->add(
              $regexp,
              array(
                  'fn' => '_replace_encoded',
                  'data' => $keywords['encoded']
              )
          );
          return $parser->exec($script);
      }

      // replace every word by a short code and wrap the result in the boot function
      private function _encodeKeywords($script) {
          // escape high-ascii values already in the script (i.e. in strings)
          if ($this->_encoding > 62) {
              $script = $this->_escape95($script);
          }
          // nothing to encode: hand the script back as it is
          if (empty($script)) {
              return $script;
          }
          // create the parser
          $parser = new ParseMaster();
          $encode = $this->_getEncoder($this->_encoding);
          // for high-ascii, don't encode single character low-ascii
          $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
          // build the word list
          $keywords = $this->_analyze($script, $regexp, $encode);
          // encode
          $parser->add(
              $regexp,
              array(
                  'fn' => '_replace_encoded',
                  'data' => $keywords['encoded']
              )
          );
          return $this->_bootStrap($parser->exec($script), $keywords);
      }

      /**
       * Collects every match of $regexp in $script and assigns each distinct
       * word a code produced by the method named $encode.
       *
       * @param string $script
       * @param string $regexp pattern matching one word
       * @param string $encode name of the method turning an index into a code
       * @return array 'sorted' (index => word), 'encoded' (word => code) and
       *               'protected' (index => true for words equal to a code)
       */
      private function _analyze($script, $regexp, $encode) {
          // retrieve all words in the script
          $all = array();
          preg_match_all($regexp, $script, $all);
          $_sorted = array(); // list of words sorted by frequency
          $_encoded = array(); // dictionary of word->encoding
          $_protected = array(); // instances of "protected" words
          $all = $all[0]; // simulate the JavaScript behaviour of a global match
          if (!empty($all)) {
              $unsorted = array(); // same list, not sorted
              $protected = array(); // "protected" words (dictionary of word->"word")
              $values = array(); // dictionary of index->encoding
              $this->_count = array(); // word->count
              $i = count($all);
              $j = 0;
              // count the occurrences - used for sorting later
              do {
                  --$i;
                  $word = '$' . $all[$i];
                  if (!isset($this->_count[$word])) {
                      $this->_count[$word] = 0;
                      $unsorted[$j] = $word;
                      // make a dictionary of all of the protected words in this script
                      // these are words that might be mistaken for encoding
                      $values[$j] = call_user_func(array($this, $encode), $j);
                      $protected['$' . $values[$j]] = $j;
                      $j++;
                  }
                  // increment the word counter
                  $this->_count[$word]++;
              } while ($i > 0);
              // prepare to sort the word list, first we must protect
              // words that are also used as codes. we assign them a code
              // equivalent to the word itself.
              // e.g. if "do" falls within our encoding range
              // then we store keywords["do"] = "do";
              // this avoids problems when decoding
              $i = count($unsorted);
              do {
                  $word = $unsorted[--$i];
                  if (isset($protected[$word])) {
                      $_sorted[$protected[$word]] = substr($word, 1);
                      $_protected[$protected[$word]] = true;
                      $this->_count[$word] = 0;
                  }
              } while ($i);
              // sort the words by frequency.
              // usort leaves the order of equal members undefined, so the
              // packed script may differ from the JavaScript version's output
              // while remaining equivalent.
              usort($unsorted, array($this, '_sortWords'));
              $total = count($unsorted);
              $j = 0;
              // because there are "protected" words in the list
              // we must add the sorted words around them
              for ($i = 0; $i < $total; $i++) {
                  if (!isset($_sorted[$i])) {
                      $_sorted[$i] = substr($unsorted[$j++], 1);
                  }
                  $_encoded[$_sorted[$i]] = $values[$i];
              }
          }
          return array(
              'sorted' => $_sorted,
              'encoded' => $_encoded,
              'protected' => $_protected
          );
      }

      // usort callback: most frequent word first
      private function _sortWords($match1, $match2) {
          return $this->_count[$match2] - $this->_count[$match1];
      }

      // build the boot function used for loading and decoding
      private function _bootStrap($packed, $keywords) {
          $ENCODE = $this->_safeRegExp('$encode\\($count\\)');
          // $packed: the packed script
          $packed = "'" . $this->_escape($packed) . "'";
          // $count: number of words contained in the script
          $count = count($keywords['sorted']);
          // $ascii: base for encoding
          $ascii = min($count, $this->_encoding);
          if ($ascii == 0) {
              $ascii = 1;
          }
          // $keywords: list of words contained in the script;
          // protected words are sent as empty entries
          foreach ($keywords['protected'] as $i => $value) {
              $keywords['sorted'][$i] = '';
          }
          // convert from an array to a JavaScript expression
          ksort($keywords['sorted']);
          $keywords = "'" . implode('|', $keywords['sorted']) . "'.split('|')";
          $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
          $encode = $this->_getJSFunction($encode);
          $encode = preg_replace('/_encoding/', '$ascii', $encode);
          $encode = preg_replace('/arguments\\.callee/', '$encode', $encode);
          $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');
          // $decode: code snippet to speed up decoding
          $decode = '';
          if ($this->_fastDecode) {
              // create the decoder
              $decode = $this->_getJSFunction('_decodeBody');
              if ($this->_encoding > 62) {
                  $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
              } elseif ($ascii < 36) {
                  // perform the encoding inline for lower ascii values
                  $decode = preg_replace($ENCODE, $inline, $decode);
              }
              // special case: when $count==0 there are no keywords. Keep the
              // basic shape of the unpacking function by patching the counter.
              if ($count == 0) {
                  $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
              }
          }
          // boot function
          $unpack = $this->_getJSFunction('_unpack');
          if ($this->_fastDecode) {
              // insert the decoder
              $this->buffer = $decode;
              $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastDecode'), $unpack, 1);
          }
          $unpack = preg_replace('/"/', "'", $unpack);
          if ($this->_encoding > 62) { // high-ascii
              // get rid of the word-boundaries for regexp matches
              $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
          }
          if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
              // insert the encode function
              $this->buffer = $encode;
              $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastEncode'), $unpack, 1);
          } else {
              // perform the encoding inline
              $unpack = preg_replace($ENCODE, $inline, $unpack);
          }
          // pack the boot function too. Instantiate this very class: no class
          // named JavaScriptPacker is declared in this file.
          $unpackPacker = new self($unpack, 0, false, true);
          $unpack = $unpackPacker->pack();
          // arguments
          $params = array($packed, $ascii, $count, $keywords);
          if ($this->_fastDecode) {
              $params[] = 0;
              $params[] = '{}';
          }
          $params = implode(',', $params);
          // the whole thing
          return 'eval(' . $unpack . '(' . $params . "))\n";
      }

      // preg_replace_callback helper: put the decoder right after the first '{'
      private function _insertFastDecode($match) {
          return '{' . $this->buffer . ';';
      }

      // preg_replace_callback helper: put the encoder right after the first '{'
      private function _insertFastEncode($match) {
          return '{$encode=' . $this->buffer . ';';
      }

      // name of the encoding method matching the given base
      private function _getEncoder($ascii) {
          if ($ascii > 62) {
              return '_encode95';
          }
          if ($ascii > 36) {
              return '_encode62';
          }
          if ($ascii > 10) {
              return '_encode36';
          }
          return '_encode10';
      }

      // zero encoding
      // characters: 0123456789
      private function _encode10($charCode) {
          return $charCode;
      }

      // inherent base36 support
      // characters: 0123456789abcdefghijklmnopqrstuvwxyz
      private function _encode36($charCode) {
          return base_convert($charCode, 10, 36);
      }

      // hitch a ride on base36 and add the upper case alpha characters
      // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
      private function _encode62($charCode) {
          $res = '';
          if ($charCode >= $this->_encoding) {
              $res = $this->_encode62((int)($charCode / $this->_encoding));
          }
          $charCode = $charCode % $this->_encoding;
          if ($charCode > 35) {
              return $res . chr($charCode + 29);
          }
          return $res . base_convert($charCode, 10, 36);
      }

      // use high-ascii values (character codes 161 and up)
      private function _encode95($charCode) {
          $res = '';
          if ($charCode >= $this->_encoding) {
              // integer quotient, as in _encode62: recursing on a float only
              // worked through implicit truncation inside the % operator
              $res = $this->_encode95((int)($charCode / $this->_encoding));
          }
          return $res . chr(($charCode % $this->_encoding) + 161);
      }

      // turn a string into a regexp, escaping the '$' characters it contains
      private function _safeRegExp($string) {
          return '/'.preg_replace('/\$/', '\\\$', $string).'/';
      }

      // encoder used for private (_name) identifiers
      private function _encodePrivate($charCode) {
          return "_" . $charCode;
      }

      // protect characters used by the parser (backslash and single quote)
      private function _escape($script) {
          return preg_replace('/([\\\\\'])/', '\\\$1', $script);
      }

      // protect high-ascii characters already in the script
      private function _escape95($script) {
          return preg_replace_callback(
              '/[\\xa1-\\xff]/',
              array($this, '_escape95Bis'),
              $script
          );
      }

      // preg_replace_callback helper: $match is the match ARRAY, and the
      // matched byte is its element 0
      private function _escape95Bis($match) {
          return '\x' . dechex(ord($match[0]));
      }

      // source of the JavaScript snippet stored in the JSFUNCTION<name>
      // constant, or '' when there is no such constant
      private function _getJSFunction($aName) {
          $constant = 'self::JSFUNCTION' . $aName;
          return defined($constant) ? constant($constant) : '';
      }

      // JavaScript Functions used.
      // Note : In Dean's version, these functions are converted
      // with 'String(aFunctionName);'.
      // This internal conversion completes the original code, ex :
      // 'while (aBool) anAction();' is converted to
      // 'while (aBool) { anAction(); }'.
      // The JavaScript functions below are written in that completed form.

      // unpacking function - this is the boot strap function
      // data extracted from this packing routine is passed to
      // this function when decoded in the target
      // NOTE ! : without the final ';'.
      const JSFUNCTION_unpack =
'function($packed, $ascii, $count, $keywords, $encode, $decode) {
while ($count--) {
if ($keywords[$count]) {
$packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
}
}
return $packed;
}';

      // code-snippet inserted into the unpacker to speed up decoding
      const JSFUNCTION_decodeBody =
' if (!\'\'.replace(/^/, String)) {
// decode all the values we need
while ($count--) {
$decode[$encode($count)] = $keywords[$count] || $encode($count);
}
// global replacement function
$keywords = [function ($encoded) {return $decode[$encoded]}];
// generic match
$encode = function () {return \'\\\\w+\'};
// reset the loop counter - we are now doing a global replace
$count = 1;
}
';

      // zero encoding
      // characters: 0123456789
      const JSFUNCTION_encode10 =
'function($charCode) {
return $charCode;
}';

      // inherent base36 support
      // characters: 0123456789abcdefghijklmnopqrstuvwxyz
      const JSFUNCTION_encode36 =
'function($charCode) {
return $charCode.toString(36);
}';

      // hitch a ride on base36 and add the upper case alpha characters
      // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
      const JSFUNCTION_encode62 =
'function($charCode) {
return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) +
(($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}';

      // use high-ascii values (character codes 161 and up)
      const JSFUNCTION_encode95 =
'function($charCode) {
return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) +
String.fromCharCode($charCode % _encoding + 161);
}';
  }
  /**
   * Multi-pattern parser (PHP port of Dean Edwards' ParseMaster).
   *
   * Patterns registered with add() are joined into one alternation and applied
   * in a single pass by exec(). Each pattern carries its own replacement:
   * a plain string, a back-reference such as '$2', a template such as '$2 $3',
   * or array('fn' => methodName, 'data' => mixed) naming a private method of
   * this class.
   *
   * Inside a replacement '$1' denotes the whole match of the pattern and
   * '$2', '$3', ... denote its first, second, ... capture group.
   *
   * Patterns must be delimited by '/' and carry no modifiers: exec() strips
   * the first and last character of each one.
   */
  class ParseMaster {
      public $ignoreCase = false;
      public $escapeChar = '';

      // indexes into a stored pattern entry
      const EXPRESSION = 0;
      const REPLACEMENT = 1;
      const LENGTH = 2;

      // used to determine nesting levels
      private $GROUPS = '/\\(/';//g
      private $SUB_REPLACE = '/\\$\\d/';
      private $INDEXED = '/^\\$\\d+$/';
      private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
      private $ESCAPE = '/\\\./';//g
      private $QUOTE = '/\'/';
      private $DELETED = '/\\x01[^\\x01]*\\x01/';//g

      private $_escaped = array(); // escaped characters
      private $_patterns = array(); // patterns stored by index

      // php : we cannot pass additional data to preg_replace_callback,
      // so the callbacks read what they need from this property
      private $buffer;

      /**
       * Registers a pattern and its replacement.
       *
       * @param string       $expression  '/'-delimited regular expression
       * @param string|array $replacement see the class description
       */
      public function add($expression, $replacement = '') {
          // count the number of sub-expressions
          // - add one because each pattern is itself a sub-expression
          $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);
          // only string replacements need interpreting
          if (is_string($replacement) && preg_match($this->SUB_REPLACE, $replacement)) {
              if (preg_match($this->INDEXED, $replacement)) {
                  // a simple lookup (e.g. "$2"):
                  // store the index (used for fast retrieval of matched strings)
                  $replacement = (int)(substr($replacement, 1)) - 1;
              } else {
                  // a complicated lookup (e.g. "Hello $2 $1"):
                  // delegate to _backReferences
                  $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
                      ? '"' : "'";
                  $replacement = array(
                      'fn' => '_backReferences',
                      'data' => array(
                          'replacement' => $replacement,
                          'length' => $length,
                          'quote' => $quote
                      )
                  );
              }
          }
          // pass the modified arguments
          if (!empty($expression)) {
              $this->_add($expression, $replacement, $length);
          } else {
              $this->_add('/^$/', $replacement, $length);
          }
      }

      /**
       * Applies every registered pattern to $string in one global replacement.
       *
       * @param string $string
       * @return string
       */
      public function exec($string) {
          // without patterns the alternation would be the invalid regexp '/'
          if (empty($this->_patterns)) {
              return $string;
          }
          $this->_escaped = array();
          // simulate the _patterns.toString of Dean:
          // wrap each pattern in its own group and join them with '|'
          $alternatives = array();
          foreach ($this->_patterns as $pattern) {
              $alternatives[] = '(' . substr($pattern[self::EXPRESSION], 1, -1) . ')';
          }
          $regexp = '/' . implode('|', $alternatives) . '/';
          if ($this->ignoreCase) {
              $regexp .= 'i';
          }
          $string = $this->_escape($string, $this->escapeChar);
          $string = preg_replace_callback(
              $regexp,
              array($this, '_replacement'),
              $string
          );
          $string = $this->_unescape($string, $this->escapeChar);
          return preg_replace($this->DELETED, '', $string);
      }

      // clear the patterns collection so that this object may be re-used
      public function reset() {
          $this->_patterns = array();
      }

      // store a new pattern as array(expression, replacement, length)
      private function _add() {
          $arguments = func_get_args();
          $this->_patterns[] = $arguments;
      }

      // this is the global replace function: find which pattern produced the
      // match and apply that pattern's replacement
      private function _replacement($arguments) {
          if (empty($arguments)) {
              return '';
          }
          // $i: index of the current pattern's own group in $arguments
          $i = 1;
          foreach ($this->_patterns as $pattern) {
              // do we have a result?
              if (isset($arguments[$i]) && ($arguments[$i] != '')) {
                  $replacement = $pattern[self::REPLACEMENT];
                  if (is_array($replacement) && isset($replacement['fn'])) {
                      if (isset($replacement['data'])) {
                          $this->buffer = $replacement['data'];
                      }
                      return call_user_func(array($this, $replacement['fn']), $arguments, $i);
                  }
                  if (is_int($replacement)) {
                      return $arguments[$replacement + $i];
                  }
                  // a match containing the escape character is kept between
                  // \x01 markers; exec() removes the marked span at the end
                  $delete = ($this->escapeChar == '' ||
                      strpos($arguments[$i], $this->escapeChar) === false)
                      ? '' : "\x01" . $arguments[$i] . "\x01";
                  return $delete . $replacement;
              }
              // skip over references to sub-expressions
              $i += $pattern[self::LENGTH];
          }
          return '';
      }

      // expand a template replacement such as '$2 $3'
      private function _backReferences($match, $offset) {
          $replacement = $this->buffer['replacement'];
          // highest reference first, so that '$1' cannot clobber '$10'
          for ($i = $this->buffer['length']; $i > 0; $i--) {
              $index = $offset + $i - 1;
              // preg_replace_callback drops trailing groups that did not
              // take part in the match
              $group = isset($match[$index]) ? $match[$index] : '';
              $replacement = str_replace('$' . $i, $group, $replacement);
          }
          return $replacement;
      }

      // shorten a '$'-flagged name: $name -> n, $$name -> na
      private function _replace_name($match, $offset) {
          $length = strlen($match[$offset + 2]);
          $start = $length - max($length - strlen($match[$offset + 3]), 0);
          $digits = isset($match[$offset + 4]) ? $match[$offset + 4] : '';
          return substr($match[$offset + 1], $start, $length) . $digits;
      }

      // look the matched word up in the dictionary stored in the buffer
      private function _replace_encoded($match, $offset) {
          return $this->buffer[$match[$offset]];
      }

      // encode escaped characters: each "<escapeChar><char>" pair is reduced to
      // the escape character alone and <char> is remembered in $_escaped
      private function _escape($string, $escapeChar) {
          if (!$escapeChar) {
              return $string;
          }
          $this->buffer = $escapeChar;
          return preg_replace_callback(
              '/\\' . $escapeChar . '(.)' .'/',
              array($this, '_escapeBis'),
              $string
          );
      }

      private function _escapeBis($match) {
          $this->_escaped[] = $match[1];
          return $this->buffer;
      }

      // decode escaped characters: give every escape character back the
      // character that _escape removed after it, in order
      private function _unescape($string, $escapeChar) {
          if (!$escapeChar) {
              return $string;
          }
          $regexp = '/'.'\\'.$escapeChar.'/';
          $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0);
          return preg_replace_callback(
              $regexp,
              array($this, '_unescapeBis'),
              $string
          );
      }

      private function _unescapeBis() {
          $index = $this->buffer['i'];
          // isset(), not !empty(): the escaped character may be '0', which
          // empty() treats as missing and which would then be lost
          $temp = isset($this->_escaped[$index]) ? $this->_escaped[$index] : '';
          $this->buffer['i']++;
          return $this->buffer['escapeChar'] . $temp;
      }

      // remove backslash-escaped characters, so that escaped parentheses and
      // quotes are not counted
      private function _internalEscape($string) {
          return preg_replace($this->ESCAPE, '', $string);
      }
  }