PageRenderTime 102ms CodeModel.GetById 3ms RepoModel.GetById 1ms app.codeStats 0ms

/libraries/php-ids/lib/IDS/Converter.php

https://bitbucket.org/syahzul/blog
PHP | 769 lines | 415 code | 101 blank | 253 comment | 38 complexity | dd4f7c0f188b9e1337a6545e3178c53a MD5 | raw file
  1. <?php
  2. /**
  3. * PHPIDS
  4. *
  5. * Requirements: PHP5, SimpleXML
  6. *
  7. * Copyright (c) 2008 PHPIDS group (http://php-ids.org)
  8. *
  9. * PHPIDS is free software; you can redistribute it and/or modify
  10. * it under the terms of the GNU Lesser General Public License as published by
  11. * the Free Software Foundation, version 3 of the License, or
  12. * (at your option) any later version.
  13. *
  14. * PHPIDS is distributed in the hope that it will be useful,
  15. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. * GNU Lesser General Public License for more details.
  18. *
  19. * You should have received a copy of the GNU Lesser General Public License
  20. * along with PHPIDS. If not, see <http://www.gnu.org/licenses/>.
  21. *
  22. * PHP version 5.1.6+
  23. *
  24. * @category Security
  25. * @package PHPIDS
  26. * @author Mario Heiderich <mario.heiderich@gmail.com>
  27. * @author Christian Matthies <ch0012@gmail.com>
  28. * @author Lars Strojny <lars@strojny.net>
  29. * @license http://www.gnu.org/licenses/lgpl.html LGPL
  30. * @link http://php-ids.org/
  31. */
  32. /**
  33. * PHPIDS specific utility class to convert charsets manually
  34. *
  35. * Note that if you make use of IDS_Converter::runAll(), existing class
  36. * methods will be executed in the same order as they are implemented in the
  37. * class tree!
  38. *
  39. * @category Security
  40. * @package PHPIDS
  41. * @author Christian Matthies <ch0012@gmail.com>
  42. * @author Mario Heiderich <mario.heiderich@gmail.com>
  43. * @author Lars Strojny <lars@strojny.net>
  44. * @copyright 2007-2009 The PHPIDS Group
  45. * @license http://www.gnu.org/licenses/lgpl.html LGPL
  46. * @version Release: $Id:Converter.php 517 2007-09-15 15:04:13Z mario $
  47. * @link http://php-ids.org/
  48. */
  49. class IDS_Converter
  50. {
  /**
   * Pipes a value through every converter of this class
   *
   * The method list comes from get_class_methods(), so converters run in
   * the order in which they are declared in the class. Methods whose name
   * begins with "run" (this method and runCentrifuge) are not invoked.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function runAll($value)
  {
      $converters = get_class_methods(__CLASS__);

      foreach ($converters as $name) {
          // strncmp() yields 0 exactly when the name starts with "run"
          if (strncmp($name, 'run', 3) !== 0) {
              $value = self::$name($value);
          }
      }

      return $value;
  }
  /**
   * Collapses long repetitions so later regular expressions are not
   * exposed to regex DoS payloads.
   *
   * Two shapes are replaced by a single "x": a fragment of at least two
   * characters followed by 32 or more copies of itself, and a run of 128
   * or more characters taken from + = | - @ and whitespace.
   *
   * @param string $value the value to pre-sanitize
   *
   * @static
   * @return string
   */
  public static function convertFromRepetition($value)
  {
      $repetitions = '/(?:(.{2,})\1{32,})|(?:[+=|\-@\s]{128,})/';

      return preg_replace($repetitions, 'x', $value);
  }
  /**
   * Looks for comment syntax and appends a comment-free copy of the value
   *
   * When the value contains HTML, C-style, double-slash or double-dash
   * comment markers, a copy in which every recognised comment is replaced
   * by ";" is appended on a new line. The original text is kept.
   * Afterwards slashes between a tag name and an attribute are collapsed
   * and double-slash comments are rewritten to the inline form.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromCommented($value)
  {
      $detector = '/(?:\<!-|-->|\/\*|\*\/|\/\/\W*\w+\s*$)|' .
          '(?:--[^-]*-)/ms';

      if (preg_match($detector, $value)) {
          $comments = array(
              '/(?:(?:<!)(?:(?:--(?:[^-]*(?:-[^-]+)*)--\s*)*)(?:>))/ms',
              '/(?:(?:\/\*\/*[^\/\*]*)+\*\/)/ms',
              '/(?:--[^-]*-)/ms'
          );
          // keep the raw input and add the stripped variant below it
          $value = $value . "\n" . preg_replace($comments, ';', $value);
      }

      // <tag//attr= becomes <tag/attr=
      $value = preg_replace('/(<\w+)\/+(\w+=?)/m', '$1/$2', $value);

      // "//rest of line" becomes "/**/rest of line"; the single character
      // in front of the slashes is part of the match and is dropped
      return preg_replace('/[^\\\:]\/\/(.*)$/m', '/**/$1', $value);
  }
  /**
   * Normalizes line breaks and written-out whitespace escapes
   *
   * The two-character sequences backslash + r, n, f, t or v become ";",
   * the replacement character becomes a regular space, and real line
   * breaks (matched by \n, \r and \v) become a space.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromWhiteSpace($value)
  {
      // single quotes on purpose: these are the literal escape sequences
      $escapes  = array('\r', '\n', '\f', '\t', '\v');
      $unbroken = str_replace($escapes, ';', $value);

      // replacement characters are treated as regular spaces
      $unbroken = str_replace('�', ' ', $unbroken);

      // finally flatten the real line breaks
      return preg_replace('/(?:\n|\r|\v)/m', ' ', $unbroken);
  }
  /**
   * Checks for common charcode patterns and decodes them
   *
   * Three notations are handled one after another: comma separated
   * decimal lists (optionally with simple arithmetic per item), runs of
   * backslash-prefixed octal numbers and runs of backslash-prefixed
   * hexadecimal numbers. For each notation found, the decoded characters
   * are appended to the value on a new line; only codes from 20 to 127
   * are emitted. The original text is never removed.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromJSCharcode($value)
  {
      $matches = array();

      // check if value matches typical charCode pattern
      if (preg_match_all('/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)){4,}/ms',
          $value, $matches)) {

          $converted = '';
          $string    = implode(',', $matches[0]);
          $string    = preg_replace('/\s/', '', $string);
          $string    = preg_replace('/\w+=/', '', $string);
          $charcode  = explode(',', $string);

          foreach ($charcode as $char) {
              $char = preg_replace('/\W0/s', '', $char);

              if (preg_match_all('/\d*[+-\/\* ]\d+/', $char, $matches)) {
                  // Split into (optionally prefixed) numbers and add them
                  // up. The limit is -1 ("no limit") instead of null,
                  // which is deprecated for this parameter. Empty
                  // fragments add nothing to the sum, so they are dropped.
                  $match = preg_split('/(\W?\d+)/',
                      implode('', $matches[0]),
                      -1,
                      PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
                  $sum = array_sum($match);

                  if ($sum >= 20 && $sum <= 127) {
                      $converted .= chr($sum);
                  }
              } elseif (!empty($char) && $char >= 20 && $char <= 127) {
                  $converted .= chr($char);
              }
          }

          $value .= "\n" . $converted;
      }

      // check for octal charcode pattern
      if (preg_match_all('/(?:(?:[\\\]+\d+[ \t]*){8,})/ims', $value, $matches)) {

          $converted = '';
          $charcode  = explode('\\', preg_replace('/\s/', '', implode(',',
              $matches[0])));

          foreach ($charcode as $char) {
              if (!empty($char)) {
                  $code = octdec($char);
                  if ($code >= 20 && $code <= 127) {
                      $converted .= chr($code);
                  }
              }
          }

          $value .= "\n" . $converted;
      }

      // check for hexadecimal charcode pattern
      if (preg_match_all('/(?:(?:[\\\]+\w+\s*){8,})/ims', $value, $matches)) {

          $converted = '';
          // drop the "u" / "x" markers so only the hex digits remain
          $charcode  = explode('\\', preg_replace('/[ux]/', '', implode(',',
              $matches[0])));

          foreach ($charcode as $char) {
              if (!empty($char)) {
                  $code = hexdec($char);
                  if ($code >= 20 && $code <= 127) {
                      $converted .= chr($code);
                  }
              }
          }

          $value .= "\n" . $converted;
      }

      return $value;
  }
  /**
   * Eliminates JS regex modifiers
   *
   * Every slash that is directly followed by one or more of the letters
   * g, i and m is reduced to the bare slash.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertJSRegexModifiers($value)
  {
      return preg_replace('/\/[gim]+/', '/', $value);
  }
  /**
   * Converts from hex/dec entities
   *
   * "&amp;" is first turned back into "&" to undo double encoding. If a
   * numeric entity is present, a copy with the entities decoded is
   * appended on a new line. Finally "javascript" and "data" written with
   * whitespace between the letters are both rewritten to "javascript".
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertEntities($value)
  {
      // deal with double encoded payload
      $value = preg_replace('/&amp;/', '&', $value);

      if (preg_match('/&#x?[\w]+/ms', $value)) {
          // give every entity a terminating semicolon before decoding
          $terminated = preg_replace('/(&#x?[\w]{2}\d?);?/ms', '$1;', $value);
          $decoded    = html_entity_decode($terminated, ENT_QUOTES, 'UTF-8');

          $value .= "\n" . str_replace(';;', ';', $decoded);
      }

      // normalize obfuscated protocol handlers
      return preg_replace(
          '/(?:j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)|(d\s*a\s*t\s*a\s*)/ms',
          'javascript',
          $value
      );
  }
  /**
   * Normalizes quotes
   *
   * Single quotes, backticks and the typographic quote characters listed
   * below are replaced by a double quote. A value that then consists of
   * nothing but one quoted string without any of " = ! > < ~ inside
   * loses its surrounding quotes.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertQuotes($value)
  {
      // normalize different quotes to "
      $quoteChars = array('\'', '`', '´', '’', '‘');
      $normalized = str_replace($quoteChars, '"', $value);

      // make sure harmless quoted strings don't generate false alerts
      return preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $normalized);
  }
  /**
   * Converts SQLHEX to plain text
   *
   * Every 0x... literal found is replaced by the bytes its digit pairs
   * encode. The digits are paired starting right after the "0x" marker,
   * so a literal that follows another character (e.g. "=0x414243")
   * decodes correctly, and the text surrounding the literal is preserved.
   * A trailing single digit that does not form a pair is discarded.
   * Any remaining "0x" followed by decimal digits becomes "1".
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromSQLHex($value)
  {
      $matches = array();

      if (preg_match_all('/(?:(?:\A|[^\d])0x[a-f\d]{2,}[a-f\d]*)+/im', $value, $matches)) {
          foreach ($matches[0] as $match) {
              // With DELIM_CAPTURE the odd indexes carry the hex digits
              // of each literal, the even indexes the text around them.
              $parts = preg_split(
                  '/0x([a-f\d]+)/i',
                  $match,
                  -1,
                  PREG_SPLIT_DELIM_CAPTURE
              );

              $converted = '';
              foreach ($parts as $index => $part) {
                  if ($index % 2 === 0) {
                      // delimiter / separator text is kept verbatim
                      $converted .= $part;
                      continue;
                  }
                  foreach (str_split($part, 2) as $pair) {
                      if (strlen($pair) === 2) {
                          $converted .= chr(hexdec($pair));
                      }
                  }
              }

              $value = str_replace($match, $converted, $value);
          }
      }

      // take care of hex encoded ctrl chars
      $value = preg_replace('/0x\d+/m', '1', $value);

      return $value;
  }
  /**
   * Converts basic SQL keywords and obfuscations
   *
   * Applies a fixed sequence of rewrites that reduce SQL comparison
   * idioms, NULL/boolean/time keywords, selected function calls and
   * negated operators to short canonical tokens such as "=0, 0, "or"
   * and "!". The order of the steps matters because later patterns work
   * on the output of earlier ones.
   *
   * All replacements are passed as strings; null is deprecated as a
   * preg_replace() replacement as of PHP 8.1.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromSQLKeywords($value)
  {
      // IS null / LIKE null / IN (...) lists
      $pattern = array('/(?:IS\s+null)|(LIKE\s+null)|' .
          '(?:(?:^|\W)IN[+\s]*\([\s\d"]+[^()]*\))/ims');
      $value   = preg_replace($pattern, '"=0', $value);

      $value = preg_replace('/\W+\s*like\s*\W+/ims', '1" OR "1"', $value);
      $value = preg_replace('/null[,"\s]/ims', ',0', $value);
      $value = preg_replace('/\d+\./ims', ' 1', $value);
      $value = preg_replace('/,null/ims', ',0', $value);
      $value = preg_replace('/(?:between|mod)/ims', 'or', $value);
      $value = preg_replace('/(?:and\s+\d+\.?\d*)/ims', '', $value);
      $value = preg_replace('/(?:\s+and\s+)/ims', ' or ', $value);

      // keywords, selected function calls and negative numbers become 0
      $pattern = array('/[^\w,(]NULL|\\\N|TRUE|FALSE|UTC_TIME|' .
          'LOCALTIME(?:STAMP)?|CURRENT_\w+|BINARY|' .
          '(?:(?:ASCII|SOUNDEX|FIND_IN_SET|' .
          'MD5|R?LIKE)[+\s]*\([^()]+\))|(?:-+\d)/ims');
      $value   = preg_replace($pattern, '0', $value);

      // negations and alternative comparison operators become !
      $pattern = array('/(?:NOT\s+BETWEEN)|(?:IS\s+NOT)|(?:NOT\s+IN)|' .
          '(?:XOR|\WDIV\W|<>|RLIKE(?:\s+BINARY)?)|' .
          '(?:REGEXP\s+BINARY)|' .
          '(?:SOUNDS\s+LIKE)/ims');
      $value   = preg_replace($pattern, '!', $value);

      $value = preg_replace('/"\s+\d/', '"', $value);
      // drop "/<number>" and "/null"
      $value = preg_replace('/\/(?:\d+|null)/', '', $value);

      return $value;
  }
  /**
   * Neutralizes null bytes, control characters and look-alike characters
   *
   * The bytes listed in $search are replaced by the text "%00". Selected
   * multi-byte sequences are then removed or mapped to < > " ' on the
   * URL-encoded form of the value, a set of numeric/named entities is
   * removed, and angle-bracket look-alikes are mapped to "<" and ">".
   *
   * The look-alike lists are written as UTF-8 byte escapes so that they
   * do not depend on the encoding of this file. They include the
   * fullwidth signs (U+FF1C, U+FF1E) and the angle brackets U+2329 and
   * U+232A.
   *
   * All replacements are passed as strings; null is deprecated as a
   * preg_replace() replacement as of PHP 8.1.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromControlChars($value)
  {
      // critical ctrl values
      $search = array(
          chr(0), chr(1), chr(2), chr(3), chr(4), chr(5),
          chr(6), chr(7), chr(8), chr(11), chr(12), chr(14),
          chr(15), chr(16), chr(17), chr(18), chr(19), chr(24),
          chr(25), chr(192), chr(193), chr(238), chr(255)
      );
      $value = str_replace($search, '%00', $value);

      // take care for malicious unicode characters; the patterns run on
      // the URL-encoded value so multi-byte sequences can be addressed
      // as %XX tokens
      $value = urldecode(preg_replace('/(?:%E(?:2|3)%8(?:0|1)%(?:A|8|9)' .
          '\w|%EF%BB%BF|%EF%BF%BD)|(?:&#(?:65|8)\d{3};?)/i', '',
          urlencode($value)));
      $value = urldecode(
          preg_replace('/(?:%F0%80%BE)/i', '>', urlencode($value)));
      $value = urldecode(
          preg_replace('/(?:%F0%80%BC)/i', '<', urlencode($value)));
      $value = urldecode(
          preg_replace('/(?:%F0%80%A2)/i', '"', urlencode($value)));
      $value = urldecode(
          preg_replace('/(?:%F0%80%A7)/i', '\'', urlencode($value)));

      $value = preg_replace('/(?:%ff1c)/', '<', $value);

      $value = preg_replace(
          '/(?:&[#x]*(200|820|200|820|zwn?j|lrm|rlm)\w?;?)/i', '', $value
      );

      $value = preg_replace('/(?:&#(?:65|8)\d{3};?)|' .
          '(?:&#(?:56|7)3\d{2};?)|' .
          '(?:&#x(?:fe|20)\w{2};?)|' .
          '(?:&#x(?:d[c-f])\w{2};?)/i', '',
          $value);

      // opening look-alikes: U+00AB, U+3008, U+FF1C, U+2039, U+2329, U+27E8
      $value = str_replace(
          array(
              "\xC2\xAB", "\xE3\x80\x88", "\xEF\xBC\x9C",
              "\xE2\x80\xB9", "\xE2\x8C\xA9", "\xE2\x9F\xA8"
          ),
          '<',
          $value
      );

      // closing look-alikes: U+00BB, U+3009, U+FF1E, U+203A, U+232A, U+27E9
      $value = str_replace(
          array(
              "\xC2\xBB", "\xE3\x80\x89", "\xEF\xBC\x9E",
              "\xE2\x80\xBA", "\xE2\x8C\xAA", "\xE2\x9F\xA9"
          ),
          '>',
          $value
      );

      return $value;
  }
  /**
   * Decodes base64 strings and fragments as used in data URIs
   *
   * A candidate is a run of at least 30 letters/digits (plus optional
   * "=" padding) at the start of a line or after one of , & ?.
   * Candidates containing 32 consecutive hex digits are left alone;
   * every other candidate is replaced in the value by its
   * base64-decoded form.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromNestedBase64($value)
  {
      $candidates = array();
      preg_match_all(
          '/(?:^|[,&?])\s*([a-z0-9]{30,}=*)(?:\W|$)/im',
          $value,
          $candidates
      );

      foreach ($candidates[1] as $encoded) {
          // skip unset items and anything holding 32 hex digits in a row
          if (!isset($encoded) || preg_match('/[a-f0-9]{32}/i', $encoded)) {
              continue;
          }
          $value = str_replace($encoded, base64_decode($encoded), $value);
      }

      return $value;
  }
  /**
   * Replaces every byte outside the 7-bit printable range by a space
   *
   * Each byte with a value of 127 or above is replaced by a single
   * space, so a multi-byte character yields one space per byte. The
   * pattern has no "u" modifier and therefore works on bytes. The work
   * is done in one pass instead of one full-string replacement per
   * input byte.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromOutOfRangeChars($value)
  {
      return preg_replace('/[\x7F-\xFF]/', ' ', $value);
  }
  /**
   * Strips XML patterns
   *
   * If strip_tags() changes the value and leaves a non-empty result,
   * that tag-free text is appended on a new line; otherwise the value is
   * returned untouched.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromXML($value)
  {
      $plain = strip_tags($value);

      // nothing left, or nothing was removed: no extra line needed
      if (!$plain || $plain == $value) {
          return $value;
      }

      return $value . "\n" . $plain;
  }
  /**
   * Converts JS unicode code points to regular characters
   *
   * Every backslash-u escape with four hex digits is replaced by the
   * single byte chr() returns for its numeric value. When at least one
   * escape was found, a marker line is appended to the value.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromJSUnicode($value)
  {
      $escapes = array();
      preg_match_all('/\\\u[0-9a-f]{4}/ims', $value, $escapes);

      if (empty($escapes[0])) {
          return $value;
      }

      foreach ($escapes[0] as $escape) {
          // skip the two-character prefix and read the four hex digits
          $byte  = chr(hexdec(substr($escape, 2, 4)));
          $value = str_replace($escape, $byte, $value);
      }

      return $value . "\n\u0001";
  }
  /**
   * Converts relevant UTF-7 tags to UTF-8
   *
   * Nothing happens unless the value contains something that looks like
   * a UTF-7 shifted sequence ("+A...-"). With mbstring available the
   * UTF-7 decoded value is appended on a new line; on PHP versions
   * below 5.2.8 all bytes above 127 are removed from the value first.
   * Without mbstring a fixed table of critical sequences is replaced
   * in place.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromUTF7($value)
  {
      if (!preg_match('/\+A\w+-/m', $value)) {
          return $value;
      }

      if (!function_exists('mb_convert_encoding')) {
          // list of all critical UTF7 codepoints
          $schemes = array(
              '+ACI-'      => '"',
              '+ADw-'      => '<',
              '+AD4-'      => '>',
              '+AFs-'      => '[',
              '+AF0-'      => ']',
              '+AHs-'      => '{',
              '+AH0-'      => '}',
              '+AFw-'      => '\\',
              '+ADs-'      => ';',
              '+ACM-'      => '#',
              '+ACY-'      => '&',
              '+ACU-'      => '%',
              '+ACQ-'      => '$',
              '+AD0-'      => '=',
              '+AGA-'      => '`',
              '+ALQ-'      => '"',
              '+IBg-'      => '"',
              '+IBk-'      => '"',
              '+AHw-'      => '|',
              '+ACo-'      => '*',
              '+AF4-'      => '^',
              '+ACIAPg-'   => '">',
              '+ACIAPgA8-' => '">'
          );

          return str_ireplace(
              array_keys($schemes),
              array_values($schemes),
              $value
          );
      }

      if (version_compare(PHP_VERSION, '5.2.8', '<')) {
          // keep 7-bit bytes only before handing the value to mbstring
          $ascii = '';
          foreach (str_split($value) as $byte) {
              if (ord($byte) <= 127) {
                  $ascii .= $byte;
              }
          }
          $value = $ascii;
      }

      return $value . "\n" . mb_convert_encoding($value, 'UTF-8', 'UTF-7');
  }
  /**
   * Converts basic concatenations
   *
   * If removing backslashes that follow a word character changes the
   * value, that variant is appended directly (without a separator).
   * A slash-stripped copy is then reduced by removing common JavaScript
   * concatenation and padding constructs; when the reduced copy is
   * non-empty and differs, it is appended on a new line.
   *
   * All replacements are passed as strings; null is deprecated as a
   * preg_replace() replacement as of PHP 8.1.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromConcatenated($value)
  {
      // normalize remaining backslashes (computed once, used twice)
      $unescaped = preg_replace('/(\w)\\\/', '$1', $value);
      if ($value != $unescaped) {
          $value .= $unescaped;
      }

      $compare = stripslashes($value);

      $pattern = array('/(?:<\/\w+>\+<\w+>)/s',
          '/(?:":\d+[^"[]+")/s',
          '/(?:"?"\+\w+\+")/s',
          '/(?:"\s*;[^"]+")|(?:";[^"]+:\s*")/s',
          '/(?:"\s*(?:;|\+).{8,18}:\s*")/s',
          '/(?:";\w+=)|(?:!""&&")|(?:~)/s',
          '/(?:"?"\+""?\+?"?)|(?:;\w+=")|(?:"[|&]{2,})/s',
          '/(?:"\s*\W+")/s',
          '/(?:";\w\s*\+=\s*\w?\s*")/s',
          '/(?:"[|&;]+\s*[^|&\n]*[|&]+\s*"?)/s',
          '/(?:";\s*\w+\W+\w*\s*[|&]*")/s',
          '/(?:"\s*"\s*\.)/s',
          '/(?:\s*new\s+\w+\s*[+",])/',
          '/(?:(?:^|\s+)(?:do|else)\s+)/',
          '/(?:[{(]\s*new\s+\w+\s*[)}])/',
          '/(?:(this|self)\.)/',
          '/(?:undefined)/',
          '/(?:in\s+)/');

      // strip out concatenations
      $converted = preg_replace($pattern, '', $compare);

      // strip object traversal
      $converted = preg_replace('/\w(\.\w\()/', '$1', $converted);

      // normalize obfuscated method calls
      $converted = preg_replace('/\)\s*\+/', ')', $converted);

      // convert JS special numbers
      $converted = preg_replace('/(?:\(*[.\d]e[+-]*[^a-z\W]+\)*)' .
          '|(?:NaN|Infinity)\W/ims', '1', $converted);

      // loose comparison kept on purpose: switching to !== would change
      // which values get the extra line
      if ($converted && ($compare != $converted)) {
          $value .= "\n" . $converted;
      }

      return $value;
  }
  /**
   * Collects and decodes proprietary encoding types
   *
   * A sequence of independent clean-ups meant to reduce false alerts:
   * unwrapping CDATA sections that hold only non-word characters,
   * dropping quotes in common harmless positions, removing OpenID
   * style tokens, the word "Content", emoticons and repeated separator
   * characters, and normalizing pipe/ampersand separated lists.
   *
   * All replacements are passed as strings; null is deprecated as a
   * preg_replace() replacement as of PHP 8.1.
   *
   * @param string $value the value to convert
   *
   * @static
   * @return string
   */
  public static function convertFromProprietaryEncodings($value)
  {
      // Xajax error reportings
      $value = preg_replace('/<!\[CDATA\[(\W+)\]\]>/im', '$1', $value);

      // strip false alert triggering apostrophes
      $value = preg_replace('/(\w)\"(s)/m', '$1$2', $value);

      // strip quotes within typical search patterns
      $value = preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $value);

      // OpenID login tokens
      $value = preg_replace('/{[\w-]{8,9}\}(?:\{[\w=]{8}\}){2}/', '', $value);

      // remove "Content" and a "do" that stands between a non-word
      // character and whitespace
      $value = preg_replace('/Content|\Wdo\s/', '', $value);

      // strip emoticons
      $value = preg_replace(
          '/(?:\s[:;]-[)\/PD]+)|(?:\s;[)PD]+)|(?:\s:[)PD]+)|-\.-|\^\^/m',
          '',
          $value
      );

      // normalize separation char repetition (three or more become one)
      $value = preg_replace('/([.+~=*_\-;])\1{2,}/m', '$1', $value);

      // normalize multiple double quotes
      $value = preg_replace('/"{2,}/m', '"', $value);

      // normalize quoted numerical values
      $value = preg_replace('/"(\d+)"/m', '$1', $value);

      // normalize pipe separated request parameters
      $value = preg_replace('/\|(\w+=\w+)/m', '&$1', $value);

      // normalize ampersand listings
      $value = preg_replace('/(\w\s)&\s(\w)/', '$1$2', $value);

      // normalize escaped RegExp modifiers
      $value = preg_replace('/\/\\\(\w)/', '/$1', $value);

      return $value;
  }
  /**
   * This method is the centrifuge prototype
   *
   * Two heuristics are applied. For values longer than 25 bytes the
   * ratio between the overall length and the number of "special"
   * characters is computed on a simplified copy; at or below the
   * threshold the marker line "$[!!!]" is appended. For values longer
   * than 40 bytes the distinct special characters are normalized to the
   * three tokens ( + and : and, if a suspicious mix is present, the
   * sorted token string is appended on a new line.
   *
   * Findings are recorded in $monitor->centrifuge when a monitor is
   * given. The monitor is optional: without one the value is still
   * annotated and nothing is recorded.
   *
   * @param string      $value   the value to convert
   * @param IDS_Monitor $monitor the monitor object (may be null)
   *
   * @static
   * @return string
   */
  public static function runCentrifuge($value, IDS_Monitor $monitor = null)
  {
      $threshold = 3.49;

      if (strlen($value) > 25) {
          // strip padding
          $tmp_value = preg_replace('/\s{4}|==$/m', '', $value);
          $tmp_value = preg_replace(
              '/\s{4}|[\p{L}\d\+\-=,.%()]{8,}/m',
              'aaa',
              $tmp_value
          );

          // Check for the attack char ratio
          $tmp_value = preg_replace('/([*.!?+-])\1{1,}/m', '$1', $tmp_value);
          $tmp_value = preg_replace('/"[\p{L}\d\s]+"/m', '', $tmp_value);

          $stripped_length = strlen(
              preg_replace('/[\d\s\p{L}\.:,%&\/><\-)!|]+/m', '', $tmp_value)
          );
          $overall_length = strlen(
              preg_replace('/([\d\s\p{L}:,\.]{3,})+/m', 'aaa',
              preg_replace('/\s{2,}/m', '', $tmp_value))
          );

          if ($stripped_length != 0
              && $overall_length / $stripped_length <= $threshold) {

              // the monitor defaults to null, so only record when given
              if ($monitor !== null) {
                  $monitor->centrifuge['ratio'] =
                      $overall_length / $stripped_length;
                  $monitor->centrifuge['threshold'] =
                      $threshold;
              }

              $value .= "\n$[!!!]";
          }
      }

      if (strlen($value) > 40) {
          // Replace all non-special chars
          $converted = preg_replace('/[\w\s\p{L},.:!]/', '', $value);

          // Split string into an array, unify and sort
          $array = str_split($converted);
          $array = array_unique($array);
          asort($array);

          // Normalize certain tokens: all of these count as "+"
          $converted = implode($array);
          $converted = str_replace(
              array('~', '^', '|', '*', '%', '&', '/'),
              '+',
              $converted
          );

          $converted = preg_replace('/[+-]\s*\d+/', '+', $converted);
          $converted = preg_replace('/[()[\]{}]/', '(', $converted);
          $converted = preg_replace('/[!?:=]/', ':', $converted);
          $converted = preg_replace('/[^:(+]/', '', stripslashes($converted));

          // Sort again and implode
          $array = str_split($converted);
          asort($array);
          $converted = implode($array);

          if (preg_match('/(?:\({2,}\+{2,}:{2,})|(?:\({2,}\+{2,}:+)|' .
              '(?:\({3,}\++:{2,})/', $converted)) {

              if ($monitor !== null) {
                  $monitor->centrifuge['converted'] = $converted;
              }

              return $value . "\n" . $converted;
          }
      }

      return $value;
  }
  658. }
  659. /**
  660. * Local variables:
  661. * tab-width: 4
  662. * c-basic-offset: 4
  663. * End:
  664. * vim600: sw=4 ts=4 expandtab
  665. */