PageRenderTime 23ms CodeModel.GetById 2ms app.highlight 16ms RepoModel.GetById 1ms app.codeStats 0ms

/libraries/php-ids/lib/IDS/Converter.php

https://bitbucket.org/syahzul/blog
PHP | 769 lines | 415 code | 101 blank | 253 comment | 38 complexity | dd4f7c0f188b9e1337a6545e3178c53a MD5 | raw file
  1<?php
  2
  3/**
  4 * PHPIDS
  5 *
  6 * Requirements: PHP5, SimpleXML
  7 *
  8 * Copyright (c) 2008 PHPIDS group (http://php-ids.org)
  9 *
 10 * PHPIDS is free software; you can redistribute it and/or modify
 11 * it under the terms of the GNU Lesser General Public License as published by
 12 * the Free Software Foundation, version 3 of the License, or 
 13 * (at your option) any later version.
 14 *
 15 * PHPIDS is distributed in the hope that it will be useful,
 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 18 * GNU Lesser General Public License for more details.
 19 * 
 20 * You should have received a copy of the GNU Lesser General Public License
 21 * along with PHPIDS. If not, see <http://www.gnu.org/licenses/>. 
 22 *
 23 * PHP version 5.1.6+
 24 *
 25 * @category Security
 26 * @package  PHPIDS
 27 * @author   Mario Heiderich <mario.heiderich@gmail.com>
 28 * @author   Christian Matthies <ch0012@gmail.com>
 29 * @author   Lars Strojny <lars@strojny.net>
 30 * @license  http://www.gnu.org/licenses/lgpl.html LGPL
 31 * @link     http://php-ids.org/
 32 */
 33
 34/**
 35 * PHPIDS specific utility class to convert charsets manually
 36 *
 37 * Note that if you make use of IDS_Converter::runAll(), existing class
 38 * methods will be executed in the same order as they are implemented in the
 39 * class tree!
 40 *
 41 * @category  Security
 42 * @package   PHPIDS
 43 * @author    Christian Matthies <ch0012@gmail.com>
 44 * @author    Mario Heiderich <mario.heiderich@gmail.com>
 45 * @author    Lars Strojny <lars@strojny.net>
 46 * @copyright 2007-2009 The PHPIDS Group
 47 * @license   http://www.gnu.org/licenses/lgpl.html LGPL
 48 * @version   Release: $Id:Converter.php 517 2007-09-15 15:04:13Z mario $
 49 * @link      http://php-ids.org/
 50 */
 51class IDS_Converter
 52{
 53    /**
 54     * Runs all converter functions
 55     *
 56     * Note that if you make use of IDS_Converter::runAll(), existing class
 57     * methods will be executed in the same order as they are implemented in the
 58     * class tree!
 59     *
 60     * @param string $value the value to convert
 61     *
 62     * @static
 63     * @return string
 64     */
 65    public static function runAll($value)
 66    {
 67        foreach (get_class_methods(__CLASS__) as $method) {
 68
 69            if (strpos($method, 'run') === 0) {
 70                continue;
 71            }
 72            $value = self::$method($value);
 73        }
 74
 75        return $value;
 76    }
 77
 78    /**
 79     * Make sure the value to normalize and monitor doesn't contain 
 80     * possibilities for a regex DoS.
 81     * 
 82     * @param string $value the value to pre-sanitize
 83     *
 84     * @static
 85     * @return string
 86     */
 87    public static function convertFromRepetition($value) 
 88    {
 89        // remove obvios repetition patterns
 90        $value = preg_replace(
 91            '/(?:(.{2,})\1{32,})|(?:[+=|\-@\s]{128,})/', 
 92            'x', 
 93            $value
 94        );
 95        return $value;
 96    }
 97
 98    /**
 99     * Check for comments and erases them if available
100     *
101     * @param string $value the value to convert
102     *
103     * @static
104     * @return string
105     */
106    public static function convertFromCommented($value)
107    {
108        // check for existing comments
109        if (preg_match('/(?:\<!-|-->|\/\*|\*\/|\/\/\W*\w+\s*$)|' .
110            '(?:--[^-]*-)/ms', $value)) {
111
112            $pattern = array(
113                '/(?:(?:<!)(?:(?:--(?:[^-]*(?:-[^-]+)*)--\s*)*)(?:>))/ms',
114                '/(?:(?:\/\*\/*[^\/\*]*)+\*\/)/ms',
115                '/(?:--[^-]*-)/ms'
116            );
117
118            $converted = preg_replace($pattern, ';', $value);
119            $value    .= "\n" . $converted;
120        }
121        
122        //make sure inline comments are detected and converted correctly
123        $value = preg_replace('/(<\w+)\/+(\w+=?)/m', '$1/$2', $value);
124        $value = preg_replace('/[^\\\:]\/\/(.*)$/m', '/**/$1', $value);
125
126        return $value;
127    }
128
129    /**
130     * Strip newlines
131     *
132     * @param string $value the value to convert
133     *
134     * @static
135     * @return string
136     */
137    public static function convertFromWhiteSpace($value)
138    {
139        //check for inline linebreaks
140        $search = array('\r', '\n', '\f', '\t', '\v');
141        $value  = str_replace($search, ';', $value);
142
143        // replace replacement characters regular spaces
144        $value = str_replace('�', ' ', $value);
145
146        //convert real linebreaks
147        return preg_replace('/(?:\n|\r|\v)/m', '  ', $value);
148    }
149
150    /**
151     * Checks for common charcode pattern and decodes them
152     *
153     * @param string $value the value to convert
154     *
155     * @static
156     * @return string
157     */
158    public static function convertFromJSCharcode($value)
159    {
160        $matches = array();
161
162        // check if value matches typical charCode pattern
163        if (preg_match_all('/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)){4,}/ms',
164            $value, $matches)) {
165
166            $converted = '';
167            $string    = implode(',', $matches[0]);
168            $string    = preg_replace('/\s/', '', $string);
169            $string    = preg_replace('/\w+=/', '', $string);
170            $charcode  = explode(',', $string);
171
172            foreach ($charcode as $char) {
173                $char = preg_replace('/\W0/s', '', $char);
174
175                if (preg_match_all('/\d*[+-\/\* ]\d+/', $char, $matches)) {
176                    $match = preg_split('/(\W?\d+)/',
177                                        (implode('', $matches[0])),
178                                        null,
179                                        PREG_SPLIT_DELIM_CAPTURE);
180
181                    if (array_sum($match) >= 20 && array_sum($match) <= 127) {
182                        $converted .= chr(array_sum($match));
183                    }
184
185                } elseif (!empty($char) && $char >= 20 && $char <= 127) {
186                    $converted .= chr($char);
187                }
188            }
189
190            $value .= "\n" . $converted;
191        }
192
193        // check for octal charcode pattern
194        if (preg_match_all('/(?:(?:[\\\]+\d+[ \t]*){8,})/ims', $value, $matches)) {
195
196            $converted = '';
197            $charcode  = explode('\\', preg_replace('/\s/', '', implode(',',
198                $matches[0])));
199
200            foreach ($charcode as $char) {
201                if (!empty($char)) {
202                    if (octdec($char) >= 20 && octdec($char) <= 127) {
203                        $converted .= chr(octdec($char));
204                    }
205                }
206            }
207            $value .= "\n" . $converted;
208        }
209
210        // check for hexadecimal charcode pattern
211        if (preg_match_all('/(?:(?:[\\\]+\w+\s*){8,})/ims', $value, $matches)) {
212
213            $converted = '';
214            $charcode  = explode('\\', preg_replace('/[ux]/', '', implode(',',
215                $matches[0])));
216
217            foreach ($charcode as $char) {
218                if (!empty($char)) {
219                    if (hexdec($char) >= 20 && hexdec($char) <= 127) {
220                        $converted .= chr(hexdec($char));
221                    }
222                }
223            }
224            $value .= "\n" . $converted;
225        }
226
227        return $value;
228    }
229
230    /**
231     * Eliminate JS regex modifiers
232     *
233     * @param string $value the value to convert
234     *
235     * @static
236     * @return string
237     */
238    public static function convertJSRegexModifiers($value)
239    {
240        $value = preg_replace('/\/[gim]+/', '/', $value);
241
242        return $value;
243    }
244
245    /**
246     * Converts from hex/dec entities
247     *
248     * @param string $value the value to convert
249     *
250     * @static
251     * @return string
252     */
253    public static function convertEntities($value)
254    {
255        $converted = null;
256        
257        //deal with double encoded payload 
258        $value = preg_replace('/&amp;/', '&', $value);     
259        
260        if (preg_match('/&#x?[\w]+/ms', $value)) {
261            $converted = preg_replace('/(&#x?[\w]{2}\d?);?/ms', '$1;', $value);
262            $converted = html_entity_decode($converted, ENT_QUOTES, 'UTF-8');
263            $value    .= "\n" . str_replace(';;', ';', $converted);
264        }
265        // normalize obfuscated protocol handlers
266        $value = preg_replace(
267            '/(?:j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)|(d\s*a\s*t\s*a\s*)/ms', 
268            'javascript', $value
269        );
270        
271        return $value;
272    }
273
274    /**
275     * Normalize quotes
276     *
277     * @param string $value the value to convert
278     *
279     * @static
280     * @return string
281     */
282    public static function convertQuotes($value)
283    {
284        // normalize different quotes to "
285        $pattern = array('\'', '`', '´', '’', '‘');
286        $value   = str_replace($pattern, '"', $value);
287
288        //make sure harmless quoted strings don't generate false alerts
289        $value = preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $value);
290
291        return $value;
292    }
293
294    /**
295     * Converts SQLHEX to plain text
296     *
297     * @param string $value the value to convert
298     *
299     * @static
300     * @return string
301     */
302    public static function convertFromSQLHex($value)
303    {
304        $matches = array();
305        if(preg_match_all('/(?:(?:\A|[^\d])0x[a-f\d]{2,}[a-f\d]*)+/im', $value, $matches)) {
306        	foreach($matches[0] as $match) {
307                $converted = '';
308                foreach(str_split($match, 2) as $hex_index) {
309                    if(preg_match('/[a-f\d]{2,3}/i', $hex_index)) {
310                      $converted .= chr(hexdec($hex_index));
311                    }
312                }
313                $value = str_replace($match, $converted, $value);
314            }
315        }
316        // take care of hex encoded ctrl chars
317        $value = preg_replace('/0x\d+/m', 1, $value);
318        
319        return $value;
320    }
321
322    /**
323     * Converts basic SQL keywords and obfuscations
324     *
325     * @param string $value the value to convert
326     *
327     * @static
328     * @return string
329     */
330    public static function convertFromSQLKeywords($value)
331    {
332        $pattern = array('/(?:IS\s+null)|(LIKE\s+null)|' .
333            '(?:(?:^|\W)IN[+\s]*\([\s\d"]+[^()]*\))/ims');
334        $value   = preg_replace($pattern, '"=0', $value);
335        $value   = preg_replace('/\W+\s*like\s*\W+/ims', '1" OR "1"', $value);
336        $value   = preg_replace('/null[,"\s]/ims', ',0', $value);
337        $value   = preg_replace('/\d+\./ims', ' 1', $value);
338        $value   = preg_replace('/,null/ims', ',0', $value);
339        $value   = preg_replace('/(?:between|mod)/ims', 'or', $value);
340        $value   = preg_replace('/(?:and\s+\d+\.?\d*)/ims', '', $value);
341        $value   = preg_replace('/(?:\s+and\s+)/ims', ' or ', $value);
342        $pattern = array('/[^\w,(]NULL|\\\N|TRUE|FALSE|UTC_TIME|' .
343                         'LOCALTIME(?:STAMP)?|CURRENT_\w+|BINARY|' .
344                         '(?:(?:ASCII|SOUNDEX|FIND_IN_SET|' .
345                         'MD5|R?LIKE)[+\s]*\([^()]+\))|(?:-+\d)/ims');
346        $value   = preg_replace($pattern, 0, $value);
347        $pattern = array('/(?:NOT\s+BETWEEN)|(?:IS\s+NOT)|(?:NOT\s+IN)|' .
348                         '(?:XOR|\WDIV\W|<>|RLIKE(?:\s+BINARY)?)|' .
349                         '(?:REGEXP\s+BINARY)|' .
350                         '(?:SOUNDS\s+LIKE)/ims');
351        $value   = preg_replace($pattern, '!', $value);
352        $value   = preg_replace('/"\s+\d/', '"', $value);
353        $value   = preg_replace('/\/(?:\d+|null)/', null, $value);
354
355        return $value;
356    }
357
358    /**
359     * Detects nullbytes and controls chars via ord()
360     *
361     * @param string $value the value to convert
362     *
363     * @static
364     * @return string
365     */
366    public static function convertFromControlChars($value)
367    {
368        // critical ctrl values
369        $search = array(
370            chr(0), chr(1), chr(2), chr(3), chr(4), chr(5),
371            chr(6), chr(7), chr(8), chr(11), chr(12), chr(14),
372            chr(15), chr(16), chr(17), chr(18), chr(19), chr(24), 
373            chr(25), chr(192), chr(193), chr(238), chr(255)
374        );
375        
376        $value = str_replace($search, '%00', $value);
377
378        //take care for malicious unicode characters
379        $value = urldecode(preg_replace('/(?:%E(?:2|3)%8(?:0|1)%(?:A|8|9)' .
380            '\w|%EF%BB%BF|%EF%BF%BD)|(?:&#(?:65|8)\d{3};?)/i', null,
381                urlencode($value)));
382        $value = urldecode(
383            preg_replace('/(?:%F0%80%BE)/i', '>', urlencode($value)));
384        $value = urldecode(
385            preg_replace('/(?:%F0%80%BC)/i', '<', urlencode($value)));
386        $value = urldecode(
387            preg_replace('/(?:%F0%80%A2)/i', '"', urlencode($value)));
388        $value = urldecode(
389            preg_replace('/(?:%F0%80%A7)/i', '\'', urlencode($value)));		
390
391        $value = preg_replace('/(?:%ff1c)/', '<', $value);
392        $value = preg_replace(
393            '/(?:&[#x]*(200|820|200|820|zwn?j|lrm|rlm)\w?;?)/i', null,$value
394        );
395        $value = preg_replace('/(?:&#(?:65|8)\d{3};?)|' .
396                '(?:&#(?:56|7)3\d{2};?)|' .
397                '(?:&#x(?:fe|20)\w{2};?)|' .
398                '(?:&#x(?:d[c-f])\w{2};?)/i', null,
399                $value);
400                
401        $value = str_replace(
402            array('«', '〈', '<', '‹', '〈', '⟨'), '<', $value
403        );
404        $value = str_replace(
405            array('»', '〉', '>', '›', '〉', '⟩'), '>', $value
406        );
407
408        return $value;
409    }
410
411    /**
412     * This method matches and translates base64 strings and fragments
413     * used in data URIs
414     *
415     * @param string $value the value to convert
416     *
417     * @static
418     * @return string
419     */
420    public static function convertFromNestedBase64($value)
421    {
422        $matches = array();
423        preg_match_all('/(?:^|[,&?])\s*([a-z0-9]{30,}=*)(?:\W|$)/im',
424            $value,
425            $matches);
426
427        foreach ($matches[1] as $item) {
428            if (isset($item) && !preg_match('/[a-f0-9]{32}/i', $item)) {
429                $base64_item = base64_decode($item);
430                $value = str_replace($item, $base64_item, $value);
431            }
432        }
433
434        return $value;
435    }
436
437    /**
438     * Detects nullbytes and controls chars via ord()
439     *
440     * @param string $value the value to convert
441     *
442     * @static
443     * @return string
444     */
445    public static function convertFromOutOfRangeChars($value)
446    {
447        $values = str_split($value);
448        foreach ($values as $item) {
449            if (ord($item) >= 127) {
450                $value = str_replace($item, ' ', $value);
451            }
452        }
453
454        return $value;
455    }
456
457    /**
458     * Strip XML patterns
459     *
460     * @param string $value the value to convert
461     *
462     * @static
463     * @return string
464     */
465    public static function convertFromXML($value)
466    {
467        $converted = strip_tags($value);
468
469        if ($converted && ($converted != $value)) {
470            return $value . "\n" . $converted;
471        }
472        return $value;
473    }
474
475    /**
476     * This method converts JS unicode code points to
477     * regular characters
478     *
479     * @param string $value the value to convert
480     *
481     * @static
482     * @return string
483     */
484    public static function convertFromJSUnicode($value)
485    {
486        $matches = array();
487
488        preg_match_all('/\\\u[0-9a-f]{4}/ims', $value, $matches);
489
490        if (!empty($matches[0])) {
491            foreach ($matches[0] as $match) {
492                $chr = chr(hexdec(substr($match, 2, 4))); 
493                $value = str_replace($match, $chr, $value);
494            }
495            $value .= "\n\u0001";
496        }
497
498        return $value;
499    }
500
501    /**
502     * Converts relevant UTF-7 tags to UTF-8
503     *
504     * @param string $value the value to convert
505     *
506     * @static
507     * @return string
508     */
509    public static function convertFromUTF7($value)
510    {
511        if(preg_match('/\+A\w+-/m', $value)) {
512            if (function_exists('mb_convert_encoding')) {
513                if(version_compare(PHP_VERSION, '5.2.8', '<')) {
514                    $tmp_chars = str_split($value);
515                    $value = '';
516                    foreach($tmp_chars as $char) {
517                        if(ord($char) <= 127) {
518                            $value .= $char;	
519                        }
520                    }     
521                }
522                $value .= "\n" . mb_convert_encoding($value, 'UTF-8', 'UTF-7');
523            } else {
524                //list of all critical UTF7 codepoints
525                $schemes = array(
526                    '+ACI-'      => '"',
527                    '+ADw-'      => '<',
528                    '+AD4-'      => '>',
529                    '+AFs-'      => '[',
530                    '+AF0-'      => ']',
531                    '+AHs-'      => '{',
532                    '+AH0-'      => '}',
533                    '+AFw-'      => '\\',
534                    '+ADs-'      => ';',
535                    '+ACM-'      => '#',
536                    '+ACY-'      => '&',
537                    '+ACU-'      => '%',
538                    '+ACQ-'      => '$',
539                    '+AD0-'      => '=',
540                    '+AGA-'      => '`',
541                    '+ALQ-'      => '"',
542                    '+IBg-'      => '"',
543                    '+IBk-'      => '"',
544                    '+AHw-'      => '|',
545                    '+ACo-'      => '*',
546                    '+AF4-'      => '^',
547                    '+ACIAPg-'   => '">',
548                    '+ACIAPgA8-' => '">'
549                );
550    
551                $value = str_ireplace(array_keys($schemes),
552                    array_values($schemes), $value);
553            }
554        }
555        return $value;
556    }
557
558    /**
559     * Converts basic concatenations
560     *
561     * @param string $value the value to convert
562     *
563     * @static
564     * @return string
565     */
566    public static function convertFromConcatenated($value)
567    {
568        //normalize remaining backslashes
569        if ($value != preg_replace('/(\w)\\\/', "$1", $value)) {
570            $value .= preg_replace('/(\w)\\\/', "$1", $value);
571        }
572
573        $compare = stripslashes($value);
574        
575        $pattern = array('/(?:<\/\w+>\+<\w+>)/s',
576            '/(?:":\d+[^"[]+")/s',
577            '/(?:"?"\+\w+\+")/s',
578            '/(?:"\s*;[^"]+")|(?:";[^"]+:\s*")/s',
579            '/(?:"\s*(?:;|\+).{8,18}:\s*")/s',
580            '/(?:";\w+=)|(?:!""&&")|(?:~)/s',
581            '/(?:"?"\+""?\+?"?)|(?:;\w+=")|(?:"[|&]{2,})/s',
582            '/(?:"\s*\W+")/s',
583            '/(?:";\w\s*\+=\s*\w?\s*")/s',
584            '/(?:"[|&;]+\s*[^|&\n]*[|&]+\s*"?)/s',
585            '/(?:";\s*\w+\W+\w*\s*[|&]*")/s',
586            '/(?:"\s*"\s*\.)/s',
587            '/(?:\s*new\s+\w+\s*[+",])/',
588            '/(?:(?:^|\s+)(?:do|else)\s+)/',
589            '/(?:[{(]\s*new\s+\w+\s*[)}])/',
590            '/(?:(this|self)\.)/',
591            '/(?:undefined)/',
592            '/(?:in\s+)/');
593
594        // strip out concatenations
595        $converted = preg_replace($pattern, null, $compare);
596
597        //strip object traversal
598        $converted = preg_replace('/\w(\.\w\()/', "$1", $converted);
599
600        // normalize obfuscated method calls
601        $converted = preg_replace('/\)\s*\+/', ")", $converted);
602
603        //convert JS special numbers
604        $converted = preg_replace('/(?:\(*[.\d]e[+-]*[^a-z\W]+\)*)' .
605            '|(?:NaN|Infinity)\W/ims', 1, $converted);
606
607        if ($converted && ($compare != $converted)) {
608            $value .= "\n" . $converted;
609        }
610
611        return $value;
612    }
613
614    /**
615     * This method collects and decodes proprietary encoding types
616     *
617     * @param string $value the value to convert
618     *
619     * @static
620     * @return string
621     */
622    public static function convertFromProprietaryEncodings($value) {
623
624        //Xajax error reportings
625        $value = preg_replace('/<!\[CDATA\[(\W+)\]\]>/im', '$1', $value);
626
627        //strip false alert triggering apostrophes
628        $value = preg_replace('/(\w)\"(s)/m', '$1$2', $value);
629
630        //strip quotes within typical search patterns
631        $value = preg_replace('/^"([^"=\\!><~]+)"$/', '$1', $value);
632
633        //OpenID login tokens
634        $value = preg_replace('/{[\w-]{8,9}\}(?:\{[\w=]{8}\}){2}/', null, $value);
635
636        //convert Content and \sdo\s to null
637        $value = preg_replace('/Content|\Wdo\s/', null, $value);
638
639        //strip emoticons
640        $value = preg_replace(
641            '/(?:\s[:;]-[)\/PD]+)|(?:\s;[)PD]+)|(?:\s:[)PD]+)|-\.-|\^\^/m',
642            null,
643            $value
644        );
645        
646        //normalize separation char repetion
647        $value = preg_replace('/([.+~=*_\-;])\1{2,}/m', '$1', $value);
648
649        //normalize multiple single quotes
650        $value = preg_replace('/"{2,}/m', '"', $value);
651        
652        //normalize quoted numerical values and asterisks
653        $value = preg_replace('/"(\d+)"/m', '$1', $value);
654
655        //normalize pipe separated request parameters
656        $value = preg_replace('/\|(\w+=\w+)/m', '&$1', $value);
657
658        //normalize ampersand listings
659        $value = preg_replace('/(\w\s)&\s(\w)/', '$1$2', $value);
660        
661        //normalize escaped RegExp modifiers
662        $value = preg_replace('/\/\\\(\w)/', '/$1', $value);        
663        
664        return $value;
665    }
666
667    /**
668     * This method is the centrifuge prototype
669     *
670     * @param string      $value   the value to convert
671     * @param IDS_Monitor $monitor the monitor object
672     *
673     * @static
674     * @return string
675     */
676    public static function runCentrifuge($value, IDS_Monitor $monitor = null)
677    {
678        $threshold = 3.49;
679        if (strlen($value) > 25) {
680            
681            //strip padding
682            $tmp_value = preg_replace('/\s{4}|==$/m', null, $value);
683            $tmp_value = preg_replace(
684                '/\s{4}|[\p{L}\d\+\-=,.%()]{8,}/m', 
685                'aaa', 
686                $tmp_value
687            );
688            
689            // Check for the attack char ratio
690            $tmp_value = preg_replace('/([*.!?+-])\1{1,}/m', '$1', $tmp_value);
691            $tmp_value = preg_replace('/"[\p{L}\d\s]+"/m', null, $tmp_value);
692
693            $stripped_length = strlen(preg_replace('/[\d\s\p{L}\.:,%&\/><\-)!|]+/m',
694                null, $tmp_value));
695            $overall_length  = strlen(
696                preg_replace('/([\d\s\p{L}:,\.]{3,})+/m', 'aaa',
697                preg_replace('/\s{2,}/m', null, $tmp_value))
698            );
699
700            if ($stripped_length != 0
701                && $overall_length/$stripped_length <= $threshold) {
702
703                $monitor->centrifuge['ratio']     =
704                    $overall_length/$stripped_length;
705                $monitor->centrifuge['threshold'] =
706                    $threshold;
707
708                $value .= "\n$[!!!]";
709            }
710        }
711
712        if (strlen($value) > 40) {
713            // Replace all non-special chars
714            $converted =  preg_replace('/[\w\s\p{L},.:!]/', null, $value);
715
716            // Split string into an array, unify and sort
717            $array = str_split($converted);
718            $array = array_unique($array);
719            asort($array);
720
721            // Normalize certain tokens
722            $schemes = array(
723                '~' => '+',
724                '^' => '+',
725                '|' => '+',
726                '*' => '+',
727                '%' => '+',
728                '&' => '+',
729                '/' => '+'
730            );
731
732            $converted = implode($array);
733            
734            $_keys = array_keys($schemes);
735            $_values = array_values($schemes);
736            
737            $converted = str_replace($_keys, $_values, $converted);
738            
739            $converted = preg_replace('/[+-]\s*\d+/', '+', $converted);
740            $converted = preg_replace('/[()[\]{}]/', '(', $converted);
741            $converted = preg_replace('/[!?:=]/', ':', $converted);
742            $converted = preg_replace('/[^:(+]/', null, stripslashes($converted));
743
744            // Sort again and implode
745            $array = str_split($converted);
746            asort($array);
747
748            $converted = implode($array);
749
750            if (preg_match('/(?:\({2,}\+{2,}:{2,})|(?:\({2,}\+{2,}:+)|' .
751                '(?:\({3,}\++:{2,})/', $converted)) {
752
753                $monitor->centrifuge['converted'] = $converted;
754
755                return $value . "\n" . $converted;
756            }
757        }
758
759        return $value;
760    }
761}
762
763/**
764 * Local variables:
765 * tab-width: 4
766 * c-basic-offset: 4
767 * End:
768 * vim600: sw=4 ts=4 expandtab
769 */