PageRenderTime 63ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/CGI/IDS.pm

https://github.com/gitpan/CGI-IDS
Perl | 2103 lines | 1230 code | 340 blank | 533 comment | 84 complexity | ae45dd9777e564704c89d171f4ddd806 MD5 | raw file
Possible License(s): LGPL-3.0

Large files files are truncated, but you can click here to view the full file

  1. package CGI::IDS;
  2. #------------------------- Notes -----------------------------------------------
  3. # This source code is documented in both POD and ROBODoc format.
  4. # Please find additional POD documentation at the end of this file
  5. # (search for "__END__").
  6. #-------------------------------------------------------------------------------
  7. #****c* IDS
  8. # NAME
  9. # PerlIDS (CGI::IDS)
  10. # DESCRIPTION
  11. # Website Intrusion Detection System based on PHPIDS https://phpids.org rev. 1409
  12. # AUTHOR
  13. # Hinnerk Altenburg <hinnerk@cpan.org>
  14. # CREATION DATE
  15. # 2008-06-03
  16. # COPYRIGHT
  17. # Copyright (C) 2008-2014 Hinnerk Altenburg
  18. #
  19. # This file is part of PerlIDS.
  20. #
  21. # PerlIDS is free software: you can redistribute it and/or modify
  22. # it under the terms of the GNU Lesser General Public License as published by
  23. # the Free Software Foundation, either version 3 of the License, or
  24. # (at your option) any later version.
  25. #
  26. # PerlIDS is distributed in the hope that it will be useful,
  27. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  29. # GNU Lesser General Public License for more details.
  30. #
  31. # You should have received a copy of the GNU Lesser General Public License
  32. # along with PerlIDS. If not, see <http://www.gnu.org/licenses/>.
  33. #****
  34. =head1 NAME
  35. CGI::IDS - PerlIDS - Perl Website Intrusion Detection System (XSS, CSRF, SQLI, LFI etc.)
  36. =head1 VERSION
  37. Version 1.0217 - based on and tested against the filter tests of PHPIDS https://phpids.org rev. 1409
  38. =cut
  39. our $VERSION = '1.0217';
  40. =head1 DESCRIPTION
  41. PerlIDS (CGI::IDS) is a website intrusion detection system based on PHPIDS L<https://phpids.org/> to detect possible attacks in website requests, e.g. Cross-Site Scripting (XSS), Cross-Site Request Forgery (CSRF), SQL Injections (SQLI) etc.
  42. It parses any hashref for possible attacks, so it does not depend on CGI.pm.
  43. The intrusion detection is based on a set of converters that convert the request according to common techniques that are used to hide attacks. These converted strings are checked for attacks by running a filter set of currently 68 regular expressions and a generic attack detector to find obfuscated attacks. For easily keeping the filter set up-to-date, PerlIDS is compatible to the original XML filter set of PHPIDS, which is frequently updated.
  44. Each matching regular expression has it's own impact value that increases the tested string's total attack impact. Using these total impacts, a threshold can be defined by the calling application to log the suspicious requests to database and send out warnings via e-mail or even SMS on high impacts that indicate critical attack activity. These impacts can be summed per IP address, session or user to identify attackers who are testing the website with small impact attacks over a time.
  45. You can improve the speed and the accurancy (reduce false positives) of the IDS by specifying an L<XML whitelist file|CGI::IDS/Whitelist>. This whitelist check can also be processed separately by using L<CGI::IDS::Whitelist|CGI::IDS::Whitelist> if you want to pre-check the parameters on your application servers before you send only the suspicious requests over to worker servers that do the complete CGI::IDS check.
  46. Download and install via CPAN: L<http://search.cpan.org/dist/CGI-IDS/lib/CGI/IDS.pm>
  47. Report issues and contribute to PerlIDS on GitHub: L<https://github.com/hinnerk-a/perlids>
  48. =head1 SYNOPSIS
  49. use CGI;
  50. use CGI::IDS;
  51. $cgi = new CGI;
  52. # instantiate the IDS object;
  53. # do not scan keys, values only; don't scan PHP code injection filters (IDs 58,59,60);
  54. # whitelist the parameters as per given XML whitelist file;
  55. # All arguments are optional, 'my $ids = new CGI::IDS();' is also working correctly,
  56. # loading the entire shipped filter set and not scanning the keys.
  57. # See new() for all possible arguments.
  58. my $ids = new CGI::IDS(
  59. whitelist_file => '/home/hinnerk/ids/param_whitelist.xml',
  60. disable_filters => [58,59,60],
  61. );
  62. # start detection
  63. my %params = $cgi->Vars;
  64. my $impact = $ids->detect_attacks( request => \%params );
  65. if ($impact > 0) {
  66. my_log( $ids->get_attacks() );
  67. }
  68. if ($impact > 30) {
  69. my_warn_user();
  70. my_email( $ids->get_attacks() );
  71. }
  72. if ($impact > 50) {
  73. my_deactivate_user();
  74. my_sms( $ids->get_attacks() );
  75. }
  76. # now with scanning the hash keys
  77. $ids->set_scan_keys(scan_keys => 1);
  78. $impact = $ids->detect_attacks( request => \%params );
  79. See F<examples/demo.pl> in CGI::IDS module package for a running demo.
  80. You might want to build your own 'session impact counter' that increases during multiple suspicious requests by one single user, session or IP address.
  81. =head1 METHODS
  82. =cut
  83. #------------------------- Pragmas ---------------------------------------------
  84. use strict;
  85. use warnings;
  86. #------------------------- Libs ------------------------------------------------
  87. use XML::Simple qw(:strict);
  88. use HTML::Entities;
  89. use MIME::Base64;
  90. use Encode qw(decode);
  91. use Carp;
  92. use Time::HiRes;
  93. use FindBin qw($Bin);
  94. use CGI::IDS::Whitelist;
  95. #------------------------- Settings --------------------------------------------
  96. $XML::Simple::PREFERRED_PARSER = "XML::Parser";
  97. #------------------------- Debugging -------------------------------------------
  98. # debug modes (binary):
  99. use constant DEBUG_KEY_VALUES => (1 << 0); # print each key value pair
  100. use constant DEBUG_IMPACTS => (1 << 1); # print impact per key value pair
  101. use constant DEBUG_ARRAY_INFO => (1 << 2); # print attack info arrays
  102. use constant DEBUG_CONVERTERS => (1 << 3); # print output of each converter
  103. use constant DEBUG_SORT_KEYS_NUM => (1 << 4); # sort request by keys numerically
  104. use constant DEBUG_SORT_KEYS_ALPHA => (1 << 5); # sort request by keys alphabetically
  105. use constant DEBUG_WHITELIST => (1 << 6); # dumps loaded whitelist hash
  106. use constant DEBUG_MATCHED_FILTERS => (1 << 7); # print IDs of matched filters
  107. #use constant DEBUG_MODE => DEBUG_KEY_VALUES |
  108. # DEBUG_IMPACTS |
  109. # DEBUG_WHITELIST |
  110. # DEBUG_ARRAY_INFO |
  111. # DEBUG_CONVERTERS |
  112. # DEBUG_MATCHED_FILTERS |
  113. # DEBUG_SORT_KEYS_NUM;
  114. # simply comment this line out to switch debugging mode on (also uncomment above declaration)
  115. use constant DEBUG_MODE => 0;
  116. #------------------------- Constants -------------------------------------------
  117. # converter functions, processed in this order
  118. my @CONVERTERS = qw/
  119. stripslashes
  120. _convert_from_repetition
  121. _convert_from_commented
  122. _convert_from_whitespace
  123. _convert_from_js_charcode
  124. _convert_js_regex_modifiers
  125. _convert_entities
  126. _convert_quotes
  127. _convert_from_sql_hex
  128. _convert_from_sql_keywords
  129. _convert_from_control_chars
  130. _convert_from_nested_base64
  131. _convert_from_out_of_range_chars
  132. _convert_from_xml
  133. _convert_from_js_unicode
  134. _convert_from_utf7
  135. _convert_from_concatenated
  136. _convert_from_proprietary_encodings
  137. _run_centrifuge
  138. /;
  139. #------------------------- Subs ------------------------------------------------
  140. #****m* IDS/new
  141. # NAME
  142. # Constructor
  143. # DESCRIPTION
  144. # Creates an IDS object.
  145. # The filter set and whitelist will stay loaded during the lifetime of the object.
  146. # You may call detect_attacks() multiple times, the attack array ( get_attacks() )
  147. # will be emptied at the start of each run of detect_attacks().
  148. # INPUT
  149. # HASH
  150. # filters_file STRING The path to the filters XML file (defaults to shipped IDS.xml)
  151. # whitelist_file STRING The path to the whitelist XML file
  152. # scan_keys INT 1 to scan also the keys, 0 if not (default: 0)
  153. # disable_filters ARRAYREF[INT,INT,...] if given, these filter ids will be disabled
  154. # OUTPUT
  155. # IDS object, dies (croaks) if no filter rule could be loaded
  156. # EXAMPLE
  157. # # instantiate object; do not scan keys, values only
  158. # my $ids = new CGI::IDS(
  159. # filters_file => '/home/hinnerk/sandbox/ids/cgi-bin/default_filter.xml',
  160. # whitelist_file => '/home/hinnerk/sandbox/ids/cgi-bin/param_whitelist.xml',
  161. # scan_keys => 0,
  162. # disable_filters => [58,59,60],
  163. # );
  164. #****
  165. =head2 new()
  166. Constructor. Can optionally take a hash of settings. If I<filters_file> is not given,
  167. the shipped filter set will be loaded, I<scan_keys> defaults to 0.
  168. The filter set and whitelist will stay loaded during the lifetime of the object.
  169. You may call C<detect_attacks()> multiple times, the attack array (C<get_attacks()>)
  170. will be emptied at the start of each run of C<detect_attacks()>.
  171. For example, the following is a valid constructor:
  172. my $ids = new CGI::IDS(
  173. filters_file => '/home/hinnerk/ids/default_filter.xml',
  174. whitelist_file => '/home/hinnerk/ids/param_whitelist.xml',
  175. scan_keys => 0,
  176. disable_filters => [58,59,60],
  177. );
  178. The Constructor dies (croaks) if no filter rule could be loaded.
  179. =cut
  180. sub new {
  181. my ($package, %args) = @_;
  182. # defaults
  183. $args{scan_keys} = $args{scan_keys} ? 1 : 0;
  184. my $filters_file_default = __FILE__;
  185. $filters_file_default =~ s/IDS.pm/IDS.xml/;
  186. # self member variables
  187. my $self = {
  188. filters_file => $args{filters_file} || $filters_file_default,
  189. whitelist => CGI::IDS::Whitelist->new(whitelist_file => $args{whitelist_file}),
  190. scan_keys => $args{scan_keys},
  191. impact => 0,
  192. attacks => undef, # []
  193. filters => [],
  194. filter_disabled => { map { $_ => 1} @{$args{disable_filters} || []} },
  195. };
  196. if (DEBUG_MODE & DEBUG_WHITELIST) {
  197. use Data::Dumper; print Dumper($self->{whitelist}->{whitelist});
  198. }
  199. # create object
  200. bless $self, $package;
  201. # read & parse filter XML
  202. if (!$self->_load_filters_from_xml($self->{filters_file})) {
  203. croak "No IDS filter rules loaded!";
  204. }
  205. return $self;
  206. }
  207. #****m* IDS/detect_attacks
  208. # NAME
  209. # detect_attacks
  210. # DESCRIPTION
  211. # Parses a hashref (e.g. $query->Vars) for detection of possible attacks.
  212. # The attack array is emptied at the start of each run.
  213. # INPUT
  214. # +request hashref to be parsed
  215. # OUTPUT
  216. # Impact if filter matched, 0 otherwise
  217. # SYNOPSIS
  218. # $ids->detect_attacks(request => $query->Vars);
  219. #****
  220. =head2 detect_attacks()
  221. DESCRIPTION
  222. Parses a hashref (e.g. $query->Vars) for detection of possible attacks.
  223. The attack array is emptied at the start of each run.
  224. INPUT
  225. +request hashref to be parsed
  226. OUTPUT
  227. Impact if filter matched, 0 otherwise
  228. SYNOPSIS
  229. $ids->detect_attacks(request => $query->Vars);
  230. =cut
  231. sub detect_attacks {
  232. my ($self, %args) = @_;
  233. return 0 unless ($args{request});
  234. my $request = $args{request};
  235. # reset last detection data
  236. $self->{impact} = 0;
  237. $self->{attacks} = [];
  238. $self->{filtered_keys} = [];
  239. $self->{non_filtered_keys} = [];
  240. my @request_keys = keys %$request;
  241. # sorting for filter debugging only
  242. if (DEBUG_MODE & DEBUG_SORT_KEYS_ALPHA) {
  243. @request_keys = sort {$a cmp $b} @request_keys;
  244. }
  245. elsif (DEBUG_MODE & DEBUG_SORT_KEYS_NUM) {
  246. @request_keys = sort {$a <=> $b} @request_keys;
  247. }
  248. foreach my $key (@request_keys) {
  249. my $filter_impact = 0;
  250. my $key_converted = '';
  251. my $value_converted = '';
  252. my $time_ms = 0;
  253. my @matched_filters = ();
  254. my @matched_tags = ();
  255. my $request_value = defined $request->{$key} ? $request->{$key} : '';
  256. if (DEBUG_MODE & DEBUG_KEY_VALUES) {
  257. print "\n\n\n******************************************\n".
  258. "Key : $key\nValue : $request_value\n";
  259. }
  260. if ($self->{whitelist}->is_suspicious(key => $key, request => $request)) {
  261. $request_value = $self->{whitelist}->convert_if_marked_encoded(key => $key, value => $request_value);
  262. my $attacks = $self->_apply_filters($request_value);
  263. if ($attacks->{impact}) {
  264. $filter_impact += $attacks->{impact};
  265. $time_ms += $attacks->{time_ms};
  266. $value_converted = $attacks->{string_converted};
  267. push (@matched_filters, @{$attacks->{filters}});
  268. push (@matched_tags, @{$attacks->{tags}});
  269. }
  270. }
  271. # scan key only if desired
  272. if ($self->{scan_keys}) {
  273. # scan only if value is not harmless
  274. if ( !$self->{whitelist}->is_harmless_string($key) ) {
  275. # apply filters to key
  276. my $attacks = $self->_apply_filters($key);
  277. $filter_impact += $attacks->{impact};
  278. $time_ms += $attacks->{time_ms};
  279. $key_converted = $attacks->{string_converted};
  280. push (@matched_filters, @{$attacks->{filters}});
  281. push (@matched_tags, @{$attacks->{tags}});
  282. }
  283. else {
  284. # skipped, alphanumeric key only
  285. }
  286. }
  287. # add attack to log
  288. my %attack = ();
  289. if ($filter_impact) {
  290. # make arrays unique and sorted
  291. my %seen = ();
  292. @matched_filters = sort grep { ! $seen{$_} ++ } @matched_filters;
  293. %seen = ();
  294. @matched_tags = sort grep { ! $seen{$_} ++ } @matched_tags;
  295. %attack = (
  296. key => $key,
  297. key_converted => $key_converted,
  298. value => $request_value,
  299. value_converted => $value_converted,
  300. time_ms => $time_ms,
  301. impact => $filter_impact,
  302. matched_filters => \@matched_filters,
  303. matched_tags => \@matched_tags,
  304. );
  305. push (@{$self->{attacks}}, \%attack);
  306. }
  307. $self->{impact} += $filter_impact;
  308. if (DEBUG_MODE & DEBUG_ARRAY_INFO && %attack) {
  309. use Data::Dumper;
  310. print "------------------------------------------\n".
  311. Dumper(\%attack) .
  312. "\n\n";
  313. }
  314. if (DEBUG_MODE & DEBUG_MATCHED_FILTERS && @matched_filters) {
  315. my $filters_concat = join ", ", @matched_filters;
  316. print "Filters: $filters_concat\n";
  317. }
  318. if (DEBUG_MODE & DEBUG_IMPACTS) {
  319. print "Impact : $filter_impact\n";
  320. }
  321. } # end of foreach key
  322. push (@{$self->{filtered_keys}}, @{$self->{whitelist}->suspicious_keys()});
  323. push (@{$self->{non_filtered_keys}}, @{$self->{whitelist}->non_suspicious_keys()});
  324. # reset filtered_keys and non_filtered_keys
  325. $self->{whitelist}->reset();
  326. if ($self->{impact} > 0) {
  327. return $self->{impact};
  328. }
  329. else {
  330. return 0;
  331. }
  332. }
  333. #****m* IDS/set_scan_keys
  334. # NAME
  335. # set_scan_keys
  336. # DESCRIPTION
  337. # Sets key scanning option
  338. # INPUT
  339. # +scan_keys 1 to scan keys, 0 to switch off scanning keys, defaults to 0
  340. # OUTPUT
  341. # none
  342. # SYNOPSIS
  343. # $ids->set_scan_keys(scan_keys => 1);
  344. #****
  345. =head2 set_scan_keys()
  346. DESCRIPTION
  347. Sets key scanning option
  348. INPUT
  349. +scan_keys 1 to scan keys, 0 to switch off scanning keys, defaults to 0
  350. OUTPUT
  351. none
  352. SYNOPSIS
  353. $ids->set_scan_keys(scan_keys => 1);
  354. =cut
  355. sub set_scan_keys {
  356. my ($self, %args) = @_;
  357. $self->{scan_keys} = $args{scan_keys} ? 1 : 0;
  358. }
  359. #****m* IDS/get_attacks
  360. # NAME
  361. # get_attacks
  362. # DESCRIPTION
  363. # Get an key/value/impact array of all detected attacks.
  364. # The array is emptied at the start of each run of detect_attacks().
  365. # INPUT
  366. # none
  367. # OUTPUT
  368. # HASHREF (
  369. # key => '',
  370. # value => '',
  371. # impact => n,
  372. # filters => (n, n, n, n, ...),
  373. # tags => ('', '', '', '', ...),
  374. # )
  375. # SYNOPSIS
  376. # $ids->get_attacks();
  377. #****
  378. =head2 get_attacks()
  379. DESCRIPTION
  380. Get an key/value/impact array of all detected attacks.
  381. The array is emptied at the start of each run of C<detect_attacks()>.
  382. INPUT
  383. none
  384. OUTPUT
  385. ARRAY (
  386. key => '',
  387. value => '',
  388. impact => n,
  389. filters => (n, n, n, n, ...),
  390. tags => ('', '', '', '', ...),
  391. )
  392. SYNOPSIS
  393. $ids->get_attacks();
  394. =cut
  395. sub get_attacks {
  396. my ($self) = @_;
  397. return $self->{attacks};
  398. }
  399. #****m* IDS/get_rule_description
  400. # NAME
  401. # get_rule_description
  402. # DESCRIPTION
  403. # This sub returns the rule description for a given rule id. This can be used for logging purposes.
  404. # INPUT
  405. # HASH
  406. # + rule_id id of rule
  407. # OUTPUT
  408. # SCALAR description
  409. # EXAMPLE
  410. # $ids->get_rule_description( rule_id => $rule_id );
  411. #****
  412. =head2 get_rule_description()
  413. DESCRIPTION
  414. Returns the rule description for a given rule id. This can be used for logging purposes.
  415. INPUT
  416. HASH
  417. + rule_id id of rule
  418. OUTPUT
  419. SCALAR description
  420. EXAMPLE
  421. $ids->get_rule_description( rule_id => $rule_id );
  422. =cut
  423. sub get_rule_description {
  424. my ($self, %args) = @_;
  425. return $self->{rule_descriptions}{$args{rule_id}};
  426. }
  427. #****im* IDS/_apply_filters
  428. # NAME
  429. # _apply_filters
  430. # DESCRIPTION
  431. # Applies filter rules to a string to detect attacks
  432. # INPUT
  433. # + $string string to be checked for possible attacks
  434. # OUTPUT
  435. # attack hashref:
  436. # (
  437. # impact => n,
  438. # filters => (n, n, n, ...),
  439. # tags => ('', '', '', ...),
  440. # string_converted => string
  441. # )
  442. # SYNOPSIS
  443. # IDS::_apply_filters($string);
  444. #****
  445. sub _apply_filters {
  446. my ($self, $string) = @_;
  447. my %attack = (
  448. filters => [],
  449. tags => [],
  450. impact => 0,
  451. string_converted => '',
  452. );
  453. # benchmark
  454. my $start_time = Time::HiRes::time();
  455. # make UTF-8 and sanitize from malformated UTF-8, if necessary
  456. $string = $self->{whitelist}->make_utf_8($string);
  457. # run all string converters
  458. $attack{string_converted} = _run_all_converters($string);
  459. # apply filters
  460. foreach my $filter (@{$self->{filters}}) {
  461. # skip disabled filters
  462. next if ($self->{filter_disabled}{$filter->{id}});
  463. my $string_converted_lc = lc($attack{string_converted});
  464. if ($string_converted_lc =~ $filter->{rule}) {
  465. $attack{impact} += $filter->{impact};
  466. push (@{$attack{filters}}, $filter->{id});
  467. push (@{$attack{tags}}, @{$filter->{tags}});
  468. }
  469. }
  470. # benchmark
  471. my $end_time = Time::HiRes::time();
  472. $attack{time_ms} = int(($end_time-$start_time)*1000);
  473. return \%attack;
  474. }
  475. #****im* IDS/_load_filters_from_xml
  476. # NAME
  477. # _load_filters_from_xml
  478. # DESCRIPTION
  479. # loads the filters from PHPIDS filter XML file
  480. # INPUT
  481. # filterfile path + name of the XML filter file
  482. # OUTPUT
  483. # filtercount number of loaded filters
  484. # SYNOPSIS
  485. # IDS::_load_filters_from_xml('/home/xyz/default_filter.xml');
  486. #****
  487. sub _load_filters_from_xml {
  488. my ($self, $filterfile) = @_;
  489. my $filtercnt = 0;
  490. if ($filterfile) {
  491. # read & parse filter XML
  492. my $filterxml;
  493. eval {
  494. $filterxml = XML::Simple::XMLin($filterfile,
  495. forcearray => [ qw(rule description tags tag impact filter filters)],
  496. keyattr => [],
  497. );
  498. };
  499. if ($@) {
  500. croak "Error in _load_filters_from_xml while parsing $filterfile: $@";
  501. }
  502. # convert XML structure into handy data structure
  503. foreach my $filterobj (@{$filterxml->{filter}}) {
  504. my @taglist = ();
  505. foreach my $tag (@{$filterobj->{tags}[0]->{tag}}) {
  506. push(@taglist, $tag);
  507. }
  508. my $rule = '';
  509. eval {
  510. $rule = qr/$filterobj->{rule}[0]/ms;
  511. };
  512. if ($@) {
  513. croak 'Error in filter rule #' . $filterobj->{id} . ': ' . $filterobj->{rule}[0] . ' Message: ' . $@;
  514. }
  515. my %filterhash = (
  516. rule => $rule,
  517. impact => $filterobj->{impact}[0],
  518. id => $filterobj->{id},
  519. tags => \@taglist,
  520. );
  521. push (@{$self->{filters}}, \%filterhash);
  522. $self->{rule_descriptions}{$filterobj->{id}} = $filterobj->{description}[0];
  523. $filtercnt++
  524. }
  525. }
  526. return $filtercnt;
  527. }
  528. #****if* IDS/_run_all_converters
  529. # NAME
  530. # _run_all_converters
  531. # DESCRIPTION
  532. # Runs all converter functions
  533. # INPUT
  534. # value the string to convert
  535. # OUTPUT
  536. # value converted string
  537. # SYNOPSIS
  538. # IDS::_run_all_converters($value);
  539. #****
  540. sub _run_all_converters {
  541. my ($value) = @_;
  542. if (DEBUG_MODE & DEBUG_CONVERTERS) {
  543. print "------------------------------------------\n\n";
  544. }
  545. foreach my $converter (@CONVERTERS) {
  546. no strict 'refs';
  547. $value = $converter->($value);
  548. if (DEBUG_MODE & DEBUG_CONVERTERS) {
  549. print "$converter output:\n$value\n\n";
  550. }
  551. }
  552. return $value;
  553. }
  554. #****if* IDS/_convert_from_repetition
  555. # NAME
  556. # _convert_from_repetition
  557. # DESCRIPTION
  558. # Make sure the value to normalize and monitor doesn't contain
  559. # possibilities for a regex DoS.
  560. # INPUT
  561. # value the value to pre-sanitize
  562. # OUTPUT
  563. # value converted string
  564. # SYNOPSIS
  565. # IDS::_convert_from_repetition($value);
  566. #****
  567. sub _convert_from_repetition {
  568. my ($value) = @_;
  569. # remove obvios repetition patterns
  570. $value = preg_replace(
  571. qr/(?:(.{2,})\1{32,})|(?:[+=|\-@\s]{128,})/,
  572. 'x',
  573. $value
  574. );
  575. return $value;
  576. }
  577. #****if* IDS/_convert_from_commented
  578. # NAME
  579. # _convert_from_commented
  580. # DESCRIPTION
  581. # Check for comments and erases them if available
  582. # INPUT
  583. # value the string to convert
  584. # OUTPUT
  585. # value converted string
  586. # SYNOPSIS
  587. # IDS::_convert_from_commented($value);
  588. #****
  589. sub _convert_from_commented {
  590. my ($value) = @_;
  591. # check for existing comments
  592. if (preg_match(qr/(?:\<!-|-->|\/\*|\*\/|\/\/\W*\w+\s*$)|(?:--[^-]*-)/ms, $value)) { #/
  593. my @pattern = (
  594. qr/(?:(?:<!)(?:(?:--(?:[^-]*(?:-[^-]+)*)--\s*)*)(?:>))/ms,
  595. qr/(?:(?:\/\*\/*[^\/\*]*)+\*\/)/ms,
  596. qr/(?:--[^-]*-)/ms,
  597. );
  598. my $converted = preg_replace(\@pattern, ';', $value);
  599. $value .= "\n" . $converted;
  600. }
  601. # make sure inline comments are detected and converted correctly
  602. $value = preg_replace(qr/(<\w+)\/+(\w+=?)/m, '$1/$2', $value);
  603. $value = preg_replace(qr/[^\\:]\/\/(.*)$/m, '/**/$1', $value);
  604. return $value;
  605. }
  606. #****if* IDS/_convert_from_whitespace
  607. # NAME
  608. # _convert_from_whitespace
  609. # DESCRIPTION
  610. # Strip newlines
  611. # INPUT
  612. # value the string to convert
  613. # OUTPUT
  614. # value converted string
  615. # SYNOPSIS
  616. # IDS::_convert_from_whitespace($value);
  617. #****
  618. sub _convert_from_whitespace {
  619. my ($value) = @_;
  620. # check for inline linebreaks
  621. my @search = ('\r', '\n', '\f', '\t', '\v');
  622. $value = str_replace(\@search, ';', $value);
  623. # replace replacement characters regular spaces
  624. $value = str_replace('�', ' ', $value);
  625. # convert real linebreaks (\013 in Perl instead of \v in PHP et al.)
  626. return preg_replace(qr/(?:\n|\r|\013)/m, ' ', $value);
  627. }
  628. #****if* IDS/_convert_from_js_charcode
  629. # NAME
  630. # _convert_from_js_charcode
  631. # DESCRIPTION
  632. # Checks for common charcode pattern and decodes them
  633. # INPUT
  634. # value the string to convert
  635. # OUTPUT
  636. # value converted string
  637. # SYNOPSIS
  638. # IDS::_convert_from_js_charcode($value);
  639. #****
  640. sub _convert_from_js_charcode {
  641. my ($value) = @_;
  642. my @matches = ();
  643. # check if value matches typical charCode pattern
  644. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  645. if (preg_match_all(qr/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)){4,}/ms,
  646. $value, \@matches)) {
  647. my $converted = '';
  648. my $string = implode(',', $matches[0]);
  649. $string = preg_replace(qr/\s/, '', $string);
  650. $string = preg_replace(qr/\w+=/, '', $string);
  651. my @charcode = explode(',', $string);
  652. foreach my $char (@charcode) {
  653. $char = preg_replace(qr/\W0/s, '', $char);
  654. my @matches = ();
  655. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  656. if (preg_match_all(qr/(\d*[+-\/\* ]\d+)/, $char, \@matches)) {
  657. my @match = split(qr/(\W?\d+)/,
  658. (implode('', $matches[0])),
  659. # null,
  660. # PREG_SPLIT_DELIM_CAPTURE
  661. );
  662. # 3rd argument null, 4th argument PREG_SPLIT_DELIM_CAPTURE is default in Perl and not there
  663. my $test = implode('', $matches[0]);
  664. if (array_sum(@match) >= 20 && array_sum(@match) <= 127) {
  665. $converted .= chr(array_sum(@match));
  666. }
  667. }
  668. elsif ($char && $char >= 20 && $char <= 127) {
  669. $converted .= chr($char);
  670. }
  671. }
  672. $value .= "\n" . $converted;
  673. }
  674. # check for octal charcode pattern
  675. # PHP to Perl note: \\ in Perl instead of \\\ in PHP
  676. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  677. if (preg_match_all(qr/((?:(?:[\\]+\d+\s*){8,}))/ms, $value, \@matches)) {
  678. my $converted = '';
  679. my @charcode = explode('\\', preg_replace(qr/\s/, '', implode(',',
  680. $matches[0])));
  681. foreach my $char (@charcode) {
  682. if ($char) {
  683. if (oct($char) >= 20 && oct($char) <= 127) {
  684. $converted .= chr(oct($char));
  685. }
  686. }
  687. }
  688. $value .= "\n" . $converted;
  689. }
  690. # check for hexadecimal charcode pattern
  691. # PHP to Perl note: \\ in Perl instead of \\\ in PHP
  692. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  693. if (preg_match_all(qr/((?:(?:[\\]+\w+[ \t]*){8,}))/ms, $value, \@matches)) {
  694. my $converted = '';
  695. my @charcode = explode('\\', preg_replace(qr/[ux]/, '', implode(',',
  696. $matches[0])));
  697. foreach my $char (@charcode) {
  698. if ($char) {
  699. if (hex($char) >= 20 && hex($char) <= 127) {
  700. $converted .= chr(hex($char));
  701. }
  702. }
  703. }
  704. $value .= "\n" . $converted;
  705. }
  706. return $value;
  707. }
  708. #****if* IDS/_convert_js_regex_modifiers
  709. # NAME
  710. # _convert_js_regex_modifiers
  711. # DESCRIPTION
  712. # Eliminate JS regex modifiers
  713. # INPUT
  714. # value the string to convert
  715. # OUTPUT
  716. # value converted string
  717. # SYNOPSIS
  718. # IDS::_convert_js_regex_modifiers($value);
  719. #****
  720. sub _convert_js_regex_modifiers {
  721. my ($value) = @_;
  722. $value = preg_replace(qr/\/[gim]+/, '/', $value);
  723. return $value;
  724. }
  725. #****if* IDS/_convert_quotes
  726. # NAME
  727. # _convert_quotes
  728. # DESCRIPTION
  729. # Normalize quotes
  730. # INPUT
  731. # value the string to convert
  732. # OUTPUT
  733. # value converted string
  734. # SYNOPSIS
  735. # IDS::_convert_quotes($value);
  736. #****
  737. sub _convert_quotes {
  738. my ($value) = @_;
  739. # normalize different quotes to "
  740. my @pattern = ('\'', '`', '´', '’', '‘');
  741. $value = str_replace(\@pattern, '"', $value);
  742. # make sure harmless quoted strings don't generate false alerts
  743. $value = preg_replace(qr/^"([^"=\\!><~]+)"$/, '$1', $value);
  744. return $value;
  745. }
  746. #****if* IDS/_convert_from_sql_hex
  747. # NAME
  748. # _convert_from_sql_hex
  749. # DESCRIPTION
  750. # Converts SQLHEX to plain text
  751. # INPUT
  752. # value the string to convert
  753. # OUTPUT
  754. # value converted string
  755. # SYNOPSIS
  756. # IDS::_convert_from_sql_hex($value);
  757. #****
  758. sub _convert_from_sql_hex {
  759. my ($value) = @_;
  760. my @matches = ();
  761. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  762. if(preg_match_all(qr/((?:0x[a-f\d]{2,}[a-f\d]*)+)/im, $value, \@matches)) {
  763. foreach my $match ($matches[0]) {
  764. my $converted = '';
  765. foreach my $hex_index (str_split($match, 2)) {
  766. if(preg_match(qr/[a-f\d]{2,3}/i, $hex_index)) {
  767. $converted .= chr(hex($hex_index));
  768. }
  769. }
  770. $value = str_replace($match, $converted, $value);
  771. }
  772. }
  773. # take care of hex encoded ctrl chars
  774. $value = preg_replace('/0x\d+/m', 1, $value);
  775. return $value;
  776. }
  777. #****if* IDS/_convert_from_sql_keywords
  778. # NAME
  779. # _convert_from_sql_keywords
  780. # DESCRIPTION
  781. # Converts basic SQL keywords and obfuscations
  782. # INPUT
  783. # value the string to convert
  784. # OUTPUT
  785. # value converted string
  786. # SYNOPSIS
  787. # IDS::_convert_from_sql_keywords($value);
  788. #****
  789. sub _convert_from_sql_keywords {
  790. my ($value) = @_;
  791. my $pattern = qr/(?:IS\s+null)|(LIKE\s+null)|(?:(?:^|\W)IN[+\s]*\([\s\d"]+[^()]*\))/ims;
  792. $value = preg_replace($pattern, '"=0', $value);
  793. $value = preg_replace(qr/\W+\s*like\s*\W+/ims, '1" OR "1"', $value);
  794. $value = preg_replace(qr/null[,"\s]/ims, ',0', $value);
  795. $value = preg_replace(qr/\d+\./ims, ' 1', $value);
  796. $value = preg_replace(qr/,null/ims, ',0', $value);
  797. $value = preg_replace(qr/(?:between|mod)/ims, 'or', $value);
  798. $value = preg_replace(qr/(?:and\s+\d+\.?\d*)/ims, '', $value);
  799. $value = preg_replace(qr/(?:\s+and\s+)/ims, ' or ', $value);
  800. # \\N instead of PHP's \\\N
  801. $pattern = qr/[^\w,\(]NULL|\\N|TRUE|FALSE|UTC_TIME|LOCALTIME(?:STAMP)?|CURRENT_\w+|BINARY|(?:(?:ASCII|SOUNDEX|FIND_IN_SET|MD5|R?LIKE)[+\s]*\([^()]+\))|(?:-+\d)/ims;
  802. $value = preg_replace($pattern, 0, $value);
  803. $pattern = qr/(?:NOT\s+BETWEEN)|(?:IS\s+NOT)|(?:NOT\s+IN)|(?:XOR|\WDIV\W|\WNOT\W|<>|RLIKE(?:\s+BINARY)?)|(?:REGEXP\s+BINARY)|(?:SOUNDS\s+LIKE)/ims;
  804. $value = preg_replace($pattern, '!', $value);
  805. $value = preg_replace(qr/"\s+\d/, '"', $value);
  806. $value = preg_replace(qr/\/(?:\d+|null)/, '', $value);
  807. return $value;
  808. }
  809. #****if* IDS/_convert_entities
  810. # NAME
  811. # _convert_entities
  812. # DESCRIPTION
  813. # Converts from hex/dec entities (use HTML::Entities;)
  814. # INPUT
  815. # value the string to convert
  816. # OUTPUT
  817. # value converted string
  818. # SYNOPSIS
  819. # IDS::_convert_entities($value);
  820. #****
  821. sub _convert_entities {
  822. my ($value) = @_;
  823. my $converted = '';
  824. # deal with double encoded payload
  825. $value = preg_replace(qr/&amp;/, '&', $value);
  826. if (preg_match(qr/&#x?[\w]+/ms, $value)) {
  827. $converted = preg_replace(qr/(&#x?[\w]{2}\d?);?/ms, '$1;', $value);
  828. $converted = HTML::Entities::decode_entities($converted);
  829. $value .= "\n" . str_replace(';;', ';', $converted);
  830. }
  831. # normalize obfuscated protocol handlers
  832. $value = preg_replace(
  833. '/(?:j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)|(d\s*a\s*t\s*a\s*)/ms',
  834. 'javascript', $value
  835. );
  836. return $value;
  837. }
  838. #****if* IDS/_convert_from_control_chars
  839. # NAME
  840. # _convert_from_control_chars
  841. # DESCRIPTION
  842. # Detects nullbytes and controls chars via ord()
  843. # INPUT
  844. # value the string to convert
  845. # OUTPUT
  846. # value converted string
  847. # SYNOPSIS
  848. # IDS::_convert_from_control_chars($value);
  849. #****
  850. sub _convert_from_control_chars {
  851. my ($value) = @_;
  852. # critical ctrl values
  853. my @search = (
  854. chr(0), chr(1), chr(2), chr(3), chr(4), chr(5),
  855. chr(6), chr(7), chr(8), chr(11), chr(12), chr(14),
  856. chr(15), chr(16), chr(17), chr(18), chr(19), chr(24),
  857. chr(25), chr(192), chr(193), chr(238), chr(255)
  858. );
  859. $value = str_replace(\@search, '%00', $value);
  860. # take care for malicious unicode characters
  861. $value = urldecode(preg_replace(qr/(?:%E(?:2|3)%8(?:0|1)%(?:A|8|9)\w|%EF%BB%BF|%EF%BF%BD)|(?:&#(?:65|8)\d{3};?)/i, '',
  862. urlencode($value)));
  863. $value = urldecode(
  864. preg_replace(qr/(?:%F0%80%BE)/i, '>', urlencode($value)));
  865. $value = urldecode(
  866. preg_replace(qr/(?:%F0%80%BC)/i, '<', urlencode($value)));
  867. $value = urldecode(
  868. preg_replace(qr/(?:%F0%80%A2)/i, '"', urlencode($value)));
  869. $value = urldecode(
  870. preg_replace(qr/(?:%F0%80%A7)/i, '\'', urlencode($value)));
  871. $value = preg_replace(qr/(?:%ff1c)/, '<', $value);
  872. $value = preg_replace(
  873. qr/(?:&[#x]*(200|820|200|820|zwn?j|lrm|rlm)\w?;?)/i, '', $value
  874. );
  875. $value = preg_replace(qr/(?:&#(?:65|8)\d{3};?)|(?:&#(?:56|7)3\d{2};?)|(?:&#x(?:fe|20)\w{2};?)|(?:&#x(?:d[c-f])\w{2};?)/i, '',
  876. $value);
  877. $value = str_replace(
  878. ["\x{ab}", "\x{3008}", "\x{ff1c}", "\x{2039}", "\x{2329}", "\x{27e8}"], '<', $value
  879. );
  880. $value = str_replace(
  881. ["\x{bb}", "\x{3009}", "\x{ff1e}", "\x{203a}", "\x{232a}", "\x{27e9}"], '>', $value
  882. );
  883. return $value;
  884. }
  885. #****if* IDS/_convert_from_nested_base64
  886. # NAME
  887. # _convert_from_nested_base64
  888. # DESCRIPTION
  889. # Matches and translates base64 strings and fragments used in data URIs (use MIME::Base64;)
  890. # INPUT
  891. # value the string to convert
  892. # OUTPUT
  893. # value converted string
  894. # SYNOPSIS
  895. # IDS::_convert_from_nested_base64($value);
  896. #****
  897. sub _convert_from_nested_base64 {
  898. my ($value) = @_;
  899. my @matches = ();
  900. preg_match_all(qr/(?:^|[,&?])\s*([a-z0-9]{30,}=*)(?:\W|$)/im, #)/
  901. $value,
  902. \@matches,
  903. );
  904. # PHP to Perl note: PHP's $matches[1] is Perl's default ($matches[0] is the entire RegEx match)
  905. foreach my $item (@matches) {
  906. if ($item && !preg_match(qr/[a-f0-9]{32}/i, $item)) {
  907. # fill up the string with zero bytes if too short for base64 blocks
  908. my $item_original = $item;
  909. if (my $missing_bytes = length($item) % 4) {
  910. for (1..$missing_bytes) {
  911. $item .= "=";
  912. }
  913. }
  914. my $base64_item = MIME::Base64::decode_base64($item);
  915. $value = str_replace($item_original, $base64_item, $value);
  916. }
  917. }
  918. return $value;
  919. }
  920. #****if* IDS/_convert_from_out_of_range_chars
  921. # NAME
  922. # _convert_from_out_of_range_chars
  923. # DESCRIPTION
  924. # Detects nullbytes and controls chars via ord()
  925. # INPUT
  926. # value the string to convert
  927. # OUTPUT
  928. # value converted string
  929. # SYNOPSIS
  930. # IDS::_convert_from_out_of_range_chars($value);
  931. #****
  932. sub _convert_from_out_of_range_chars {
  933. my ($value) = @_;
  934. my @values = str_split($value);
  935. foreach my $item (@values) {
  936. if (ord($item) >= 127) {
  937. $value = str_replace($item, ' ', $value);
  938. }
  939. }
  940. return $value;
  941. }
  942. #****if* IDS/_convert_from_xml
  943. # NAME
  944. # _convert_from_xml
  945. # DESCRIPTION
  946. # Strip XML patterns
  947. # INPUT
  948. # value the string to convert
  949. # OUTPUT
  950. # value converted string
  951. # SYNOPSIS
  952. # IDS::_convert_from_xml($value);
  953. #****
  954. sub _convert_from_xml {
  955. my ($value) = @_;
  956. my $converted = strip_tags($value);
  957. if ($converted && ($converted ne $value)) {
  958. return $value . "\n" . $converted;
  959. }
  960. return $value;
  961. }
  962. #****if* IDS/_convert_from_js_unicode
  963. # NAME
  964. # _convert_from_js_unicode
  965. # DESCRIPTION
  966. # Converts JS unicode code points to regular characters
  967. # INPUT
  968. # value the string to convert
  969. # OUTPUT
  970. # value converted string
  971. # SYNOPSIS
  972. # IDS::_convert_from_js_unicode($value);
  973. #****
  974. sub _convert_from_js_unicode {
  975. my ($value) = @_;
  976. my @matches = ();
  977. # \\u instead of PHP's \\\u
  978. # PHP to Perl note: additional parenthesis around RegEx for getting PHP's $matches[0]
  979. preg_match_all(qr/(\\u[0-9a-f]{4})/ims, $value, \@matches);
  980. if ($matches[0]) {
  981. foreach my $match ($matches[0]) {
  982. my $chr = chr(hex(substr($match, 2, 4)));
  983. $value = str_replace($match, $chr, $value);
  984. }
  985. $value .= "\n".'\u0001';
  986. }
  987. return $value;
  988. }
  989. #****if* IDS/_convert_from_utf7
  990. # NAME
  991. # _convert_from_utf7
  992. # DESCRIPTION
  993. # Converts relevant UTF-7 tags to UTF-8 (use Encode qw/decode/;)
  994. # INPUT
  995. # value the string to convert
  996. # OUTPUT
  997. # value converted string
  998. # SYNOPSIS
  999. # IDS::_convert_from_utf7($value);
  1000. #****
  1001. sub _convert_from_utf7 {
  1002. my ($value) = @_;
  1003. if (preg_match(qr/\+A\w+-/m, $value)) {
  1004. $value .= "\n" . decode("UTF-7", $value);
  1005. }
  1006. return $value;
  1007. }
  1008. #****if* IDS/_convert_from_concatenated
  1009. # NAME
  1010. # _convert_from_concatenated
  1011. # DESCRIPTION
  1012. # Converts basic concatenations
  1013. # INPUT
  1014. # value the string to convert
  1015. # OUTPUT
  1016. # value converted string
  1017. # SYNOPSIS
  1018. # IDS::_convert_from_concatenated($value);
  1019. #****
  1020. sub _convert_from_concatenated {
  1021. my ($value) = @_;
  1022. # normalize remaining backslashes
  1023. # Perl's \\ should be equivalent to PHP's \\\
  1024. if ($value ne preg_replace(qr/(?:(\w)\\)/, '$1', $value)) {
  1025. $value .= preg_replace(qr/(?:(\w)\\)/, '$1', $value);
  1026. }
  1027. my $compare = stripslashes($value);
  1028. my @pattern = (
  1029. qr/(?:<\/\w+>\+<\w+>)/s,
  1030. qr/(?:":\d+[^"[]+")/s,
  1031. qr/(?:"?"\+\w+\+")/s,
  1032. qr/(?:"\s*;[^"]+")|(?:";[^"]+:\s*")/s,
  1033. qr/(?:"\s*(?:;|\+).{8,18}:\s*")/s,
  1034. qr/(?:";\w+=)|(?:!""&&")|(?:~)/s,
  1035. qr/(?:"?"\+""?\+?"?)|(?:;\w+=")|(?:"[|&]{2,})/s,
  1036. qr/(?:"\s*\W+")/s,
  1037. qr/(?:";\w\s*\+=\s*\w?\s*")/s,
  1038. qr/(?:"[|&;]+\s*[^|&\n]*[|&]+\s*"?)/s,
  1039. qr/(?:";\s*\w+\W+\w*\s*[|&]*")/s,
  1040. qr/(?:"\s*"\s*\.)/s,
  1041. qr/(?:\s*new\s+\w+\s*[+",])/,
  1042. qr/(?:(?:^|\s+)(?:do|else)\s+)/,
  1043. qr/(?:[{(]\s*new\s+\w+\s*[)}])/,
  1044. qr/(?:(this|self)\.)/,
  1045. qr/(?:undefined)/,
  1046. qr/(?:in\s+)/,
  1047. );
  1048. # strip out concatenations
  1049. my $converted = preg_replace(\@pattern, '', $compare);
  1050. # strip object traversal
  1051. $converted = preg_replace(qr/\w(\.\w\()/, '$1', $converted);
  1052. # normalize obfuscated method calls
  1053. $converted = preg_replace(qr/\)\s*\+/, ')', $converted);
  1054. # convert JS special numbers
  1055. $converted = preg_replace(qr/(?:\(*[.\d]e[+-]*[^a-z\W]+\)*)|(?:NaN|Infinity)\W/ims, 1, $converted);
  1056. if ($converted && ($compare ne $converted)) {
  1057. $value .= "\n" . $converted;
  1058. }
  1059. return $value;
  1060. }
  1061. #****if* IDS/_convert_from_proprietary_encodings
  1062. # NAME
  1063. # _convert_from_proprietary_encodings
  1064. # DESCRIPTION
  1065. # Collects and decodes proprietary encoding types
  1066. # INPUT
  1067. # value the string to convert
  1068. # OUTPUT
  1069. # value converted string
  1070. # SYNOPSIS
  1071. # IDS::_convert_from_proprietary_encodings($value);
  1072. #****
  1073. sub _convert_from_proprietary_encodings {
  1074. my ($value) = @_;
  1075. # Xajax error reportings
  1076. $value = preg_replace(qr/<!\[CDATA\[(\W+)\]\]>/im, '$1', $value);
  1077. # strip false alert triggering apostrophes
  1078. $value = preg_replace(qr/(\w)\"(s)/m, '$1$2', $value);
  1079. # strip quotes within typical search patterns
  1080. $value = preg_replace(qr/^"([^"=\\!><~]+)"$/, '$1', $value);
  1081. # OpenID login tokens
  1082. $value = preg_replace(qr/{[\w-]{8,9}\}(?:\{[\w=]{8}\}){2}/, '', $value);
  1083. # convert Content to null to avoid false alerts
  1084. $value = preg_replace(qr/Content|\Wdo\s/, '', $value);
  1085. # strip emoticons
  1086. $value = preg_replace(qr/(?:\s[:;]-[)\/PD]+)|(?:\s;[)PD]+)|(?:\s:[)PD]+)|-\.-|\^\^/m, '', $value);
  1087. # normalize separation char repetition
  1088. $value = preg_replace(qr/([.+~=*_\-;])\1{2,}/m, '$1', $value);
  1089. # normalize multiple single quotes
  1090. $value = preg_replace(qr/"{2,}/m, '"', $value);
  1091. # normalize quoted numerical values and asterisks
  1092. $value = preg_replace(qr/"(\d+)"/m, '$1', $value);
  1093. # normalize pipe separated request parameters
  1094. $value = preg_replace(qr/\|(\w+=\w+)/m, '&$1', $value);
  1095. # normalize ampersand listings
  1096. $value = preg_replace(qr/(\w\s)&\s(\w)/, '$1$2', $value);
  1097. return $value;
  1098. }
  1099. #****if* IDS/_run_centrifuge
  1100. # NAME
  1101. # _run_centrifuge
  1102. # DESCRIPTION
  1103. # The centrifuge prototype
  1104. # INPUT
  1105. # value the string to convert
  1106. # OUTPUT
  1107. # value converted string
  1108. # SYNOPSIS
  1109. # IDS::_run_centrifuge($value);
  1110. #****
  1111. sub _run_centrifuge {
  1112. my ($value) = @_;
  1113. my $threshold = 3.49;
  1114. if (strlen($value) > 25) {
  1115. # strip padding
  1116. my $tmp_value = preg_replace(qr/\s{4}|==$/m, '', $value);
  1117. $tmp_value = preg_replace(
  1118. qr/\s{4}|[\p{L}\d\+\-=,.%()]{8,}/m,
  1119. 'aaa',
  1120. $tmp_value
  1121. );
  1122. # Check for the attack char ratio
  1123. $tmp_value = preg_replace(qr/([*.!?+-])\1{1,}/m, '$1', $tmp_value);
  1124. $tmp_value = preg_replace(qr/"[\p{L}\d\s]+"/m, '', $tmp_value);
  1125. my $stripped_length = strlen(
  1126. preg_replace(qr/[\d\s\p{L}\.:,%&\/><\-)!]+/m,
  1127. '',
  1128. $tmp_value)
  1129. );
  1130. my $overall_length = strlen(
  1131. preg_replace(
  1132. qr/([\d\s\p{L}:,\.]{3,})+/m,
  1133. 'aaa',
  1134. preg_replace(
  1135. qr/\s{2,}/ms,
  1136. '',
  1137. $tmp_value
  1138. )
  1139. )
  1140. );
  1141. if ($stripped_length != 0 &&
  1142. $overall_length/$stripped_length <= $threshold
  1143. ) {
  1144. $value .= "\n".'$[!!!]';
  1145. }
  1146. }
  1147. if (strlen($value) > 40) {
  1148. # Replace all non-special chars
  1149. my $converted = preg_replace(qr/[\w\s\p{L},.:!]/, '', $value);
  1150. # Split string into an array, unify and sort
  1151. my @array = str_split($converted);
  1152. my %seen = ();
  1153. my @unique = grep { ! $seen{$_} ++ } @array;
  1154. @unique = sort @unique;
  1155. # Normalize certain tokens
  1156. my %schemes = (
  1157. '~' => '+',
  1158. '^' => '+',
  1159. '|' => '+',
  1160. '*' => '+',
  1161. '%' => '+',
  1162. '&' => '+',
  1163. '/' => '+',
  1164. );
  1165. $converted = implode('', @unique);
  1166. $converted = str_replace([keys %schemes], [values %schemes], $converted);
  1167. $converted = preg_replace(qr/[+-]\s*\d+/, '+', $converted);
  1168. $converted = preg_replace(qr/[()[\]{}]/, '(', $converted);
  1169. $converted = preg_replace(qr/[!?:=]/, ':', $converted);
  1170. $converted = preg_replace(qr/[^:(+]/, '', stripslashes($converted)); #/
  1171. # Sort again and implode
  1172. @array = str_split($converted);
  1173. @array = sort @array;
  1174. $converted = implode('', @array);
  1175. if (preg_match(qr/(?:\({2,}\+{2,}:{2,})|(?:\({2,}\+{2,}:+)|(?:\({3,}\++:{2,})/, $converted)) {
  1176. return $value . "\n" . $converted;
  1177. }
  1178. }
  1179. return $value;
  1180. }
  1181. #------------------------- PHP functions ---------------------------------------
  1182. #****if* IDS/array_sum
  1183. # NAME
  1184. # array_sum
  1185. # DESCRIPTION
  1186. # Equivalent to PHP's array_sum, sums all array values
  1187. # INPUT
  1188. # array the string to convert
  1189. # OUTPUT
  1190. # sum sum of all array values
  1191. # SYNOPSIS
  1192. # IDS::array_sum(@array);
  1193. #****
  1194. sub array_sum {
  1195. (my @array) = @_;
  1196. my $sum = 0;
  1197. foreach my $value (@array) {
  1198. if ($value) {
  1199. $sum += $value;
  1200. }
  1201. }
  1202. return $sum;
  1203. }
  1204. #****if* IDS/preg_match
  1205. # NAME
  1206. # preg_match
  1207. # DESCRIPTION
  1208. # Equivalent to PHP's preg_match, but with two arguments only
  1209. # INPUT
  1210. # pattern the pattern to match
  1211. # string the string
  1212. # OUTPUT
  1213. # boolean 1 if pattern matches string, 0 otherwise
  1214. # SYNOPSIS
  1215. # IDS::preg_match($pattern, $string);
  1216. #****
  1217. sub preg_match {
  1218. (my $pattern, my $string) = @_;
  1219. return ($string =~ $pattern);
  1220. }
  1221. #****if* IDS/preg_match_all
  1222. # NAME
  1223. # preg_match_all
  1224. # DESCRIPTION
  1225. # Equivalent to PHP's preg_match_all, but with three arguments only.
  1226. # Does not return nested arrays like PHP.
  1227. # Does not automatically match entire RegEx in $matches[0] like PHP does -
  1228. # Use brackets around your entire RegEx instead: preg_match_all(qr/(your(\d)(R|r)egex)/.
  1229. # INPUT
  1230. # pattern the pattern to match
  1231. # string the string
  1232. # arrayref the array to store the matches in
  1233. # OUTPUT
  1234. # array same content as written into arrayref
  1235. # SYNOPSIS
  1236. # IDS::preg_match_all(qr/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)+){4,}/ms, $value, \@matches)
  1237. # if (IDS::preg_match_all(qr/(?:[\d+-=\/\* ]+(?:\s?,\s?[\d+-=\/\* ]+)+){4,}/ms, $value, \@matches)) {
  1238. # print 'match';
  1239. # }
  1240. #****
  1241. sub preg_match_all {
  1242. (my $pattern, my $string, my $matches) = @_;
  1243. return (@$matches = ($string =~ /$pattern/g));
  1244. }
  1245. #****if* IDS/preg_replace
  1246. # NAME
  1247. # preg_replace
  1248. # DESCRIPTION
  1249. # Equivalent to PHP's preg_replace, but with three arguments only
  1250. # INPUT
  1251. # + pattern the pattern(s) to match
  1252. # replacement the replacement(s)
  1253. # + string the string(s)
  1254. # OUTPUT
  1255. # string the string(s) with all replacements done
  1256. # SYNOPSIS
  1257. # IDS::preg_replace(\@patterns, $replacement, $string);
  1258. # IDS::preg_replace(qr/^f.*ck/i, 'censored', $string);
  1259. # IDS::preg_replace(['badword', 'badword2', 'badword3'], ['censored1', 'censored2', 'censored3'], $string);
  1260. #****
  1261. sub preg_replace {
  1262. (my $patterns, my $replacements, my $strings) = @_;
  1263. # check input
  1264. if (!defined($strings) || !$strings ||
  1265. !defined($patterns) || !$patterns ) {
  1266. return '';
  1267. }
  1268. my $return_string = '';
  1269. if (ref($strings) ne 'ARRAY') {
  1270. $return_string = $strings;
  1271. }
  1272. if (ref($strings) eq 'ARRAY') {
  1273. my @replaced_strings = map {
  1274. preg_replace($patterns, $replacements, $_);
  1275. } @$strings;
  1276. return \@replaced_strings;
  1277. }
  1278. elsif (ref($patterns) eq 'ARRAY') {
  1279. my $pattern_no = 0;
  1280. foreach my $pattern (@$patterns) {
  1281. if (ref($replacements) eq 'ARRAY') {
  1282. $return_string = preg_replace($pattern, @$replacements[$pattern_no++], $return_string);
  1283. }
  1284. else {
  1285. $return_string = preg_replace($pattern, $replacements, $return_string);
  1286. }
  1287. }
  1288. }
  1289. else {
  1290. my $repl = '';
  1291. if (ref($replacements) eq 'ARRAY') {
  1292. $repl = @$replacements[0];
  1293. }
  1294. else {
  1295. if (!defined($replacements)) {
  1296. $repl = '';
  1297. }
  1298. else {
  1299. $repl = $replacements;
  1300. }
  1301. }
  1302. $repl =~ s/\\/\\\\/g;
  1303. $repl =~ s/\"/\\"/g;
  1304. $repl =~ s/\@/\\@/g;
  1305. $repl =~ s/\$(?!\d)/\\\$/g; # escape $ if not substitution variable like $1
  1306. $repl = qq{"$repl"};
  1307. $return_string =~ s/$patterns/defined $repl ? $repl : ''/eeg;
  1308. }
  1309. return $return_string;
  1310. }
  1311. #****if* IDS/str_replace
  1312. # NAME
  1313. # str_replace
  1314. # DESCRIPTION
  1315. # Equivalent to PHP's str_replace, but with three arguments only (simply a wrapper for preg_replace, but escapes pattern meta characters)
  1316. # INPUT
  1317. # pattern the pattern(s) to match
  1318. # replacement the replacement(s)
  1319. # string the string(s)
  1320. # OUTPUT
  1321. # string the string(s) with all replacements done
  1322. # SYNOPSIS
  1323. # IDS::str_replace(\@patterns, $replacement, $string);
  1324. # IDS::str_replace('bad\tword', 'censored', $string); # replaces 'bad\tword' but not 'bad word' or "bad\tword"
  1325. # IDS::str_replace(['badword', 'badword2', 'badword3'], ['censored1', 'censored2', 'censored3'], $string);
  1326. #****
  1327. sub str_replace {
  1328. (my $patterns, my $replacements, my $strings) = @_;
  1329. my @escapedpatterns = ();
  1330. if (ref($patterns) eq 'ARRAY') {
  1331. @escapedpatterns = map {quotemeta($_)} @$patterns;
  1332. return preg_replace(\@escapedpatterns, $replacements, $strings);
  1333. }
  1334. else {
  1335. return preg_replace(quotemeta($patterns), $replacements, $strings);
  1336. }
  1337. }
  1338. #****if* IDS/str_split
  1339. # NAME
  1340. # str_split
  1341. # DESCRIPTION
  1342. # Equivalent to PHP's str_split
  1343. # INPUT
  1344. # string the string to split
  1345. # OUTPUT
  1346. # array the split string
  1347. # SYNOPSIS
  1348. # IDS::str_split($string);
  1349. #****
  1350. sub str_split {
  1351. (my $string, my $limit) = @_;
  1352. if (defined($limit)) {
  1353. return ($string =~ /(.{1,$limit})/g);
  1354. }
  1355. else {
  1356. return split(//, $string);
  1357. }
  1358. }
  1359. #****if* IDS/strlen
  1360. # NAME
  1361. # strlen
  1362. # DESCRIPTION
  1363. # Equivalent to PHP's strlen, wrapper for Perl's length()
  1364. # INPUT
  1365. # string the string
  1366. # OUTPUT
  1367. # string the string's length
  1368. # SYNOPSIS
  1369. # IDS::strlen($url);
  1370. #****
  1371. sub strlen {
  1372. (my $string) = @_;
  1373. return length($string);
  1374. }
  1375. #****if* IDS/urldecode
  1376. # NAME
  1377. # urldecode
  1378. # DESCRIPTION
  1379. # Equivalent to PHP's urldecode
  1380. # INPUT
  1381. # string the URL to decode
  1382. # OUTPUT
  1383. # string the decoded URL
  1384. # SYNOPSIS
  1385. # IDS::urldecode($url);
  1386. #****
  1387. sub urldecode {
  1388. (my $theURL) = @_;
  1389. $theURL =~ tr/+/ /;
  1390. $theURL =~ s/%([a-fA-F0-9]{2,2})/chr(hex($1))/eg;
  1391. $theURL =~ s/<!(.|\n)*>//g;
  1392. utf8::decode($theURL);
  1393. return $theURL;
  1394. }
  1395. #****if* IDS/urlencode
  1396. # NAME
  1397. # urlencode
  1398. # DESCRIPTION
  1399. # Equivalent to PHP's urlencode
  1400. # INPUT
  1401. # string the URL to encode
  1402. # OUTPUT
  1403. # string the encoded URL
  1404. # SYNOPSIS
  1405. # IDS::urlencode($url);
  1406. #****
  1407. sub urlencode {
  1408. (my $theURL) = @_;
  1409. $theURL =~ s/([\W])/sprintf("%%%02X",ord($1))/eg;
  1410. utf8::encode($theURL);
  1411. return $theURL;
  1412. }
  1413. #****if* IDS/implode
  1414. # NAME
  1415. # implode
  1416. # DESCRIPTION
  1417. # Equivalent to PHP's implode (simply wrapper of join)
  1418. # INPUT
  1419. # string glue the glue to put between the pieces
  1420. # array pieces the pieces to be put together
  1421. # OUTPUT
  1422. # string the imploded string
  1423. # SYNOPSIS
  1424. # IDS::implode(';', @pieces);
  1425. #****
  1426. sub implode {
  1427. (my $glue, my @pieces) = @_;
  1428. return join($glue, @pieces);
  1429. }
  1430. #****if* IDS/explode
  1431. # NAME
  1432. # explode
  1433. # DESCRIPTION
  1434. # Equivalent to PHP's explode (simply wrapper of split, but escapes met characters)
  1435. # INPUT
  1436. # string glue the glue to put between the pieces
  1437. # string string the string to split
  1438. # OUTPUT
  1439. # array the exploded string
  1440. # SYNOPSIS
  1441. # IDS::explode(';', $string);
  1442. #****
  1443. sub explode {
  1444. (my $glue, my $string) = @_;
  1445. return split(quotemeta($glue), $string);
  1446. }
  1447. #****if* IDS/stripslashes
  1448. # NAME
  1449. # stripslashes
  1450. # DESCRIPTION
  1451. # Equivalent to PHP's stripslashes
  1452. # INPUT
  1453. # string string the string
  1454. # OUTPUT
  1455. # string the stripped string
  1456. # SYNOPSIS
  1457. # ID

Large files files are truncated, but you can click here to view the full file