/trunk/emsi/web/cgi-bin/search/searchmods/common.pl
Perl | 2687 lines | 2277 code | 348 blank | 62 comment | 161 complexity | 51386317c170751ad58db2cb37c319ff MD5 | raw file
Possible License(s): LGPL-2.1
- use strict;
- sub version_c {
- return '2.0.0.0063';
- }
- =head1 HEAD
- Copyright 1997-2003 by Zoltan Milosevic, All Rights Reserved
- See http://www.xav.com/scripts/search/ for more information.
- If you edit the source code, you'll find it useful to restore the function comments and #&Assert checks:
- cd "search/searchmods/powerusr/"
- hacksubs.pl build_map
- hacksubs.pl restore_comments
- hacksubs.pl assert_on
- This library, common.pl, contains simple standalone functions which are shared among all modes.
- =cut
- sub choose_interface_lang {
- my ($b_is_admin_rq, $browser_lang, $form_set_lang, $form_p_lang) = @_;
- my $options = '';
- my $lang = $Rules{'language'};
- my $err = '';
- Err: {
- my %valid;
- ($err, $options, %valid) = &get_valid_langs();
- next Err if ($err);
- last Err if ($b_is_admin_rq);
- my $uls = $Rules{'user language selection'};
- if (($uls == 1) or ($uls == 3)) {
- # detect lang based on browser
- my $browser = substr( &query_env('HTTP_ACCEPT_LANGUAGE'), 0, 2 );
- # only map non-2-char entries; others pass through
- my %fdse_name_map = (
- 'en' => 'english',
- 'pt' => 'portuguese',
- 'fr' => 'french',
- 'it' => 'italian',
- 'nl' => 'dutch',
- 'de' => 'german',
- 'es' => 'spanish',
- );
- $browser = $fdse_name_map{$browser} || $browser;
- if ($valid{$browser}) {
- $lang = $browser;
- }
- }
- if (($uls == 2) or ($uls == 3)) {
- # detect lang from form settings
- if (defined($FORM{'set:lang'})) {
- $FORM{'p:lang'} = $FORM{'set:lang'};
- delete $FORM{'set:lang'};
- }
- if (($FORM{'p:lang'}) and ($FORM{'p:lang'} =~ m!^(\w+)$!) and ($valid{$1})) {
- $lang = $1;
- }
- }
- last Err;
- }
- return ($err, $options, $lang);
- }
- sub get_valid_langs {
- my %valid = ();
- my $err = '';
- Err: {
- my $cache_string = '';
- my $template_time = (stat('templates'))[9];
- my $cache = 'valid_languages_cache.txt';
- if ((-e $cache) and (-f $cache)) {
- ($err, $cache_string) = &ReadFileL( $cache );
- next Err if ($err);
- my ($cache_version, $cache_build_time, $cache_template_time, %cache_valid) = split(m!\$!, $cache_string);
- if (
- ($cache_version ne $VERSION)
- or
- (($private{'script_start_time'} - $cache_build_time) > 86400)
- or
- ($cache_template_time != $template_time)
- ) {
- # discard cache
- }
- else {
- %valid = %cache_valid;
- last Err;
- }
- }
- # query file system, either because no cache present, or because it has been discarded:
- if (opendir(DIR, 'templates')) {
- my @folders = sort readdir(DIR);
- closedir(DIR);
- foreach (@folders) {
- next unless (-e "templates/$_/strings.txt");
- unless (open(FILE, "<templates/$_/strings.txt" )) {
- #$err = "unable to open file '$_/strings.txt' - $!"; next Err;
- next;
- }
- my ($ver, $selfname) = (<FILE>, <FILE>);
- close(FILE);
- if ($ver =~ m!^VERSION $VERSION!) {
- # ok
- $selfname =~ s!\r|\n|\015|\012!!sg;
- $valid{$_} = $selfname;
- }
- }
- }
- # save cache if possible:
- $cache_string = join( '$', $VERSION, $private{'script_start_time'}, $template_time, %valid );
- if (open(FILE, ">$cache")) {
- binmode(FILE);
- print FILE $cache_string;
- close(FILE);
- chmod($private{'file_mask'},$cache);
- }
- last Err;
- }
- my $options = '';
- foreach (sort keys %valid) {
- $options .= qq!<option value="$_">$valid{$_}</option>!;
- }
- return ($err, $options, %valid);
- }
- sub rewrite_url {
- my ($level, $url) = @_;
- my $key = "rewrite_url_" . $level;
- unless ($Rules{$key}) {
- return $url;
- }
- # format is b_enabled,p1,p2,comment,b_verbose,
- unless ($private{$key}) {
- # create a cache copy
- my @rules = ();
- my $rule;
- foreach $rule (split(m!\&!, $Rules{$key})) {
- my @fields = split(m!\=!, $rule);
- next unless ($fields[0]);
- my @rule = ( &ud($fields[1],$fields[2]), $fields[4] );
- push(@rules, \@rule);
- }
- $private{$key} = \@rules;
- }
- my $p_rules = $private{$key}; # pointer to an array of arrays
- my $p_rule;
- foreach $p_rule (@$p_rules) {
- my $init = $url;
- my ($p1, $p2, $b_verbose) = @$p_rule;
- #changed 0056; Brian Renken's contrib; rewrite rules now support $1, $2, uc/lc($1)
- my @backref = ($url =~ m!$p1!is);
- my $count = ($url =~ s!$p1!$p2!isg);
- my $i = 0;
- my $ref;
- foreach $ref (@backref) {
- $i++;
- $url =~ s!lc\(\$$i\)!lc($ref)!iesg;
- $url =~ s!uc\(\$$i\)!uc($ref)!iesg;
- $url =~ s!\$$i!$ref!sg;
- }
- if (($count) and ($b_verbose)) {
- my $h_init = &he($init);
- print "<p><b>Status:</b> URL rewrite feature has converted $h_init to " . &he($url) . ".</p>\n";
- }
- }
- return $url;
- }
- sub check_regex {
- my ($pattern) = @_;
- my $err = '';
- Err: {
- if ($pattern =~ m!\?\{!) {
- $err = &pstr(50,&he($pattern));
- next Err;
- }
- eval '"foo" =~ m!$pattern!;';
- if ($@) {
- $err = &pstr(51,&he($pattern,$@));
- undef($@);
- next Err;
- }
- }
- return $err;
- }
- sub pstr {
- local $_ = $str[$_[0]];
- my $x = 0;
- foreach $x (1..((scalar @_) - 1)) {
- my $c = (s!\$s$x!$_[$x]!g);
- #&Assert($c != 0);
- }
- #&Assert( $_ !~ m!\$s\d! );
- return $_;
- }
- sub ppstr {
- local $_ = $str[$_[0]];
- #&Assert(defined($_));
- my $x = 0;
- foreach $x (1..((scalar @_) - 1)) {
- #&Assert(defined($_[$x]));
- my $c = (s!\$s$x!$_[$x]!g);
- #&Assert($c != 0);
- }
- #&Assert( $_ !~ m!\$s\d! );
- print;
- }
- sub pppstr {
- local $_ = $str[$_[0]];
- my $x = 0;
- foreach $x (1..((scalar @_) - 1)) {
- my $c = (s!\$s$x!$_[$x]!g);
- #&Assert($c != 0);
- }
- #&Assert( $_ !~ m!\$s\d! );
- if ($const{'is_cmd'}) {
- print "\n$_\n";
- }
- else {
- print "<p>" . $_ . "</p>\n";
- }
- }
- sub CompressStrip {
- local $_ = defined($_[0]) ? $_[0] : '';
- $_ = &RawTranslate(" $_ ");
- s'\s+' 'og;
- eval($const{'code_strip_ignored_words'});
- die $@ if $@;
- s'\s+' 'og;
- s'^ '';
- s' $'';
- return " $_ ";
- }
- sub create_conversion_code {
- my ($b_verbose) = @_;
- my $code = '';
- # Format of %charset is { char_number => [ @values, $name ] }
- # where @values represents what the character should be converted to under 4 circumstances
- # -1 means "strip, is non-word"
- # 0 means "leave as is"
- # any other string value is the value to be converted to
- my %base_charset = (
- 9 => [ -1, -1, -1, -1, 'Horizontal tab'],
- 10 => [ -1, -1, -1, -1, 'Line feed'],
- 13 => [ -1, -1, -1, -1, 'Carriage Return'],
- 32 => [ -1, -1, -1, -1, 'Space'],
- 33 => [ -1, -1, -1, -1, 'Exclamation mark'],
- 34 => [ -1, -1, -1, -1, 'Quotation mark'],
- 35 => [ -1, -1, -1, -1, 'Number sign'],
- 36 => [ -1, -1, -1, -1, 'Dollar sign'],
- 37 => [ -1, -1, -1, -1, 'Percent sign'],
- 38 => [ -1, -1, -1, -1, 'Ampersand'],
- 39 => [ -1, -1, -1, -1, 'Apostrophe'],
- 40 => [ -1, -1, -1, -1, 'Left parenthesis'],
- 41 => [ -1, -1, -1, -1, 'Right parenthesis'],
- 42 => [ -1, -1, -1, -1, 'Asterisk'],
- 43 => [ -1, -1, -1, -1, 'Plus sign'],
- 44 => [ -1, -1, -1, -1, 'Comma'],
- 45 => [ -1, -1, -1, -1, 'Hyphen'],
- 46 => [ -1, -1, -1, -1, 'Period (fullstop)'],
- 47 => [ -1, -1, -1, -1, 'Solidus (slash)'],
- 48 => [ 0, 0, 0, 0, 'Digit 0'],
- 49 => [ 0, 0, 0, 0, 'Digit 1'],
- 50 => [ 0, 0, 0, 0, 'Digit 2'],
- 51 => [ 0, 0, 0, 0, 'Digit 3'],
- 52 => [ 0, 0, 0, 0, 'Digit 4'],
- 53 => [ 0, 0, 0, 0, 'Digit 5'],
- 54 => [ 0, 0, 0, 0, 'Digit 6'],
- 55 => [ 0, 0, 0, 0, 'Digit 7'],
- 56 => [ 0, 0, 0, 0, 'Digit 8'],
- 57 => [ 0, 0, 0, 0, 'Digit 9'],
- 58 => [ -1, -1, -1, -1, 'Colon'],
- 59 => [ -1, -1, -1, -1, 'Semicolon'],
- 60 => [ -1, -1, -1, -1, 'Less than'],
- 61 => [ -1, -1, -1, -1, 'Equals sign'],
- 62 => [ -1, -1, -1, -1, 'Greater than'],
- 63 => [ -1, -1, -1, -1, 'Question mark'],
- 64 => [ -1, -1, -1, -1, 'Commercial at'],
- 65 => [ 'a', 0, 'a', 0, 'Capital A'],
- 66 => [ 'b', 0, 'b', 0, 'Capital B'],
- 67 => [ 'c', 0, 'c', 0, 'Capital C'],
- 68 => [ 'd', 0, 'd', 0, 'Capital D'],
- 69 => [ 'e', 0, 'e', 0, 'Capital E'],
- 70 => [ 'f', 0, 'f', 0, 'Capital F'],
- 71 => [ 'g', 0, 'g', 0, 'Capital G'],
- 72 => [ 'h', 0, 'h', 0, 'Capital H'],
- 73 => [ 'i', 0, 'i', 0, 'Capital I'],
- 74 => [ 'j', 0, 'j', 0, 'Capital J'],
- 75 => [ 'k', 0, 'k', 0, 'Capital K'],
- 76 => [ 'l', 0, 'l', 0, 'Capital L'],
- 77 => [ 'm', 0, 'm', 0, 'Capital M'],
- 78 => [ 'n', 0, 'n', 0, 'Capital N'],
- 79 => [ 'o', 0, 'o', 0, 'Capital O'],
- 80 => [ 'p', 0, 'p', 0, 'Capital P'],
- 81 => [ 'q', 0, 'q', 0, 'Capital Q'],
- 82 => [ 'r', 0, 'r', 0, 'Capital R'],
- 83 => [ 's', 0, 's', 0, 'Capital S'],
- 84 => [ 't', 0, 't', 0, 'Capital T'],
- 85 => [ 'u', 0, 'u', 0, 'Capital U'],
- 86 => [ 'v', 0, 'v', 0, 'Capital V'],
- 87 => [ 'w', 0, 'w', 0, 'Capital W'],
- 88 => [ 'x', 0, 'x', 0, 'Capital X'],
- 89 => [ 'y', 0, 'y', 0, 'Capital Y'],
- 90 => [ 'z', 0, 'z', 0, 'Capital Z'],
- 91 => [ -1, -1, -1, -1, 'Left square bracket'],
- 92 => [ -1, -1, -1, -1, 'Reverse solidus (backslash)'],
- 93 => [ -1, -1, -1, -1, 'Right square bracket'],
- 94 => [ -1, -1, -1, -1, 'Caret'],
- 95 => [ -1, -1, -1, -1, 'Horizontal bar (underscore)'],
- 96 => [ -1, -1, -1, -1, 'Acute accent'],
- 97 => [ 0, 0, 0, 0, 'Small a'],
- 98 => [ 0, 0, 0, 0, 'Small b'],
- 99 => [ 0, 0, 0, 0, 'Small c'],
- 100 => [ 0, 0, 0, 0, 'Small d'],
- 101 => [ 0, 0, 0, 0, 'Small e'],
- 102 => [ 0, 0, 0, 0, 'Small f'],
- 103 => [ 0, 0, 0, 0, 'Small g'],
- 104 => [ 0, 0, 0, 0, 'Small h'],
- 105 => [ 0, 0, 0, 0, 'Small i'],
- 106 => [ 0, 0, 0, 0, 'Small j'],
- 107 => [ 0, 0, 0, 0, 'Small k'],
- 108 => [ 0, 0, 0, 0, 'Small l'],
- 109 => [ 0, 0, 0, 0, 'Small m'],
- 110 => [ 0, 0, 0, 0, 'Small n'],
- 111 => [ 0, 0, 0, 0, 'Small o'],
- 112 => [ 0, 0, 0, 0, 'Small p'],
- 113 => [ 0, 0, 0, 0, 'Small q'],
- 114 => [ 0, 0, 0, 0, 'Small r'],
- 115 => [ 0, 0, 0, 0, 'Small s'],
- 116 => [ 0, 0, 0, 0, 'Small t'],
- 117 => [ 0, 0, 0, 0, 'Small u'],
- 118 => [ 0, 0, 0, 0, 'Small v'],
- 119 => [ 0, 0, 0, 0, 'Small w'],
- 120 => [ 0, 0, 0, 0, 'Small x'],
- 121 => [ 0, 0, 0, 0, 'Small y'],
- 122 => [ 0, 0, 0, 0, 'Small z'],
- 123 => [ -1, -1, -1, -1, 'Left curly brace'],
- 124 => [ -1, -1, -1, -1, 'Vertical bar'],
- 125 => [ -1, -1, -1, -1, 'Right curly brace'],
- 126 => [ -1, -1, -1, -1, 'Tilde'],
- );
- my %extended_charset = (
- 138 => [ 's', 'S', chr(154), 0, 'Scaron'],
- 140 => [ 'oe', 'OE', chr(156), 0, 'OE ligature'],
- 142 => [ 'z', 'Z', chr(158), 0, ''],
- 154 => [ 's', 's', 0, 0, 'scaron'],
- 156 => [ 'oe', 'oe', 0, 0, 'oe ligature'],
- 158 => [ 'z', 'z', 0, 0, ''],
- 159 => [ 'y', 'Y', chr(255), 0, ''],
- 160 => [ -1, -1, -1, -1, 'Nonbreaking space'],
- 161 => [ -1, -1, -1, -1, 'Inverted exclamation'],
- 162 => [ -1, -1, -1, -1, 'Cent sign'],
- 163 => [ -1, -1, -1, -1, 'Pound sterling'],
- 164 => [ -1, -1, -1, -1, 'General currency sign'],
- 165 => [ -1, -1, -1, -1, 'Yen sign'],
- 166 => [ -1, -1, -1, -1, 'Broken vertical bar'],
- 167 => [ -1, -1, -1, -1, 'Section sign'],
- 168 => [ -1, -1, -1, -1, 'Di?resis / Umlaut'],
- 169 => [ -1, -1, -1, -1, 'Copyright'],
- 170 => [ -1, -1, -1, -1, 'Feminine ordinal'],
- 171 => [ -1, -1, -1, -1, 'Left angle quote, guillemet left'],
- 172 => [ -1, -1, -1, -1, 'Not sign'],
- 173 => [ -1, -1, -1, -1, 'Soft hyphen'],
- 174 => [ -1, -1, -1, -1, 'Registered trademark'],
- 175 => [ -1, -1, -1, -1, 'Macron accent'],
- 176 => [ -1, -1, -1, -1, 'Degree sign'],
- 177 => [ -1, -1, -1, -1, 'Plus or minus'],
- 178 => [ -1, -1, -1, -1, 'Superscript 2'],
- 179 => [ -1, -1, -1, -1, 'Superscript 3'],
- 180 => [ -1, -1, -1, -1, 'Acute accent'],
- 181 => [ -1, -1, -1, -1, 'Micro sign'],
- 182 => [ -1, -1, -1, -1, 'Paragraph sign'],
- 183 => [ -1, -1, -1, -1, 'Middle dot'],
- 184 => [ -1, -1, -1, -1, 'Cedilla'],
- 185 => [ -1, -1, -1, -1, 'Superscript 1'],
- 186 => [ -1, -1, -1, -1, 'Masculine ordinal'],
- 187 => [ -1, -1, -1, -1, 'Right angle quote, guillemet right'],
- 188 => [ -1, -1, -1, -1, 'Fraction one-fourth'],
- 189 => [ -1, -1, -1, -1, 'Fraction one-half'],
- 190 => [ -1, -1, -1, -1, 'Fraction three-fourths'],
- 191 => [ -1, -1, -1, -1, 'Inverted question mark'],
- 192 => [ 'a', 'A', chr(224), 0, 'Capital A, grave accent'],
- 193 => [ 'a', 'A', chr(225), 0, 'Capital A, acute accent'],
- 194 => [ 'a', 'A', chr(226), 0, 'Capital A, circumflex'],
- 195 => [ 'a', 'A', chr(227), 0, 'Capital A, tilde'],
- 196 => [ 'ae', 'Ae', chr(228), 0, 'Capital A, diaeresis / umlaut'],
- 197 => [ 'a', 'A', chr(229), 0, 'Capital A, ring'],
- 198 => [ 'ae', 'AE', chr(230), 0, 'Capital AE ligature'],
- 199 => [ 'c', 'c', chr(231), 0, 'Capital C, cedilla'],
- 200 => [ 'e', 'E', chr(232), 0, 'Capital E, grave accent'],
- 201 => [ 'e', 'E', chr(233), 0, 'Capital E, acute accent'],
- 202 => [ 'e', 'E', chr(234), 0, 'Capital E, circumflex'],
- 203 => [ 'e', 'E', chr(235), 0, 'Capital E, diaeresis / umlaut'],
- 204 => [ 'i', 'I', chr(236), 0, 'Capital I, grave accent'],
- 205 => [ 'i', 'I', chr(237), 0, 'Capital I, acute accent'],
- 206 => [ 'i', 'I', chr(238), 0, 'Capital I, circumflex'],
- 207 => [ 'i', 'I', chr(239), 0, 'Capital I, diaeresis / umlaut'],
- 208 => [ 'd', 'D', chr(240), 0, 'Capital Eth, Icelandic'],
- 209 => [ 'n', 'N', chr(241), 0, 'Capital N, tilde'],
- 210 => [ 'o', 'O', chr(242), 0, 'Capital O, grave accent'],
- 211 => [ 'o', 'O', chr(243), 0, 'Capital O, acute accent'],
- 212 => [ 'o', 'O', chr(244), 0, 'Capital O, circumflex'],
- 213 => [ 'o', 'O', chr(245), 0, 'Capital O, tilde'],
- 214 => [ 'oe', 'Oe', chr(246), 0, 'Capital O, diaeresis / umlaut'],
- 215 => [ -1, -1, -1, -1, 'Multiply sign'],
- 216 => [ 'o', 'O', chr(248), 0, 'Capital O, slash'],
- 217 => [ 'u', 'U', chr(249), 0, 'Capital U, grave accent'],
- 218 => [ 'u', 'U', chr(250), 0, 'Capital U, acute accent'],
- 219 => [ 'u', 'U', chr(251), 0, 'Capital U, circumflex'],
- 220 => [ 'ue', 'Ue', chr(252), 0, 'Capital U, diaeresis / umlaut'],
- 221 => [ 'y', 'Y', chr(253), 0, 'Capital Y, acute accent'],
- 222 => [ 'p', 'P', chr(254), 0, 'Capital Thorn, Icelandic'],
- 223 => [ 'ss', 'ss', 0, 0, 'Small sharp s, German sz'],
- 224 => [ 'a', 'a', 0, 0, 'Small a, grave accent'],
- 225 => [ 'a', 'a', 0, 0, 'Small a, acute accent'],
- 226 => [ 'a', 'a', 0, 0, 'Small a, circumflex'],
- 227 => [ 'a', 'a', 0, 0, 'Small a, tilde'],
- 228 => [ 'ae', 'ae', 0, 0, 'Small a, diaeresis / umlaut'],
- 229 => [ 'a', 'a', 0, 0, 'Small a, ring'],
- 230 => [ 'ae', 'ae', 0, 0, 'Small ae ligature'],
- 231 => [ 'c', 'c', 0, 0, 'Small c, cedilla'],
- 232 => [ 'e', 'e', 0, 0, 'Small e, grave accent'],
- 233 => [ 'e', 'e', 0, 0, 'Small e, acute accent'],
- 234 => [ 'e', 'e', 0, 0, 'Small e, circumflex'],
- 235 => [ 'e', 'e', 0, 0, 'Small e, diaeresis / umlaut'],
- 236 => [ 'i', 'i', 0, 0, 'Small i, grave accent'],
- 237 => [ 'i', 'i', 0, 0, 'Small i, acute accent'],
- 238 => [ 'i', 'i', 0, 0, 'Small i, circumflex'],
- 239 => [ 'i', 'i', 0, 0, 'Small i, diaeresis / umlaut'],
- 240 => [ 'o', 'o', 0, 0, 'Small eth, Icelandic'],
- 241 => [ 'n', 'n', 0, 0, 'Small n, tilde'],
- 242 => [ 'o', 'o', 0, 0, 'Small o, grave accent'],
- 243 => [ 'o', 'o', 0, 0, 'Small o, acute accent'],
- 244 => [ 'o', 'o', 0, 0, 'Small o, circumflex'],
- 245 => [ 'o', 'o', 0, 0, 'Small o, tilde'],
- 246 => [ 'oe', 'oe', 0, 0, 'Small o, diaeresis / umlaut'],
- 247 => [ -1, -1, -1, -1, 'Division sign'],
- 248 => [ 'o', 'o', 0, 0, 'Small o, slash'],
- 249 => [ 'u', 'u', 0, 0, 'Small u, grave accent'],
- 250 => [ 'u', 'u', 0, 0, 'Small u, acute accent'],
- 251 => [ 'u', 'u', 0, 0, 'Small u, circumflex'],
- 252 => [ 'ue', 'ue', 0, 0, 'Small u, diaeresis / umlaut'],
- 253 => [ 'y', 'y', 0, 0, 'Small y, acute accent'],
- 254 => [ 'p', 'p', 0, 0, 'Small thorn, Icelandic'],
- 255 => [ 'y', 'y', 0, 0, 'Small y, diaeresis / umlaut'],
- );
- =item reserved
- The %reserved hash contains the Latin character index of characters that FDSE uses internally to delimit data, including newlines, whitespace, and the equals sign. These characters are *always* stripped from incoming data regardless of locale settings.
- =cut
- my %reserved = (
- 34 => 1,
- 38 => 1,
- 60 => 1,
- 62 => 1,
- 9 => 1,
- 95 => 1,
- 10 => 1,
- 13 => 1,
- 32 => 1,
- 61 => 1,
- );
- =item named_entities
- The %named_entities hash maps HTML entities to their Latin character index.
- Numeric formats like "#ddd" and "xHH" are programmatically added to the hash -- there is no need to manually add them.
- Named entities which do not map to alphanumeric "word" characters, like "amp", are omitted as an optimization, since those characters are never included in the index.
- =cut
- my %named_entities = (
- '#338' => 140,
- '#339' => 156,
- '#352' => 138,
- '#353' => 154,
- 'AElig' => 198,
- 'Aacute' => 193,
- 'Acirc' => 194,
- 'Agrave' => 192,
- 'Aring' => 197,
- 'Atilde' => 195,
- 'Auml' => 196,
- 'Ccedil' => 199,
- 'ETH' => 208,
- 'Eacute' => 201,
- 'Ecirc' => 202,
- 'Egrave' => 200,
- 'Euml' => 203,
- 'Iacute' => 205,
- 'Icirc' => 206,
- 'Igrave' => 204,
- 'Iuml' => 207,
- 'Ntilde' => 209,
- 'OElig' => 140,
- 'Oacute' => 211,
- 'Ocirc' => 212,
- 'Ograve' => 210,
- 'Oslash' => 216,
- 'Otilde' => 213,
- 'Ouml' => 214,
- 'Scaron' => 138,
- 'THORN' => 222,
- 'Uacute' => 218,
- 'Ucirc' => 219,
- 'Ugrave' => 217,
- 'Uuml' => 220,
- 'Yacute' => 221,
- 'aacute' => 225,
- 'acirc' => 226,
- 'aelig' => 230,
- 'agrave' => 224,
- 'aring' => 229,
- 'atilde' => 227,
- 'auml' => 228,
- 'ccedil' => 231,
- 'eacute' => 233,
- 'ecirc' => 234,
- 'egrave' => 232,
- 'eth' => 240,
- 'euml' => 235,
- 'iacute' => 237,
- 'icirc' => 238,
- 'igrave' => 236,
- 'iquest' => 191,
- 'iuml' => 239,
- 'ntilde' => 241,
- 'oacute' => 243,
- 'ocirc' => 244,
- 'oelig' => 156,
- 'ograve' => 242,
- 'oslash' => 248,
- 'otilde' => 245,
- 'ouml' => 246,
- 'scaron' => 154,
- 'sup1' => 185,
- 'sup2' => 178,
- 'sup3' => 179,
- 'szlig' => 223,
- 'thorn' => 254,
- 'uacute' => 250,
- 'ucirc' => 251,
- 'ugrave' => 249,
- 'uuml' => 252,
- 'yacute' => 253,
- 'yuml' => 255,
- );
- my %entity_name_by_num = ();
- %entity_value_by_name = ();
- my ($name, $number) = ('', 0);
- while (($name, $number) = each %named_entities) {
- $entity_name_by_num{ $number } .= "$name ";
- $entity_value_by_name{ $name } = chr($number);
- }
- my %ac_map_cs = ();
- my @nonword = ();
- my $focus = (2 + (-2 * $Rules{'character conversion: accent insensitive'})) + (1 + (-1 * $Rules{'character conversion: case insensitive'}));
- my $chx = 0;
- if (not $b_verbose) {
- for (my $chx = 255; $chx > 0; $chx--) {
- my $ch = chr($chx);
- my $value = -1;
- if (defined($base_charset{$chx})) {
- $value = $base_charset{$chx}[$focus];
- }
- elsif (defined($extended_charset{$chx})) {
- $value = $extended_charset{$chx}[$focus];
- }
- if ($value eq '-1') {
- $nonword[$chx] = 1;
- }
- elsif ($value ne '0') {
- $ac_map_cs{$value} .= $ch;
- }
- }
- }
- else {
- print <<"EOM";
- <table border="1">
- <tr>
- <th>$str[62]</th>
- <th>$str[63]</th>
- <th>$str[61]</th>
- <th>$str[60]</th>
- <th>$str[59]<br />$str[57]</th>
- <th>$str[59]<br />$str[56]</th>
- <th>$str[58]<br />$str[57]</th>
- <th>$str[58]<br />$str[56]</th>
- </tr>
- EOM
- for (my $chx = 255; $chx > 0; $chx--) {
- my $ch = chr($chx);
- my @data = (-1, -1, -1, -1, 'Unused'); #default
- if (defined($base_charset{$chx})) {
- for (0..4) {
- $data[$_] = $base_charset{$chx}[$_];
- }
- }
- elsif (defined($extended_charset{$chx})) {
- for (0..4) {
- $data[$_] = $extended_charset{$chx}[$_];
- }
- }
- print qq!<tr><td align="center"><tt>! . substr(1000 + $chx, 1, 3) . qq!</tt></td><td align="center">$data[4]<br /></td><td nowrap="nowrap"><tt>!;
- if ($entity_name_by_num{$chx}) {
- my @list = split(m!\s+!, $entity_name_by_num{$chx});
- my $en;
- foreach $en (@list) {
- next unless ($en);
- print '&' . "amp;$en; - &$en;<br />";
- }
- }
- else {
- print "<br />";
- }
- print qq!</tt></td><td class="fdtan" align="center"><b>! . &he($ch) . "<br /></b></td>";
- my $zz = 0;
- for $zz (0..3) {
- if ($zz == $focus) {
- if ($data[$zz] eq '-1') {
- print qq!<td align="center" bgcolor="#cccccc">---</td>\n!;
- $nonword[$chx] = 1;
- }
- elsif ($data[$zz] eq '0') {
- print qq!<td class="fdtan" align="center"><b>$ch</b></td>\n!;
- }
- else {
- print qq!<td class="fdtan" align="center"><b>$data[$zz]</b></td>\n!;
- # format {dest} = {orig orig orig}
- $ac_map_cs{$data[$zz]} .= $ch;
- }
- }
- else {
- if ($data[$zz] eq '-1') {
- print qq!<td align="center"><br /></td>\n!;
- }
- elsif ($data[$zz] eq '0') {
- print qq!<td align="center">$ch</td>\n!;
- }
- else {
- print qq!<td align="center">$data[$zz]</td>\n!;
- }
- }
- }
- print "</tr>\n";
- next;
- }
- print '</table>';
- }
- # build the code to strip spans of non-word characters:
- my @kill = ();
- foreach (1..255) {
- next unless ($nonword[$_]);
- push(@kill,quotemeta(chr($_)));
- }
- my $frag = join("|",@kill);
- my $cnw = '';
- if ($frag) {
- $cnw = "s'($frag)+' 'og;\n";
- }
- my $ccc = '';
- foreach (keys %ac_map_cs) {
- my $ch = ();
- my @chars = ();
- foreach $ch (split(m!!, $ac_map_cs{$_})) {
- push(@chars, quotemeta($ch));
- }
- my $in = join('|',@chars);
- if (1 == length($in)) {
- $ccc .= "s!$in!$_!og;\n";
- }
- elsif ($in) {
- $ccc .= "s!($in)!$_!og;\n";
- }
- }
- # Add numeric entities for 1..255:
- for (1..255) {
- next if ($nonword[$_]);
- $entity_value_by_name{ "#$_" } = chr($_);
- }
- @kill = ();
- foreach (keys %reserved) {
- push(@kill, quotemeta(chr($_)));
- }
- $frag = join('|', @kill);
- my $csr = '';
- if ($frag) {
- $csr = "s!($frag)+! !sog;\n";
- }
- #changed 0056 - map %20 to ' ' as very special case to avoid "foo%20bar" from mapping to "foo 20bar"
- $code = <<'EOM';
- s!\%20! !sg;
- # Replace all hex entities:
- s!&#x(..);!chr(hex($1))!eisg;
- # Replace all numeric and named entities with their single-character equivalent; unknown entities will be replaced with spaces:
- s!&(\S+?);!{$entity_value_by_name{$1} || ' '}!esg;
- EOM
- $code .= $csr;
- $code .= $ccc;
- $code .= $cnw;
- return $code;
- }
- =item RawTranslate
- Usage:
- my $lc_ai_string = &RawTranslate($string);
- Returns a lowercase, accent-stripped version on its input. Replaces HTML-encoded characters with their ASCII equivalents.
- This function is called mainly by &CompressStrip; also by &LoadRules when preparing the code for ignore words.
- See http://www.utoronto.ca/webdocs/HTMLdocs/NewHTML/iso_table.html
- Dependencies:
- Called by: CompressStrip
- Called by: LoadRules
- Global: %Rules - 1
- Dependency: none
- =cut
- sub RawTranslate {
- local $_ = defined($_[0]) ? $_[0] : '';
- if (not exists($const{'conversion_code'})) {
- $const{'conversion_code'} = &create_conversion_code(0);
- }
- eval $const{'conversion_code'};
- return $_;
- }
- sub SelectAdEx {
- my ($p_terms) = @_;
- my @Ads = ('','','','');
- my $err = '';
- Err: {
- last Err if ($const{'mode'} == 3);
- my $text = '';
- ($err, $text) = &ReadFileL('ads.xml');
- next Err if ($err);
- my $ads_ver = 1;
- if ($text =~ m! version=\"(\d)!s) {
- $ads_ver = $1;
- }
- last Err unless ($text =~ m!<FDSE:Ads placement="(.*?)">(.+)</FDSE:Ads>!s);
- my ($master_pos_str, $ads) = ($1, $2);
- next unless ($master_pos_str);
- my $term_pattern = '';
- foreach (@$p_terms) {
- $term_pattern .= quotemeta($_) . '|';
- }
- if ($FORM{'Realm'}) {
- $term_pattern .= "realm:$FORM{'Realm'}|";
- }
- $term_pattern =~ s!\|$!!;
- $term_pattern = "($term_pattern)" if ($term_pattern);
- my @match_ads = ();
- my @all_ads = ();
- foreach (split(m!<FDSE:Ad !s, $ads)) {
- next unless (m!(.*?)>(.*)</FDSE:Ad>!s);
- my %adinfo = ();
- $adinfo{'text'} = $2;
- my $attributes = $1;
- while ($attributes =~ m!^\s*(\S+)\=\"(.*?)\"(.*)$!s) {
- $adinfo{$1} = $2;
- $attributes = $3;
- }
- if ($ads_ver > 1) {
- foreach (keys %adinfo) {
- $adinfo{$_} = &ud($adinfo{$_});
- }
- }
- push(@all_ads, \%adinfo);
- }
- # for each of 4 positions, select an ad:
- my $i = 1;
- for ($i = 1; $i < 5; $i++) {
- # skip if we've globally decided not to put ads in this position
- next unless ($master_pos_str =~ m!$i!);
- my ($matchweight, $weight) = (0, 0);
- my (@my_ads, @match_ads) = ();
- # Select an ad for position $i
- my $p_data = ();
- foreach $p_data (@all_ads) {
- # skip this ad if we've decided to to show it at position $i:
- next unless ($$p_data{'placement'} =~ m!$i!);
- # ok, do we have search words to work with, and are there keywords with this ad?
- my $is_keyword_match = 0;
- if (($term_pattern) and ($$p_data{'keywords'})) {
- # Is there a keyword match?
- if ($$p_data{'keywords'} =~ m!$term_pattern!i) {
- $matchweight += $$p_data{'weight'};
- push(@match_ads, $p_data);
- $is_keyword_match = 1;
- }
- }
- # have they decided that this ad *only* appears for keyword matches?
- if (($$p_data{'kw'}) and (not $is_keyword_match)) {
- # sorry maybe next time:
- next;
- }
- $weight += $$p_data{'weight'};
- push(@my_ads, $p_data);
- }
- if ($matchweight) {
- $weight = $matchweight;
- @my_ads = @match_ads;
- }
- my $num = int($weight * rand());
- foreach $p_data (@my_ads) {
- $num -= $$p_data{'weight'};
- next if ($num > 0);
- # Increment the logfile
- my $logfile = "ads_hitcount_$$p_data{'ident'}.txt";
- my $hits = 0;
- if ((not (-e $logfile)) and (open(FILE, ">$logfile" ))) {
- print FILE 0;
- close(FILE);
- }
- if (open(FILE, "+<$logfile")) {
- $hits = <FILE>;
- seek(FILE, 0, 0);
- print FILE ++$hits;
- close(FILE);
- }
- $Ads[$i-1] = $$p_data{'text'};
- last;
- }
- }
- }
- return @Ads;
- }
- sub PrintTemplate {
- my ($b_return_as_string, $file, $language, $p_replace, $p_visited, $p_cache) = @_;
- my $return_text = '';
- my $err = '';
- Err: {
- # Initialize:
- unless ($p_replace) {
- my %hash = ();
- $p_replace = \%hash;
- }
- $$p_replace{'version'} = $VERSION;
- unless ($p_visited) {
- my %hash = ();
- $p_visited = \%hash;
- }
- my $text = '';
- if (($p_cache) and ('HASH' eq ref($p_cache)) and (exists($$p_cache{$file}))) {
- $text = $$p_cache{$file};
- }
- else {
- my $fullfile = '';
- my $base = "templates/$language/";
- my $max_parents = 12;
- for (0..$max_parents) {
- $fullfile = $base . ('../' x $_) . $file;
- $fullfile =~ s!/+!/!g;
- last if (-e $fullfile);
- }
- unless (-e $fullfile) {
- $err = "unable to find file '$file'";
- next Err;
- }
- if ($fullfile =~ m!([^\\|/]+)$!) {
- $$p_visited{$1}++;
- }
- ($err, $text) = &ReadFileL($fullfile);
- next Err if ($err);
- if (($p_cache) and ('HASH' eq ref($p_cache))) {
- $$p_cache{$file} = $text;
- }
- }
- #conditionals
- foreach (reverse sort keys %$p_replace) {
- next unless (defined($_));
- $$p_replace{$_} = '' if (not defined($$p_replace{$_}));
- if ($$p_replace{$_}) {
- # true
- $text =~ s!<%\s*if\s+$_\s*%\>(.*?)<%\s*end\s*if\s*%>!$1!isg;
- $text =~ s!<%\s*(if\s+not|unless)\s+$_\s*%>.*?<%\s*end\s*if\s*%>!!isg;
- }
- else {
- # false
- $text =~ s!<%\s*if\s+$_\s*%>.*?<%\s*end\s*if\s*%>!!isg;
- $text =~ s!<%\s*(if\s+not|unless)\s+$_\s*%>(.*?)<%\s*end\s*if\s*%>!$2!isg;
- }
- }
- foreach (reverse sort keys %$p_replace) {
- #revcompat
- $text =~ s!\$$_!$$p_replace{$_}!isg;
- $text =~ s!\_\_$_\_\_!$$p_replace{$_}!isg;
- #/revcompat
- $text =~ s!\%$_\%!$$p_replace{$_}!isg;
- }
- my $pattern = '<!--#(include file|include virtual|echo var)=\"(.*?)\" -->';
- while ($text =~ m!^(.*?)$pattern(.*)$!is) {
- my ($start, $c1, $incfile, $end) = ($1, lc($2), $3, $4);
- if ($b_return_as_string) {
- $return_text .= $start;
- }
- else {
- print $start;
- }
- if ($c1 eq 'echo var') {
- my $var = uc($incfile);
- my $vardata = '';
- if ($var eq 'DATE_GMT') {
- $vardata = scalar gmtime();
- }
- elsif ($var eq 'DATE_LOCAL') {
- $vardata = scalar localtime();
- }
- elsif ($var eq 'DOCUMENT_NAME') {
- $vardata = $1 if ($0 =~ m!([^\\|/]+)$!);
- }
- elsif ($var eq 'DOCUMENT_URI') {
- $vardata = &query_env('SCRIPT_NAME');
- }
- elsif ($var eq 'LAST_MODIFIED') {
- $vardata = scalar localtime( (stat($0))[9] );
- }
- elsif (defined($ENV{$var})) {
- $vardata = &query_env($var);
- }
- if ($b_return_as_string) {
- $return_text .= $vardata;
- }
- else {
- print $vardata;
- }
- }
- else {
- my $basefile = $incfile;
- if ($incfile =~ m!.*(\\|/)(.*?)$!) {
- $basefile = $2;
- }
- my $outstr = '';
- # Do we have a file extension?
- if ($basefile !~ m!\.(txt|htm|html|shtml|stm|inc)$!i) {
- $outstr = "<!-- FDSE: not including file '$incfile' because does not have a text/html file extension -->";
- }
- elsif ($$p_visited{$basefile}) {
- $outstr = "<!-- FDSE: loop avoidance: already parsed file '$basefile' -->";
- }
- else {
- $$p_visited{$basefile}++;
- $outstr .= &PrintTemplate( $b_return_as_string, $incfile, $language, $p_replace, $p_visited );
- }
- if ($b_return_as_string) {
- $return_text .= $outstr;
- }
- else {
- print $outstr;
- }
- }
- $text = $end;
- }
- if ($b_return_as_string) {
- $return_text .= $text;
- }
- else {
- print $text;
- }
- last Err;
- }
- continue {
- if ($b_return_as_string) {
- $return_text .= &pstr(64,$err);
- }
- else {
- &ppstr(64,$err);
- }
- }
- return $return_text;
- }
- sub ReadInput {
- # Initialize:
- %FORM = ();
- my @Pairs = ();
- if (&query_env('REQUEST_METHOD') eq 'POST') {
- my $buffer = '';
- read(STDIN, $buffer, &query_env('CONTENT_LENGTH',0));
- &untaintme(\$buffer);
- @Pairs = split(m!\&!, $buffer);
- }
- elsif ($ENV{'QUERY_STRING'}) {
- @Pairs = split(m!\&!, &query_env('QUERY_STRING'));
- }
- else {
- @Pairs = @ARGV;
- }
- #changed 0054 - support for multi-select
- my ($name, $value);
- foreach (@Pairs) {
- next unless (m!^(.*?)=(.*)$!);
- ($name, $value) = &ud($1,$2);
- if (defined($FORM{$name})) {
- # multi
- $FORM{$name} .= ",$value";
- }
- else {
- $FORM{$name} = $value;
- }
- }
- #changed 0053 - support for undefined-alt-value
- foreach (keys %FORM) {
- next unless (m!^(.*)_udav$!);
- next if (defined($FORM{$1}));
- $FORM{$1} = $FORM{$_};
- }
- $FORM{'Mode'} = '' if (not (defined($FORM{'Mode'})));
- }
- sub db_exec {
- my ($statement) = @_;
- my $dbh = undef();
- my $sth = undef();
- my $err = '';
- Err: {
- $err = &get_dbh(\$dbh);
- next Err if ($err);
- unless ($sth = $dbh->prepare($statement)) {
- $err = $str[45] . ' ' . $dbh->errstr();
- next Err;
- }
- unless ($sth->execute()) {
- $err = $str[29] . ' ' . $sth->errstr();
- next Err;
- }
- }
- $sth->finish() if ($sth);
- $dbh->disconnect() if ($dbh);
- return $err;
- }
- sub get_dbh {
- my ($ref_dbh) = @_;
- my $err = '';
- Err: {
- foreach ('database', 'hostname', 'username', 'password') {
- my $var = "sql: $_";
- unless ($Rules{$var}) {
- $err = &pstr(21, $var );
- next Err;
- }
- }
- # load the DBI
- my %rq_mods = (
- 'DBI' => 0.9,
- 'DBD::mysql' => 1,
- );
- my $mod = ();
- foreach $mod ('DBI', 'DBD::mysql') {
- my $dbiver = 0;
- my $code = 'use ' . $mod . '; $dbiver = $' . $mod . '::VERSION; ';
- eval $code;
- if ($@) {
- $err = &pstr(22, $mod, $@ );
- undef($@);
- next Err;
- }
- elsif ($dbiver < $rq_mods{$mod}) {
- $err = &pstr(23, $mod, $dbiver, $rq_mods{$mod} );
- next Err;
- }
- }
- $$ref_dbh = DBI->connect("DBI:mysql:$Rules{'sql: database'}:$Rules{'sql: hostname'}", $Rules{'sql: username'}, $Rules{'sql: password'});
- unless ($$ref_dbh) {
- my $dberr = '';
- $err = $str[20];
- eval '$dberr = DBI->errstr();';
- if ($@) {
- # well, some old DBI versions don't support DBI->errstr()
- undef($@);
- }
- else {
- $err .= ' - ' . $dberr;
- }
- next Err;
- }
- }
- return $err;
- }
- sub Trim {
- local $_ = defined($_[0]) ? $_[0] : '';
- s!^[\r\n\s]+!!o;
- s![\r\n\s]+$!!o;
- return $_;
- }
- sub url_encode {
- local $_ = defined($_[0]) ? $_[0] : '';
- s!([^a-zA-Z0-9_.-])!uc(sprintf("%%%02x", ord($1)))!eg;
- return $_;
- }
- sub ud {
- my @out = @_;
- local $_;
- foreach (@out) {
- next unless (defined($_));
- tr!+! !;
- s!\%([a-fA-F0-9][a-fA-F0-9])!pack('C', hex($1))!eg;
- }
- if ((wantarray) or ($#out > 0)) {
- return @out;
- }
- else {
- return $out[0];
- }
- }
- sub ReadFile {
- my ($file) = @_;
- my ($err, $text) = ('', '');
- Err: {
- my ($BytesToRead, $BytesRead, $obj, $p_rhandle) = (-s $file);
- last Err unless ($BytesToRead);
- $obj = &LockFile_new();
- ($err, $p_rhandle) = $obj->Read($file);
- next Err if ($err);
- $BytesRead = read($$p_rhandle, $text, $BytesToRead);
- $err = $obj->Close();
- next Err if ($err);
- unless ($BytesRead == $BytesToRead) {
- $err = &pstr(47, $file, $BytesRead, $BytesToRead );
- next Err;
- }
- }
- return ($err, $text);
- }
- sub ReadFileL {
- my ($file) = @_;
- my ($err,$text) = ('','');
- Err: {
- unless (open(FILE, "<$file")) {
- $err = &pstr(44,$file,$!);
- next Err;
- }
- unless (binmode(FILE)) {
- $err = &pstr(39,$file,$!);
- next Err;
- }
- $text = join('',<FILE>);
- }
- close(FILE);
- return ($err,$text);
- }
- sub log_search {
- my ($realm, $terms, $rank, $documents_found, $documents_searched) = @_;
- my $err = '';
- Err: {
- last unless ($Rules{'logging: enable'});
- $terms = &he( $terms );
- #changed 0058
- if ($realm eq 'include-by-name') {
- my @realms = ();
- foreach (keys %FORM) {
- next unless (m!^Realm:(.+)$!);
- push(@realms, $1);
- }
- $realm = join('|',sort @realms);
- }
- my $host = &query_env('REMOTE_HOST') || $private{'visitor_ip_addr'} || 'undefined';
- my $time = time();
- my $human_time = &FormatDateTime( $time, 14, 0 );
- if ($Rules{'sql: logfile'}) {
- $terms =~ s!\'!\'\'!g;
- my $query = "INSERT INTO $Rules{'sql: table name: logs'} (visitor_ip, unix_time, human_time, realm, terms, rank, documents_found, documents_searched) VALUES ('$host', $time, now(), '$realm', '$terms', $rank, $documents_found, $documents_searched)";
- $err = &db_exec($query);
- next Err if ($err);
- }
- else {
- my $lang = $Rules{'language'};
- $lang =~ s!\,|\r|\n|\015|\012!!sg;
- my @fields = ($host,$time,$human_time,$realm,$terms,$rank,$documents_found,$documents_searched,$lang);
- #validate/cleanse all fields so as not to corrupt CSV
- foreach (@fields) {
- s!(\,|\s|\r|\n|\015|\012|\")+! !sg;
- }
- my $logline = join(',', @fields) . ",\n";
- $logline =~ s!^(.+?)\,(.*)!$1 ,$2!; # insert space before first comma
- unless (open(LOGFILE, ">>search.log.txt")) {
- $err = &pstr(42,'search.log.txt',$!);
- next Err;
- }
- binmode(LOGFILE);
- print LOGFILE $logline;
- close(LOGFILE);
- chmod($private{'file_mask'},'search.log.txt');
- }
- eval {
- DBMLog: {
- last DBMLog unless ($Rules{'use dbm routines'});
- if (length($terms) > 64) {# prevent overflow in dbm key-value len
- $terms = substr($terms,0,64);
- }
- my (%str_all, %str_t20) = ();
- last DBMLog unless (dbmopen( %str_all, 'dbm_strlog_all', 0666 ));
- my $total = ++$str_all{$terms};
- #maxval
- if (not defined($str_all{'+++'})) {
- $str_all{'+++'} = $total;
- }
- elsif ($total > $str_all{'+++'}) {
- $str_all{'+++'} = $total;
- }
- $str_all{'++'} = time() unless ($str_all{'++'});
- $str_all{'+'} = $str_all{'+'} || 0; # boundary
- last unless ($total >= $str_all{'+'});
- last DBMLog unless ($Rules{'logging: display most popular'});
- dbmopen( %str_t20, 'dbm_strlog_top', 0666 ) || die &pstr( 43, 'dbm_strlog_top', $! );
- $str_t20{'++'} = time() unless ($str_t20{'++'});
- $str_t20{$terms} = $total;
- my $maxval = 0;
- my $count = 0;
- foreach (sort { $str_t20{$b} <=> $str_t20{$a} || $a cmp $b } keys %str_t20) {
- next if (m!^\++$!);
- $count++;
- if ($count > $Rules{'logging: display most popular'}) {
- delete $str_t20{$_};
- }
- else {
- if ($str_t20{$_} > $maxval) {
- $maxval = $str_t20{$_};
- }
- $str_all{'+'} = $str_t20{$_};
- }
- }
- if ($count < $Rules{'logging: display most popular'}) {
- $str_all{'+'} = 0;
- }
- #maxval
- if (not defined($str_t20{'+++'})) {
- $str_t20{'+++'} = $maxval;
- }
- elsif ($maxval > $str_t20{'+++'}) {
- $str_t20{'+++'} = $maxval;
- }
- }
- };
- if ($@) {
- &ppstr(53, &pstr(67, &he($@), "$const{'help_file'}1169.html" ) );
- }
- }
- return $err;
- }
- sub FormatNumber {
- my ( $expression, $decimal_places, $include_leading_digit, $use_parens_for_negative, $group_digits, $euro_style ) = @_;
- my $dec_ch = ($euro_style) ? ',' : '.';
- my $tho_ch = ($euro_style) ? '.' : ',';
- my $qm_dec_ch = quotemeta( $dec_ch );
- local $_ = $expression;
- unless (m!^\-?\d*\.?\d*$!) {
- #print "Warning: arg '$num' isn't numeric.\n";
- $_ = 0;
- }
- my $exp = 1;
- for (1..$decimal_places) {
- $exp *= 10;
- }
- $_ *= $exp;
- $_ = int($_);
- $_ = ($_ / $exp);
- # Add a trailing decimal divider if we don't have one yet
- $_ .= '.' unless (m!\.!);
- # Pad zero'es if appropriate:
- if ($decimal_places) {
- if (m!^(.*)\.(.*)$!) {
- $_ .= '0' x ($decimal_places - length($2));
- }
- }
- # Re-write with localized decimal divider:
- s!\.!$dec_ch!o;
- # Group digits:
- if ($group_digits) {
- while (m!(.*)(\d)(\d\d\d)(\,|\.)(.*)!) {
- $_ = "$1$2$tho_ch$3$4$5";
- }
- }
- if ($include_leading_digit) {
- s!^$qm_dec_ch!0$dec_ch!o;
- }
- # Have we somehow ended up with just a decimal point? Make it zero then:
- if ("foo$_" eq "foo$dec_ch") {
- $_ = "0";
- }
- # Strip trailing decimal point
- s!$qm_dec_ch$!!o;
- if ($use_parens_for_negative) {
- s!^\-(.*)$!\($1\)!o;
- }
- return $_;
- }
- sub FormatDateTime {
- my ($time, $format_type, $b_format_as_gmt) = @_;
- $format_type = 0 unless ($format_type);
- my $date_str = '';
- $time = 0 unless ($time);
- if ($format_type == 13) {
- if ($b_format_as_gmt) {
- $date_str = scalar gmtime( $time );
- }
- else {
- $date_str = scalar localtime( $time );
- }
- }
- else {
- my ($sec, $min, $milhour, $day, $month_index, $year, $weekday_index) = ($b_format_as_gmt) ? gmtime( $time ) : localtime( $time );
- $year += 1900;
- my $ampm = ( $milhour >= 12 ) ? 'PM' : 'AM';
- my $relhour = (($milhour - 1) % 12) + 1;
- my $month = $month_index + 1;
- foreach ($milhour, $relhour, $min, $sec, $month, $day) {
- $_ = "0$_" if (1 == length($_));
- }
- my @MonthNames = (
- $str[8],
- $str[9],
- $str[26],
- $str[32],
- $str[40],
- $str[48],
- $str[36],
- $str[34],
- $str[33],
- $str[31],
- $str[30],
- $str[27],
- );
- my @WeekNames = (
- $str[25],
- $str[24],
- $str[28],
- $str[7],
- $str[6],
- $str[5],
- $str[66],
- );
- my $full_weekday = $WeekNames[$weekday_index];
- my $short_weekday = substr($full_weekday, 0, 3);
- my $full_monthname = $MonthNames[$month_index];
- my $short_monthname = substr($full_monthname, 0, 3); #localize bug?
- if ($format_type == 0) {
- $date_str = "$month/$day/$year $relhour:$min:$sec $ampm";
- }
- elsif ($format_type == 1) {
- $date_str = "$full_weekday, $full_monthname $day, $year";
- }
- elsif ($format_type == 2) {
- $date_str = "$month/$day/$year";
- }
- elsif ($format_type == 3) {
- $date_str = "$relhour:$min:$sec $ampm";
- }
- elsif ($format_type == 4) {
- $date_str = "$milhour:$min";
- }
- elsif ($format_type == 10) {
- $date_str = "$short_weekday $month/$day/$year $relhour:$min:$sec $ampm";
- }
- elsif ($format_type == 11) {
- $date_str = "$short_weekday, $day $short_monthname $year $milhour:$min:$sec -0000";
- }
- elsif ($format_type == 12) {
- $date_str = "$year-$month-$day $milhour:$min:$sec";
- }
- elsif ($format_type == 14) {
- $date_str = "$month/$day/$year $milhour:$min";
- }
- }
- return $date_str;
- }
- sub SetDefaults {
- my ($text, $p_params) = @_;
- # short-circuit:
- if ((ref($p_params) ne 'HASH') or (not (%$p_params))) {
- return $text;
- }
- my @array = split(m!<(INPUT|SELECT|TEXTAREA)([^\>]+?)\>!is, $text);
- my $finaltext = $array[0];
- my $setval;
- my $x = 1;
- for ($x = 1; $x < $#array; $x += 3) {
- my ($uctag, $origtag, $attribs, $trail) = (uc($array[$x]), $array[$x], $array[$x+1] || '', $array[$x+2] || '');
- Tweak: {
- my $tag_name = '';
- if ($attribs =~ m! NAME\s*=\s*\"([^\"]+?)\"!is) {
- $tag_name = $1;
- }
- elsif ($attribs =~ m! NAME\s*=\s*(\S+)!is) {
- $tag_name = $1;
- }
- else {
- # we cannot modify what we do not understand:
- last Tweak;
- }
- last Tweak unless (defined($$p_params{$tag_name}));
- $setval = &he($$p_params{$tag_name});
- if ($uctag eq 'INPUT') {
- # discover VALUE and TYPE
- my $type = 'TEXT';
- if ($attribs =~ m! TYPE\s*=\s*\"([^\"]+?)\"!is) {
- $type = uc($1);
- }
- elsif ($attribs =~ m! TYPE\s*=\s*(\S+)!is) {
- $type = uc($1);
- }
- # discover VALUE and TYPE
- my $value = '';
- if ($attribs =~ m! VALUE\s*=\s*\"([^\"]+?)\"!is) {
- $value = $1;
- }
- elsif ($attribs =~ m! VALUE\s*=\s*(\S+)!is) {
- $value = $1;
- }
- # we can only set values for known types:
- if (($type eq 'RADIO') or ($type eq 'CHECKBOX')) {
- #changed 2001-11-15; strip pre-existing checks
- $attribs =~ s! (checked="checked"|checked)($| )!$2!ois;
- if ($setval eq $value) {
- $attribs = qq! checked="checked"$attribs!;
- }
- }
- elsif (($type eq 'TEXT') or ($type eq 'PASSWORD') or ($type eq 'HIDDEN')) {
- # but only hidden fields if value is null:
- last Tweak if (($type eq 'HIDDEN') and ($value ne ''));
- # replace any existing VALUE tag:
- my $qm_value = quotemeta($value);
- $attribs =~ s! value\s*=\s*\"$qm_value\"! value="$setval"!iso;
- $attribs =~ s! value\s*=\s*$qm_value! value="$setval"!iso;
- # add the tag if it's not present (i.e. if no VALUE was present in original tag)
- my $qm_setval = quotemeta($setval);
- unless ($attribs =~ m! VALUE="$qm_setval"!is) {
- $attribs = " value=\"$setval\"$attribs";
- }
- }
- }
- elsif ($uctag eq 'SELECT') {
- # does not support <OPTION>value syntax, only <OPTION VALUE="value">value
- my $lc_set_value = lc($setval);
- my @frags = ();
- foreach (split(m!<option!is, $trail)) {
- #changed 2001-11-15; strip pre-existing "selected"
- $_ =~ s! (selected|selected="selected")($| )!$2!ois;
- if (m!VALUE\s*=\s*\"(.*?)\"!is) {
- if ($lc_set_value eq lc($1)) {
- $_ = ' selected="selected"' . $_;
- }
- }
- elsif (m!VALUE\s*=\s*(\S+)!is) {
- if ($lc_set_value eq lc($1)) {
- $_ = ' selected="selected"' . $_;
- }
- }
- push(@frags, $_);
- }
- $trail = join('<option', @frags);
- }
- elsif ($uctag eq 'TEXTAREA') {
- $trail =~ s!^.*?</(textarea)>!$setval</$1>!osi;
- }
- last Tweak;
- }
- $finaltext .= "<$origtag$attribs>$trail";
- }
- return $finaltext;
- }
- sub SearchIndexFile {
- my $err = '';
- Err: {
- local $_;
- my ($file, $search_code, $r_pages_searched, $r_hits) = @_;
- my ($obj, $p_rhandle) = ();
- $obj = &LockFile_new();
- ($err, $p_rhandle) = $obj->Read( $file );
- next Err if ($err);
- eval($search_code);
- die $@ if ($@);
- $err = $obj->Close();
- next Err if ($err);
- last Err;
- }
- continue {
- &ppstr(64,$err);
- }
- }
- sub SearchDatabase {
- local $_;
- my $dbh = undef();
- my $sth = undef();
- my ($where_clause, $DocSearch, $r_hits) = @_;
- my @WordCount = ();
- my $pages_searched = 0;
- my $r_pages_searched = \$pages_searched;
- my ($WordMatches, $sort_num, $u, $t, $d, $k, $hdr, $n_context_matches, $context_str, $delta, $text);
- my $err = '';
- Err: {
- my $query = "SELECT * FROM $Rules{'sql: table name: addresses'}";
- if ($where_clause) {
- $query .= ' WHERE ' . $where_clause;
- }
- $err = &get_dbh(\$dbh);
- next Err if ($err);
- unless ($sth = $dbh->prepare($query)) {
- $err = $str[45] . ' ' . $dbh->errstr();
- next Err;
- }
- unless ($sth->execute()) {
- $err = $str[29] . ' ' . $sth->errstr();
- next Err;
- }
- undef($@);
- my $p_data = ();
- while ($p_data = $sth->fetchrow_hashref()) {
- ($err, $_) = &text_record_from_hash( $p_data );
- next if ($err);
- eval($DocSearch);
- die($@) if ($@);
- }
- last Err;
- }
- continue {
- &ppstr(64,$err);
- }
- $sth->finish() if ($sth);
- $dbh->disconnect() if ($dbh);
- }
- sub leadpad {
- my ($expr, $padch, $padlen) = @_;
- if (length($expr) <= $padlen) {
- return ($padch x ($padlen - length($expr))) . $expr;
- }
- else {
- return substr($expr, length($expr) - $padlen, 6);
- }
- }
- sub text_record_from_hash {
- my ($p_pagedata) = @_;
- my ($err, $text_record) = ('', '');
- Err: {
- my @require_fields = ('url', 'promote', 'size', 'title', 'description', 'keywords', 'text', 'links');
- foreach (@require_fields) {
- next if (defined($$p_pagedata{$_}));
- $err = &pstr(21,$_);
- next Err;
- }
- &compress_hash( $p_pagedata );
- $text_record = '';
- foreach ('promote', 'dd', 'mm') {
- $text_record .= &leadpad( $$p_pagedata{$_}, '0', 2 );
- }
- #changed 0053 - not longer forcing size to be 6 digits
- $text_record .= $$p_pagedata{'yyyy'} . $$p_pagedata{'size'};
- foreach ('url', 'title', 'description') {
- $$p_pagedata{$_} =~ s'= '=%20'og;
- }
- $text_record .= ' ' . $$p_pagedata{'lastmodtime'};
- $text_record .= ' ' . $$p_pagedata{'lastindex'};
- $text_record .= ' u= ' . $$p_pagedata{'url'};
- $text_record .= ' t= ' . $$p_pagedata{'title'};
- $text_record .= ' d= ' . $$p_pagedata{'description'};
- $text_record .= ' uM=' . $$p_pagedata{'um'};
- $text_record .= 'uT=' . $$p_pagedata{'ut'};
- $text_record .= 'uD=' . $$p_pagedata{'ud'};
- $text_record .= 'uK=' . $$p_pagedata{'uk'};
- $text_record .= 'h=' . $$p_pagedata{'text'};
- $text_record .= 'l=' . $$p_pagedata{'links'};
- $text_record .= "\n";
- last Err;
- }
- return ($err, $text_record);
- }
- sub compress_hash {
- my ($p_pagedata) = @_;
- return if ($$p_pagedata{'compressed'});
- # Solidify time fields:
- foreach ('lastindex', 'lastmodtime') {
- $$p_pagedata{$_} = time() unless ($$p_pagedata{$_});
- }
- unless (($$p_pagedata{'dd'}) and ($$p_pagedata{'mm'}) and ($$p_pagedata{'yyyy'})) {
- ($$p_pagedata{'dd'}, $$p_pagedata{'mm'}, $$p_pagedata{'yyyy'}) = (localtime($$p_pagedata{'lastmodtime'}))[3..5];
- $$p_pagedata{'yyyy'} += 1900;
- }
- my %pairs = (
- 'um' => 'url',
- 'ut' => 'title',
- 'ud' => 'description',
- 'uk' => 'keywords',
- 'text' => 'text',
- 'links' => 'links',
- );
- my ($name, $value) = ();
- while (($name, $value) = each %pairs) {
- $$p_pagedata{$name} = &CompressStrip($$p_pagedata{$value});
- }
- $$p_pagedata{'compressed'} = 1;
- }
- sub StandardVersion {
- my ($p_search_terms, %pagedata) = @_;
- local $_;
- foreach ('redirector', 'relevance', 'record_realm', 'context') {
- $pagedata{$_} = '' unless (defined($pagedata{$_}));
- }
- unless ((defined($pagedata{'dd'})) and (defined($pagedata{'mm'})) and (defined($pagedata{'yyyy'}))) {
- if ($pagedata{'lastindex'}) {
- ($pagedata{'dd'}, $pagedata{'mm'}, $pagedata{'yyyy'}) = (localtime($pagedata{'lastmodtime'}))[3..5];
- $pagedata{'yyyy'} += 1900;
- }
- }
- $pagedata{'day'} = $pagedata{'dd'};
- $pagedata{'month'} = ('Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec')[$pagedata{'mm'}];
- $pagedata{'year'} = $pagedata{'yyyy'};
- #changed 0056
- $pagedata{'size'} = &FormatNumber( ($pagedata{'size'} + 1023) / 1024, 0, 1, 0, 1, $Rules{'ui: number format'} ) . 'KB';
- if ($p_search_terms) {
- # use two marker chars for start-pattern and end-pattern
- # marker chars are those that are guaranteed to be stripped
- my $sm1 = chr(10);
- my $em1 = chr(13);
- my $sm2 = chr(7);
- my $em2 = chr(8);
- my $Term = '';
- foreach $Term (@$p_search_terms) {
- $Term =~ s!\*!!g; #changed 0054
- my $Temp = quotemeta(&Trim($Term)); #changed 0046
- next if ($Temp =~ m!$sm1|$em1|$sm2|$em2!s);
- $pagedata{'description'} =~ s!($Temp)!$sm1$1$em1!isg;
- $pagedata{'context'} =~ s!($Temp)!$sm2$1$em2!isg;
- }
- $pagedata{'description'} =~ s!$sm1!<b class="hl1">!sg;
- $pagedata{'description'} =~ s!$em1!</b>!sg;
- $pagedata{'context'} =~ s!$sm2!<b class="hl2">!sg;
- $pagedata{'context'} =~ s!$em2!</b>!sg;
- }
- if ($pagedata{'context'}) {
- $pagedata{'context_line'} = "<br /><b>$str[35]:</b> $pagedata{'context'}";
- }
- else {
- $pagedata{'context_line'} = '';
- }
- $pagedata{'admin_options'} = '' unless (defined($pagedata{'admin_options'}));
- $pagedata{'url'} = &rewrite_url( 1, $pagedata{'url'} );
- if ($pagedata{'url'} =~ m!^\w+\://([^/]+)!) {
- $pagedata{'host'} = $1;
- }
- #revcompat - 0033
- $pagedata{'target'} = '';
- #/revcompat
- #changed 0050
- $pagedata{'url_terms'} = &url_encode($const{'terms'});
- $pagedata{'url_url'} = &url_encode($pagedata{'url'});
- $pagedata{'html_url'} = $pagedata{'url'} = &he($pagedata{'url'});
- #changed 0053 - all const avail
- my ($n,$v);
- while (($n,$v) = each %const) {
- $pagedata{$n} = $const{$n} unless defined($pagedata{$n});
- }
- return &PrintTemplate( 1, 'line_listing.txt', $Rules{'language'}, \%pagedata, 0, \%const);
- }
- sub str_jumptext {
- my ( $start_pos, $units_per_page, $maximum, $url, $b_is_exact_count ) = @_;
- $start_pos = 1 if ($start_pos < 1);
- my $end_pos = $start_pos + $units_per_page - 1;
- unless ($b_is_exact_count) {
- $b_is_exact_count = 1 if ($maximum < $end_pos);
- }
- $end_pos = $maximum if ($maximum < $end_pos);
- my ($jump_sum, $jumptext) = ('', '');
- if ($b_is_exact_count) {
- $jump_sum = &pstr(15, $start_pos, $end_pos, $maximum );
- }
- else {
- $jump_sum = &pstr(15, $start_pos, $end_pos, $end_pos . '+' );
- # Okay, we've printed what we know. Now, for purposes of generating advance links, pretend that there's at least one page beyond this one (we know that if max < curr+units then we would have toggled to b_is_exact_count earlier. and if max already exceeds this page's worth fo data, then there's no need to tweak it:
- if ($maximum == $end_pos) {
- $maximum++;
- }
- }
- if ($maximum > $units_per_page) {
- # Time for a scrolling thing - "<- Previous 1 2 3 4 5 Next ->"
- $jumptext .= '<p class="fd_results">';
- $jumptext .= $str[16];
- $jumptext .= ' ';
- if ($start_pos > 1) {
- $jumptext .= "[ <a href=\"$url" . ($start_pos - $units_per_page) . "\"><< $str[17]</a> ] ";
- }
- my $nlinks = 1 + int(($maximum - 1) / $units_per_page);
- my $thislink = 1 + int($start_pos / $units_per_page);
- my $start = 1;
- if ($thislink > 15) {
- $start = $thislink - 15;
- }
- my $x = 0;
- for ($x = $start; $x <= $nlinks; $x++) {
- if ($x == $thislink) {
- $jumptext .= " <b>$x</b>";
- }
- else {
- $jumptext .= " <a href=\"$url" . (1 + (($x - 1) * $units_per_page)) . "\">$x</a>\n";
- }
- last if ($x > ($start + 18));
- }
- if ($maximum > $end_pos) {
- $jumptext .= " [ <a href=\"$url" . ($start_pos + $units_per_page) . "\">$str[18] >></a> ]";
- }
- $jumptext .= "</p>\n";
- }
- return ('<p class="fd_results">' . $jump_sum . '</p>', $jumptext);#changed 0054 - para
- }
- sub Assert {
- return if ($_[0]);
- my ($package, $file, $line) = caller();
- print "Content-Type: text/html\015\012\015\012";
- print "<hr /><h1><pre>Assertion Error:<br /> Package: $package<br /> File: $file<br /> Line: $line</pre></h1><hr />";
- }
- sub LoadRules {
- my ($DEFAULT_LANGUAGE) = @_;
- my $err = '';
- Err: {
- %Rules = ();
- my $FDR = &FD_Rules_new();
- $Rules{'file'} = $FDR->{'file'};
- my $text = '';
- ($err, $text) = &ReadFile($Rules{'file'});
- next Err if ($err);
- my $line = '';
- foreach $line (split(m!\r|\n!s, $text)) {
- next if ($line =~ m!^\s*\#!); # skip comments
- next unless ($line =~ m!(.*?)=(.*)!);
- my ($name, $value) = (lc(&Trim($1)), &Trim($2));
- my ($is_valid, $valid_value) = $FDR->_fdr_validate($name, $value);
- $Rules{$name} = $valid_value;
- }
- #revcompat pre 0056
- if ((($Rules{'allow index entire site'}) or ($Rules{'allow filtered realms'})) and (not defined($Rules{'show advanced commands'}))) {
- $Rules{'show advanced commands'} = 1;
- }
- #/revcompat
- my $r_defaults = $FDR->{'r_defaults'};
- if (($r_defaults) and ('HASH' eq ref($r_defaults))) {
- my %defhash = %$r_defaults;
- local $_;
- while (defined($_ = each %defhash)) {
- next if defined($Rules{$_});
- if ($_ eq 'language') {
- $Rules{$_} = $DEFAULT_LANGUAGE;
- next;
- }
- $Rules{$_} = $defhash{$_}[0];
- }
- }
- # build derived values:
- if ($Rules{'admin notify: sendmail program'}) {
- my $b_is_valid = 0;
- foreach (@sendmail) {
- $b_is_valid = 1 if ($_ eq $Rules{'admin notify: sendmail program'});
- }
- unless ($b_is_valid) {
- $Rules{'admin notify: sendmail program'} = '';
- }
- }
- foreach ('wildcard match','ignore words') {
- next unless ($Rules{$_});
- $Rules{$_} =~ s!\?\{!!sg; # strip code-exec regex
- }
- my %NewWords = ();
- foreach (split(m!\s+!s, &RawTranslate($Rules{'ignore words'}))) {
- $NewWords{quotemeta($_)}++;
- }
- my $frag = join('|', sort keys %NewWords);
- $frag =~ s!^\|!!;
- $frag =~ s!\|$!!;
- $const{'code_strip_ignored_words'} = "s! ($frag) ! !sog;";
- my @ignored_extensions = split(m!\s+!, $Rules{'crawler: ignore links to'});
- if (@ignored_extensions) {
- my %ig_ext = ();
- foreach (@ignored_extensions) {
- $ig_ext{quotemeta(lc($_))}++;
- }
- $const{'pattern_is_ignored_extension'} = '\.(' . join('|', sort keys %ig_ext) . ')$';
- }
- else {
- $const{'pattern_is_ignored_extension'} = '';
- }
- }
- return $err;
- }
- sub str_search_form {
- my ($action) = @_;
- my %replace = %const;
- #revcompat - 0032
- $replace{'displayterms'} = '';
- $replace{'selectmatch'} = '<select name="match"><option value="1">All</option><option value="0">Any</option></select>';
- #/revcompat
- $replace{'realm_options'} = '';
- $replace{'selectrealm'} = '<select name="Realm"><option value="All">[ All ]</option>';
- my $p_realm = ();
- foreach $p_realm ($realms->listrealms('no_error')) {
- $replace{'selectrealm'}.= "\t<option value=\"$$p_realm{'html_name'}\">$$p_realm{'html_name'}</option>\n";
- $replace{'realm_options'} .= "\t<option value=\"$$p_realm{'html_name'}\">$$p_realm{'html_name'}</option>\n";
- }
- $replace{'selectrealm'} .= '</select>';
- my $html = qq!<form method="get" action="$action">\n!;
- my $custom = &PrintTemplate( 1, 'searchform.htm', $Rules{'language'}, \%replace );
- local $_;
- foreach (keys %FORM) {
- next unless (m!^p:!);
- my $qm_n = quotemeta($_);
- next if ($custom =~ m!$qm_n!s); # if user already has something like "p:pm" in their custom search form, don't risk double-ing up with a hidden field
- my ($n, $v) = &he( $_, $FORM{$_} );
- $html .= qq!<input type="hidden" name="$n" value="$v" />\n!;
- }
- $html .= $custom;
- $html .= "\n</form>\n";
- my %defaults = %FORM;
- $defaults{'Terms'} = $FORM{'Terms'} || '';
- $defaults{'p:ssm'} = defined($FORM{'p:ssm'}) ? $FORM{'p:ssm'} : $Rules{'default substring match'};
- if ($defaults{'Terms'} eq '') {
- $defaults{'Terms'} = $Rules{'default search terms'};
- }
- unless ($defaults{'Realm'}) {
- $defaults{'Realm'} = 'All';
- }
- return &SetDefaults($html,\%defaults);
- }
- sub parse_search_terms {
- my ($str_terms, $str_match, $b_substring_match) = @_;
- my ($bTermsExist, $Ignored_Terms, $Important_Terms, $DocSearch, $RealmSearch, $where_clause, $sort_method, @search_terms) = (0, '', '', '', '', '', 0);
- # use match==2 to force search-as-phrase
- if ($str_match eq '2') {
- $str_terms =~ s!\s+!_!sg;
- }
- my $terms = $str_terms;
- my $IgnoreQuotedTerms = 0;
- unless ($str_match) {
- # if this is a non-special string - without meta characters, but containing a phrase - addx a special phrased version
- # of the terms for better matching:
- if (($terms =~ m! !) and not ($terms =~ m!(\W|\-|not |and |or )!i)) {
- $terms = "\"$terms\" $terms";
- $IgnoreQuotedTerms = 1;
- }
- }
- $terms = ' '.$terms.' ';
- $terms =~ s'\s+' 'g;
- # changed 0056 - support "title: keyword" w/ space
- $terms =~ s! (url|host|domain|title|text|link)\: (\S)! $1:$2!ig;
- my ($i, $ProcTerms) = (0, '');
- foreach (split(m!\"!, $terms)) {
- tr! !_! if $i;
- $i = not $i;
- $ProcTerms .= $_;
- }
- $ProcTerms =~ s' not ' -'ig;
- $ProcTerms =~ s' and ' +'ig;
- $ProcTerms =~ s' or ' |'ig;
- my ($EvalForbid, $EvalRequired, $EvalOptional, $EvalExtraRequired, $EvalExtraOptional) = ('', '', '', '', '');
- my $tm = $Rules{'multiplier: title'};
- my $um = $Rules{'multiplier: url'};
- my $km = $Rules{'multiplier: keyword'};
- my $dm = $Rules{'multiplier: description'};
- my (@invalid_terms, @valid_terms) = ();
- my $default_type = ($str_match) ? 3 : 2;
- my $chars_per_context_hit = 36;
- my $str_context_hit_before = '';
- my $str_context_hit_after = '... ';
- my (@required_clauses, @optional_clauses) = ();
- Term: foreach (split(m!\s+!, $ProcTerms)) {
- # Remove the underscores that are binding the phrases together:
- s!_! !g;
- next unless ($_);
- my ($type, $is_attrib_search, $str_pattern, $sql_clause) = &format_term_ex($_, $default_type, $b_substring_match);
- if ($type == 0) {
- push(@invalid_terms, $_);
- next;
- }
- if ($sql_clause) {
- if ($type == 2) {
- push(@optional_clauses, $sql_clause);
- }
- else {
- push(@required_clauses, $sql_clause);
- }
- }
- # only get the search context if this is *not* an attrib search
- if ($type == 1) {
- $EvalForbid .= "\tlast SearchBlock if (m!$str_pattern!o);\n";
- }
- elsif ($type == 2) {
- if (($Rules{'show examples: enable'}) and (not ($is_attrib_search))) {
- my @temp;
- my $ignore_blocks = scalar (@temp = ($str_pattern =~ m!\(!og));
- $EvalOptional .= <<"EOM";
- if (\$n_context_matches) {
- \$delta = scalar (\@WordCount = (\$text =~ m!([^\=]{0,$chars_per_context_hit})($str_pattern)([^\=]{0,$chars_per_context_hit})!og));
- if (\$delta) {
- \$WordMatches += \$delta;
- my \$x = 0;
- while ((\$x + 2 + $ignore_blocks) <= \$#WordCount) {
- my \$full_context = \$WordCount[\$x] . \$WordCount[\$x + 1] . \$WordCount[\$x + 2 + $ignore_blocks];
- \$x += 3 + $ignore_blocks;
- next unless (\$full_context =~ m! (.*) !);
- \$context_str .= "$str_context_hit_before\$1$str_context_hit_after";
- \$n_context_matches--;
- last unless (\$n_context_matches);
- }
- }
- else {
- # second-try pattern match, for those outside the h= l= area:
- \$WordMatches += scalar (\@WordCount = m!$str_pattern!og);
- }
- }
- else {
- \$WordMatches += scalar (\@WordCount = m!$str_pattern!og);
- }
- EOM
- }
- else {
- $EvalOptional .= <<"EOM";
- \$WordMatches += scalar (\@WordCount = m!$str_pattern!og);
- EOM
- }
- }
- elsif ($type == 3) {
- if (($Rules{'show examples: enable'}) and (not ($is_attrib_search))) {
- my @temp;
- my $ignore_blocks = scalar (@temp = ($str_pattern =~ m!\(!og));
- $EvalRequired .= <<"EOM";
- if (\$n_context_matches) {
- \$delta = scalar (\@WordCount = (\$text =~ m!([^\=]{0,$chars_per_context_hit})($str_pattern)([^\=]{0,$chars_per_context_hit})!og));
- if (\$delta) {
- \$WordMatches += \$delta;
- my \$x = 0;
- while ((\$x + 2 + $ignore_blocks) <= \$#WordCount) {
- my \$full_context = \$WordCount[\$x] . \$WordCount[\$x + 1] . \$WordCount[\$x + 2 + $ignore_blocks];
- \$x += 3 + $ignore_blocks;
- next unless (\$full_context =~ m! (.*) !);
- \$context_str .= "$str_context_hit_before\$1$str_context_hit_after";
- \$n_context_matches--;
- last unless (\$n_context_matches);
- }
- }
- else {
- # second-try pattern match, for those outside the h= l= area:
- \$delta = scalar (\@WordCount = m!$str_pattern!og);
- last SearchBlock unless (\$delta);
- \$WordMatches += \$delta;
- }
- }
- else {
- \$delta = scalar (\@WordCount = m!$str_pattern!og);
- last SearchBlock unless (\$delta);
- \$WordMatches += \$delta;
- }
- EOM
- }
- else {
- $EvalRequired .= <<"EOM";
- \$delta = scalar (\@WordCount = m!$str_pattern!og);
- last SearchBlock unless (\$delta);
- \$WordMatches += \$delta;
- EOM
- }
- }
- if ($type == 1) {
- push(@invalid_terms, $_);
- }
- else {
- push(@valid_terms, $_);
- $EvalExtraRequired .= "\t\$WordMatches += $um * (\@WordCount = (\$u =~ m!$str_pattern!og));\n" if $um;
- $EvalExtraRequired .= "\t\$WordMatches += $tm * (\@WordCount = (\$t =~ m!$str_pattern!og));\n" if $tm;
- $EvalExtraRequired .= "\t\$WordMatches += $dm * (\@WordCount = (\$d =~ m!$str_pattern!og));\n" if $dm;
- $EvalExtraRequired .= "\t\$WordMatches += $km * (\@WordCount = (\$k =~ m!$str_pattern!og));\n" if $km;
- $bTermsExist = 1;
- }
- # strip leading characters:
- s!^(\-|\+|\|)!!;
- push(@search_terms, $_) if ($_);
- }
- # double-quote terms with embedded spaces:
- @invalid_terms = map { m! ! ? "$_" : $_ } @invalid_terms;
- # double-quote terms with embedded spaces:
- if ($IgnoreQuotedTerms) {
- @valid_terms = map { m! ! ? '' : $_ } @valid_terms;
- }
- else {
- @valid_terms = map { m! ! ? "$_" : $_ } @valid_terms;
- }
- $Ignored_Terms = join(', ', @invalid_terms);
- $Important_Terms = join(', ', @valid_terms);
- # extract $text early if we're doing context matching - otherwise wait till later
- my $sort_code = '';
- $sort_method = $Rules{'sorting: default sort method'};
- if (($FORM{'sort-method'}) and ($FORM{'sort-method'} =~ m!^\d+$!) and (0 < $FORM{'sort-method'}) and ($FORM{'sort-method'} < 7)) {
- $sort_method = $FORM{'sort-method'};
- }
- if (($sort_method < 3) and (($Rules{'sorting: time sensitive'}) or ($FORM{'p:ts'}))) {
- $sort_code = <<'EOM';
- m!^\d+ (\d+)!;
- my $age = $private{'script_start_time'} - $1;
- if ($age < 172800) {
- $WordMatches *= 4;
- }
- elsif ($age < 345600) {
- $WordMatches *= 3;
- }
- elsif ($age < 691200) {
- $WordMatches *= 2;
- }
- EOM
- }
- # relevance:
- if ($sort_method == 1) {
- $sort_code .= ' $sort_num = 10E6 - ($WordMatches * substr($_,0,2)); ';
- }
- # reverse relevance:
- elsif ($sort_method == 2) {
- $sort_code .= ' $sort_num = 10E6 + ($WordMatches * substr($_,0,2)); ';
- }
- # by lastmod time
- elsif ($sort_method == 3) {
- $sort_code = <<'EOM';
- m!^\d+ (\d+)!;
- $sort_num = 2147400000 - $1;
- $sort_num = '0' x (10 - length($sort_num)) . $sort_num;
- EOM
- }
- elsif ($sort_method == 4) {
- $sort_code = <<'EOM';
- m!^\d+ (\d+)!;
- $sort_num = $1;
- $sort_num = '0' x (10 - length($sort_num)) . $sort_num;
- EOM
- }
- # by lastindex time
- elsif ($sort_method == 5) {
- $sort_code = <<'EOM';
- m!^\d+ \d+ (\d+)!;
- $sort_num = 2147400000 - $1;
- $sort_num = '0' x (10 - length($sort_num)) . $sort_num;
- EOM
- }
- elsif ($sort_method == 6) {
- $sort_code = <<'EOM';
- m!^\d+ \d+ (\d+)!;
- $sort_num = $1;
- $sort_num = '0' x (10 - length($sort_num)) . $sort_num;
- EOM
- }
- if ($Rules{'sorting: randomize equally-relevant search results'}) {
- $sort_code .= ' $sort_num .= 1000 + int(rand(8999)); ';
- }
- $sort_code .= ' $sort_num .= "." . (10E6 - ($WordMatches * substr($_,0,2))); ';
- if ($Rules{'show examples: enable'}) {
- $DocSearch = <<"EOM";
- SearchBlock: {
- \$\$r_pages_searched++;
- \$WordMatches = 0;
- \$text = '';
- $EvalForbid
- \$n_context_matches = $Rules{'show examples: number to display'};
- \$context_str = '';
- unless (m!^(.*?)uM=(.*?)uT=(.*?)uD=(.*?)uK=(.*?)h=(.*)l=!o) { \$\$r_pages_searched--; last SearchBlock; }
- (\$hdr, \$u, \$t, \$d, \$k, \$text) = (\$1, \$2, \$3, \$4, \$5, \$6);
- $EvalRequired
- $EvalOptional
- last SearchBlock unless \$WordMatches;
- $EvalExtraRequired
- $EvalExtraOptional
- $sort_code
- push(\@\$r_hits, \$sort_num . '.' . \$hdr . ' c= ' . \$context_str . ' r= ' . \$const{'record_realm'});
- }
- EOM
- }
- elsif (($EvalExtraRequired) or ($EvalExtraOptional)) {
- $DocSearch = <<"EOM";
- SearchBlock: {
- \$\$r_pages_searched++;
- \$WordMatches = 0;
- $EvalForbid
- $EvalRequired
- $EvalOptional
- last SearchBlock unless \$WordMatches;
- unless (m!^(.*?)uM=(.*?)uT=(.*?)uD=(.*?)uK=(.*?)h=!o) { \$\$r_pages_searched--; last SearchBlock; }
- (\$hdr, \$u, \$t, \$d, \$k) = (\$1, \$2, \$3, \$4, \$5);
- $EvalExtraRequired
- $EvalExtraOptional
- $sort_code
- push(\@\$r_hits, \$sort_num . '.' . \$hdr . ' c= r= ' . \$const{'record_realm'});
- }
- EOM
- }
- else {
- $DocSearch = <<"EOM";
- SearchBlock: {
- \$\$r_pages_searched++;
- \$WordMatches = 0;
- $EvalForbid
- $EvalRequired
- $EvalOptional
- last SearchBlock unless \$WordMatches;
- unless (m!^(.*?)uM=!o) { \$\$r_pages_searched--; last SearchBlock; }
- \$hdr = \$1;
- $sort_code
- push(\@\$r_hits, \$sort_num . '.' . \$hdr . ' c= r= ' . \$const{'record_realm'});
- }
- EOM
- }
- $RealmSearch = <<"EOM";
- my \@WordCount = ();
- my (\$WordMatches, \$sort_num, \$u, \$t, \$d, \$k, \$hdr, \$n_context_matches, \$context_str, \$delta, \$text);
- Record: while (defined(\$_