/calamaris-2.59/calamaris
# · Perl · 3372 lines · 3172 code · 74 blank · 126 comment · 356 complexity · 8d9cb1a84b53ca09ed88d6b075ab5133 MD5 · raw file
Large files are truncated click here to view the full file
- #!/usr/bin/perl -w
- #
- # $Id: calamaris,v 2.59 2004/06/06 16:26:14 cord Exp $
- #
- # DESCRIPTION: calamaris - statistic package for diverse Proxy-Cache-Servers
- #
- # Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 Cord Beermann
- #
- # URL: http://Calamaris.Cord.de/
- # Announcement-Mailing-list: send Mail with 'subscribe' in the Mail-Body to
- # Calamaris-announce-request@Cord.de
- #
- # AUTHOR: Cord Beermann <Cord@Wunder-Nett.org>
- #
- # Thanks to these contributors, bug reporters, and feature requesters:
- # John Heaton <John@MCC.ac.uk>
- # Andreas Lamprecht <Andreas.Lamprecht@siemens.at>
- # Kenny Ng <kennyng@cyberway.com.sg>
- # Claus Langhans <langhans@rz.uni-frankfurt.de>
- # Andreas Jung <ajung@sz-sb.de>
- # Ernst Heiri <heiri@switch.ch>
- # Shamil R. Yahin <SSHY@cclib.nsu.ru>
- # Thoralf Freitag <Thoralf.Freitag@isst.fhg.de>
- # Marco Paganini <paganini@paganini.net>
- # Michael Riedel <mr@fto.de>
- # Kris Boulez <krbou@pgsgent.be>
- # Mark Visser <mark@snt.utwente.nl>
- # Gary Palmer <gjp@erols.com>
- # Stefan Watermann <stefan@metronet.de>
- # Roar Smith <Roar.Smith@Ericsson.Dk>
- # Bernd Lienau <lienau@tli.de>
- # Gary Lindstrom <gplindstrom@exodus.nnc.edu>
- # Jost Krieger <Jost.Krieger@ruhr-uni-bochum.de>
- # Gerd Michael Hoffmann <Hoffmann@dvgw.de>
- # Gerold Meerkoetter <gerold@noc.fh-lippe.de>
- # Iain Lea <iain@bricbrac.de>
- # Emmanuel Adeline <emmanuel.adeline@mail.dotcom.fr>
- # John Line <webadm@info.cam.ac.uk>
- # Christos Cheretakis <xalkina@otenet.gr>
- # Ryan Donnelly <rmd@doit.wisc.edu>
- # Richard Vaughan <richard_vaughan@timewarp.co.uk>
- # Jonas Luster <jonas@nethammer.qad.org>
- # Clare Lahiff <Clare.Lahiff@anu.edu.au>
- # Toni Andjelkovic <toni@telecom.at>
- # Chris Teakle <ccteakle@its.uq.edu.au>
- # Dancer Vesperman <dancer@zeor.simegen.com>
- # Vincent ? <vincent@aib.net>
- # Elrond ? <Elrond@Wunder-Nett.org>
- # Holger Marzen <holger@marzen.de>
- # Panagiotis Christias <P.Christias@noc.ntua.gr>
- # Patrik Rak <patrik@ash.ein.cz>
- # Steve Snyder <swsnyder@insightbb.com>
- # Michael Copeland <michael.copeland@bell.ca>
- # Warren Brown <wbrown@inktomi.com>
- # Andy Nik <nik@che.nsk.su>
- # Frank Roechter <frank@fhd.de>
- # Antonio Casado Rodríguez <acasado@ualm.es>
- # Pavol Adamec <pavol_adamec@tempest.sk>
- # Ram Cherukuri <ram@edgix.com>
- # Marco Koch <MK@electricpaper.de>
- # Stephen Welker <stephen.welker@nemostar.com.au>
- # Christian Niederdorfer <christian.niederdorfer@infineon.com>
- # Klaus Brinkmeyer <Klaus_Brinkmeyer@inasys.de>
- # Filip ? <mechanix@digibel.org>
- # Matt Hubbard <m.hubbard@ic.ac.uk>
- # James Crocker <jcrocker@menasha.com>
- # Enrico Ardizzoni <enrico@unife.it>
- # Shawn Switenky <S.Switenky@telesat.ca>
- # Jarkko Saloranta <jjs@kpo.fi>
- # Jigar Rasalawala <jrasalawala@fourelle.com>
- # Philipp Frauenfelder <philipp.frauenfelder@swissonline.ch>
- # Alexey Markov <markov@crpi.ru>
- # Mark Güthling <privat@mague-pcservice.de>
- # Sergey Zarubin <serge-home@yandex.ru>
- # Helge Oldach <calamaris@oldach.net>
- # Michael R. Schwarzbach <spg@fs.tum.de>
- # Radu - Eosif Mihailescu <rmihailescu@lumina.ro>
- # Michael Pophal <michael.pophal@nefkom.net>
- # Steffen Sledz <sledz@zone42.org>
- # Kenytt Avery <kavery@willingminds.com>
- # SO Kwok Tsun <ktso@cuhk.edu.hk>
- # Chris Knight <chris@aims.com.au>
- # ycdtosa ? <ycdtosa@eupla.unizar.es>
- # Peter W. Osel <pwo@Infineon.COM>
- # Pawel Worach <pawel.worach@nordea.com>
- # This program is free software; you can redistribute it and/or modify it
- # under the terms of the GNU General Public License as published by the Free
- # Software Foundation; either version 2 of the License, or (at your option)
- # any later version.
- # (If you modify and want to publish it under the name 'Calamaris', please ask
- # me. I don't want to confuse the 'audience' with many different versions of
- # the same name and/or Version number. (This is not part of the license, it
- # is only a favour i asked of you.))
- # This program is distributed in the hope that it will be useful, but WITHOUT
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- # more details.
- # You should have received a copy of the GNU General Public License along with
- # this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- # Place - Suite 330, Boston, MA 02111-1307, USA.
- # A Perl script is "correct" if it gets the job done before your boss fires
- # you.
- # -- 'Programming Perl Second Edition'
- # by Larry Wall, Tom Christiansen & Randal L. Schwartz
- # If you have to remove this, read the README!
- require 5.002;
- use vars qw($opt_3 $opt_a $opt_b $opt_c $opt_C $opt_d $opt_D $opt_f $opt_F
- $opt_h $opt_H $opt_i $opt_I $opt_l $opt_L $opt_m $opt_M $opt_n
- $opt_N $opt_o $opt_O $opt_p $opt_P $opt_r $opt_R $opt_s $opt_S
- $opt_t $opt_T $opt_u $opt_U $opt_v $opt_V $opt_w $opt_W $opt_z);
- use Getopt::Std;
- use Sys::Hostname;
- getopts('3ab:cCd:D:f:F:hH:i:I:l:LmM:nN:o:Op:P:r:R:sS:t:T:uU:vVwWz');
- if ( $opt_b and $opt_b < 1 ) {
- print STDERR "$0: wrong value at -b -option: \"$opt_b\"\n\n";
- $usage_err = 1;
- } else {
- $| = 1;
- }
- if ($opt_U) {
- unless ( $opt_U =~ m#^[KMGT]$# ) {
- print STDERR "$0: wrong value at -U -option: \"$opt_U\"\n\n";
- $usage_err = 1;
- }
- } else {
- $opt_U = '';
- }
- if ($opt_D) {
- if ( $opt_D <= 1 ) {
- print STDERR "$0: wrong value at -D -option: \"$opt_D\"\n\n";
- $usage_err = 1;
- }
- }
- if ($opt_H) {
- if ( $opt_H eq '1' or $opt_H eq 'lookup' ) {
- $host_name = hostname() . ' ';
- } else {
- $host_name = $opt_H . ' ';
- }
- } else {
- $host_name = '';
- }
- if ( $opt_N and $opt_N != -1 and $opt_N < 2 ) {
- print STDERR "$0: wrong value at -N -option: \"$opt_N\"\n\n";
- $usage_err = 1;
- }
- if ($opt_I) {
- use Time::Local;
- ( $interval_begin, $interval_end ) = split ( '-', $opt_I );
- if ($interval_begin > $interval_end) {
- print STDERR "$0: wrong value at -I -option: \"$opt_I\". Interval begin newer than interval end!\n\n";
- $usage_err = 1;
- }
- if ( $interval_begin =~ m#^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$# ) {
- $interval_begin = timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
- } elsif ( $interval_begin eq '' ) {
- $interval_begin = 0;
- } else {
- print STDERR "$0: wrong value at -I -option: \"$opt_I\"\n\n";
- $usage_err = 1;
- }
- if ( $interval_end =~ m#^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})$# ) {
- $interval_end = timegm( $6, $5, $4, $3, $2 - 1, $1 - 1900 );
- } elsif ( $interval_end eq '' ) {
- $interval_end = 9999999999;
- } else {
- print STDERR "$0: wrong value at -I -option: \"$opt_I\"\n\n";
- $usage_err = 1;
- }
- }
- if ($opt_F) {
- foreach $output ( split ( /\s*,\s*/, $opt_F ) ) {
- if ( $output eq 'mail' ) {
- $outtype_mail = 1;
- } elsif ( $output eq 'html' ) {
- $outtype_html = 1;
- } elsif ( $output eq 'html-embed' ) {
- $outtype_htmlembed = 1;
- } elsif ( $output eq 'unformatted' ) {
- $outtype_unformatted = 1;
- } else {
- print STDERR "$0: unknown output-format: $output\n\n";
- $usage_err = 1;
- }
- }
- }
- # remove this at end of 2003
- if ($opt_3) {
- print STDERR "$0: obsoleted option -3: use -N3 instead\n\n";
- $usage_err = 1;
- }
- # remove at end of 2004
- if ( defined $opt_f and $opt_f =~ m#^squid-(mime|smartfilter)$# ) {
- print STDERR "$0: obsoleted input format '$opt_f':
- use -f 'squid-extended' instead\n\n";
- $usage_err = 1;
- }
- # change at end of 2003
- # remove at end of 2004
- if ( defined $opt_w ) {
- print STDERR "$0: obsoleted output format switch -w:
- use -F 'html' instead\n\n";
- $outtype_html = 1;
- $output_warn = 1;
- }
- # change at end of 2003
- # remove at end of 2004
- if ( defined $opt_W ) {
- print STDERR "$0: obsoleted output format switch -W:
- use -F 'html-embed' instead\n\n";
- $outtype_htmlembed = 1;
- $output_warn = 1;
- }
- # change at end of 2003
- # remove at end of 2004
- if ( defined $opt_m ) {
- print STDERR "$0: obsoleted output format switch -m:
- use -F 'mail' instead\n\n";
- $outtype_mail = 1;
- $output_warn = 1;
- }
- $sortorder = '';
- $sortorder = '_size' if ($opt_O);
- if ($opt_a) {
- $opt_s = 1;
- $opt_P = 60 unless $opt_P;
- $opt_d = 20 unless $opt_d;
- $opt_r = 20 unless $opt_r;
- $opt_t = 20 unless $opt_t;
- $opt_D = 10 unless $opt_D;
- }
- $opt_N = 2 unless $opt_N;
- $opt_T = 0 unless $opt_T;
- $opt_r = $opt_R unless $opt_r;
- $P = $opt_P ? "$opt_P minute" : '60 minute';
- if ( defined($opt_P) and ($opt_P%60) == 0 ) { $P = $opt_P/60 . " hour" };
- if ( defined($opt_P) and ($opt_P%1440) == 0 ) { $P = $opt_P/1440 . " day" };
- if ( $opt_N == -1 or $opt_N > 2 ) {
- if ( $opt_N == 3 ) {
- $N = '3rd';
- } elsif ( $opt_N == -1 ) {
- $N = 'all';
- } else {
- $N = $opt_N . 'th';
- }
- } else {
- $N = '2nd';
- }
- @reports = (
- 'Summary', 'Incoming request peak per protocol',
- 'Incoming transfer volume per protocol',
- 'Incoming requests by method', 'Incoming UDP-requests by status',
- 'Incoming TCP-requests by status', 'Outgoing requests by status',
- 'Outgoing requests by destination',
- "Request-destinations by ${N}-level-domain",
- 'Request-destinations by toplevel-domain', 'TCP-Request-protocol',
- 'Requested content-type', 'Requested extensions',
- 'Incoming UDP-requests by host', 'Incoming TCP-requests by host',
- 'Distribution Histogram', "Performance in $P steps"
- );
- $VERSION='Calamaris $Revision: 2.59 $';
- $COPYRIGHT =
- 'Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 Cord Beermann.
- Calamaris comes with ABSOLUTELY NO WARRANTY. It is free software, and you are
- welcome to redistribute it under certain conditions. See source for details.
- Calamaris-Homepage: http://Calamaris.Cord.de/';
- $USAGE = 'Usage: cat log | ' . $0 . ' [switches]
- Reports:
- -a all (extracts all reports available,
- -a equals -D 10 -d 20 -P 60 -r 20 -s -t 20)
- -d n domain (show n Top-level and n second-level destinations,
- -1 = unlimited)
- -p type peak (measure peak requests)
- old = make old request-peak mesurement
- new = make new request&byte-peak measurement
- (both slow Calamaris significantly down.)
- -P n Performance (show throughput data for every n minutes)
- -r n requester (show n Requesters, -1 = unlimited)
- -R n targets for requester (show n targets for every Requester,
- -1 = unlimited), implies -r (*)
- -s status (show verbose status reports)
- -t n type (show n content-type, n extensions and requested protocols,
- -1 = unlimited)
- -D n Distribution Histogram (shows size-based distribution of requested
- objects, smaller numbers result in more verbose reports.
- choose 2, 10 or 100 for useful output. (You can also play
- with this ;-))
- Input:
- -z zero (no input via stdin)
- -f type format (sets the type of input logfiles)
- auto = tries to guess the input format
- (This is the Default)
- squid = Native-Logfile derived from Squid V1.1.beta26-V2.x
- squid-extended = Native-Logfile with log_mime_hdrs enabled
- derived from Squid V1.1.beta26-V2.x (*)
- or Cisco Content Engines (*)
- or Squid with SmartFilter-patch (*)
- squid-old = Native-Logfile derived from Squid
- V1.1.alpha1-V1.1.beta25
- nc = Squid-style Logfiles derived from NetCache V?? (<5.x)
- its = Logfiles derived from Inktomi Traffic Server
- elff = Extended Logfile Format (i.e Compaq Tasksmart, Novell
- Internet Caching System, NetCache 5.x)
- nse = Netscape Extended-1/2 Logfile Format
- Output: (Default is plain formatted text)
- -F type Format (output format (comma-seperated list))
- mail = mail format
- html = HTML format
- html-embed = HTML format without HTML-headers
- unformatted = plain unformatted output
- -l string logo (add this string to the head of the report. works only in
- combination with -F html)
- -M string Meta (includes the given strings in html-<head>. works only in
- combination with -F html)
- -H name Host-name (a name for the Output, -H \'lookup\' issues a lookup for
- the current host)
- -O Order (changes the sort order in the reports to request size,
- default is sorting by number of requests)
- -N n N-level (change all 2nd-level-reports to N-level-reports. N can be
- any number from 2 up. -1 means full report.)
- -U string Unit (define the Unit for the Byte-values, else it will be auto)
- K(ilo), M(ega), G(iga), T(era)
- -S list Show (Shows only the defined reports (comma-seperated list) in the
- specified order.) The following numbers are defined:
- ';
- foreach ( 0 .. $#reports ) {
- $USAGE .= "\t\t$_\t$reports[$_]\n";
- }
- $USAGE .=
- ' (Note: only putting out one report does not speed up Calamaris
- as the internal operations were done based on the
- report-switches. Default: Reports are displayed based on
- activated reports.)
- Caching:
- -i file input-file (input-datafile for caching, to add many files separate
- them with a \':\')
- -o file output-file (output-datafile for caching, can be the same as -i)
- Misc:
- -n no-lookup (don\'t look IP-Numbers up)
- -T n Time (adjust the Performance-Report in minutes)
- -c case (switch to case-insensitive (useful for extensions-report))
- -u user (use ident information if available) (*)
- -I t-t Interval (defines which time-interval should be parsed)
- t has to be the format yyyymmddhhmmss
- omitting the beginning or ending is allowed.
- -b n benchmark (prints a hash-sign (#) to stderr for each n lines
- processed)
- -v verbose (print information what Calamaris is doing. Useful for
- debugging.)
- -L Loop (dumps the generated internal loop to STDERR for debugging.)
- -C copyright (prints the copyright)
- -h help (prints out this message)
- -V Version (prints version-info)
- (*) These options break the privacy of your users. Please read the README
- on this.';
- print "$USAGE\n\n" if $opt_h;
- print "$VERSION\n\n" if $opt_V;
- print "$COPYRIGHT\n\n" if $opt_C or $opt_h or $opt_V;
- exit 0 if $opt_h or $opt_C or $opt_V;
- if ($usage_err) {
- print STDERR "run '$0 -h' for help.\n\n";
- exit 1;
- }
- # initialize variables
- $counter = $hier = $hier_direct = $hier_direct_size = $hier_direct_time =
- $hier_parent = $hier_parent_size = $hier_parent_time = $hier_sibling =
- $hier_sibling_size = $hier_sibling_time = $hier_size = $hier_time =
- $invalid = $peak_all_min = $peak_all_min_time = $peak_all_sec =
- $peak_all_sec_time = $peak_all_hour = $peak_all_hour_time = $peak_tcp_min =
- $peak_tcp_min_time = $peak_tcp_sec = $peak_tcp_sec_time = $peak_tcp_hour =
- $peak_tcp_hour_time = $peak_udp_min = $peak_udp_min_time = $peak_udp_sec =
- $peak_udp_sec_time = $peak_udp_hour = $peak_udp_hour_time = $size =
- $skipped = $tcp = $tcp_hit = $tcp_hit_size = $tcp_hit_time = $tcp_miss =
- $tcp_miss_none = $tcp_miss_none_size = $tcp_miss_none_time =
- $tcp_miss_size = $tcp_miss_time = $tcp_size = $tcp_time = $time =
- $time_end = $time_run = $udp = $udp_hit = $udp_hit_size = $udp_hit_time =
- $udp_miss = $udp_miss_size = $udp_miss_time = $udp_size = $udp_time = 0;
- $time_begin = 9999999999;
- @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
- ### Read Cache.
- if ($opt_i) {
- foreach $file ( split ':', $opt_i ) {
- open( CACHE, "$file" ) or die ("$0: can't open $file for reading: $!\n");
- while (<CACHE>) {
- chomp;
- @cache = split '?';
- $x = shift (@cache);
- next unless ($x);
- if ( $x eq 'A' and $#cache == 39 ) {
- $time_begin = $cache[0] if $cache[0] < $time_begin;
- $time_end = $cache[1] if $cache[1] > $time_end;
- $counter += $cache[2];
- $size += $cache[3];
- $time += $cache[4];
- $invalid += $cache[5];
- $time_run += $cache[6];
- $udp += $cache[7];
- $udp_size += $cache[8];
- $udp_time += $cache[9];
- $udp_hit += $cache[10];
- $udp_hit_size += $cache[11];
- $udp_hit_time += $cache[12];
- $udp_miss += $cache[13];
- $udp_miss_size += $cache[14];
- $udp_miss_time += $cache[15];
- $tcp += $cache[16];
- $tcp_size += $cache[17];
- $tcp_time += $cache[18];
- $tcp_hit += $cache[19];
- $tcp_hit_size += $cache[20];
- $tcp_hit_time += $cache[21];
- $tcp_miss += $cache[22];
- $tcp_miss_size += $cache[23];
- $tcp_miss_time += $cache[24];
- $tcp_miss_none += $cache[25];
- $tcp_miss_none_size += $cache[26];
- $tcp_miss_none_time += $cache[27];
- $hier += $cache[28];
- $hier_size += $cache[29];
- $hier_time += $cache[30];
- $hier_direct += $cache[31];
- $hier_direct_size += $cache[32];
- $hier_direct_time += $cache[33];
- $hier_sibling += $cache[34];
- $hier_sibling_size += $cache[35];
- $hier_sibling_time += $cache[36];
- $hier_parent += $cache[37];
- $hier_parent_size += $cache[38];
- $hier_parent_time += $cache[39];
- } elsif ( $x eq 'B' and $#cache == 17 ) {
- unless ( $peak_udp_sec == 0 ) {
- warn("multiple cache files.\n") if $opt_v;
- $peak_warn = 'Peak values are possibly wrong!';
- }
- if ( $peak_udp_sec < $cache[0] ) {
- $peak_udp_sec = $cache[0];
- $peak_udp_sec_time = $cache[1];
- }
- if ( $peak_udp_min < $cache[2] ) {
- $peak_udp_min = $cache[2];
- $peak_udp_min_time = $cache[3];
- }
- $peak_udp_hour{ $cache[5] } = 0
- unless defined $peak_udp_hour{ $cache[5] };
- $peak_udp_hour{ $cache[5] } += $cache[4];
- if ( $peak_tcp_sec < $cache[6] ) {
- $peak_tcp_sec = $cache[6];
- $peak_tcp_sec_time = $cache[7];
- }
- if ( $peak_tcp_min < $cache[8] ) {
- $peak_tcp_min = $cache[8];
- $peak_tcp_min_time = $cache[9];
- }
- $peak_tcp_hour{ $cache[11] } = 0
- unless defined $peak_tcp_hour{ $cache[11] };
- $peak_tcp_hour{ $cache[11] } += $cache[10];
- if ( $peak_all_sec < $cache[12] ) {
- $peak_all_sec = $cache[12];
- $peak_all_sec_time = $cache[13];
- }
- if ( $peak_all_min < $cache[14] ) {
- $peak_all_min = $cache[14];
- $peak_all_min_time = $cache[15];
- }
- $peak_all_hour{ $cache[17] } = 0
- unless defined $peak_all_hour{ $cache[17] };
- $peak_all_hour{ $cache[17] } += $cache[16];
- } elsif ( $x eq 'B' and $#cache == 23 ) {
- unless ( $peak_udp_sec == 0 ) {
- warn("multiple cache files.\n") if $opt_v;
- $peak_warn = 'Peak values are possibly wrong!';
- }
- if ( $peak_udp_sec < $cache[0] ) {
- $peak_udp_sec = $cache[0];
- $peak_udp_sec_time = $cache[1];
- }
- if ( $peak_udp_min < $cache[2] ) {
- $peak_udp_min = $cache[2];
- $peak_udp_min_time = $cache[3];
- }
- $peak_udp_hour{ $cache[5] } = 0
- unless defined $peak_udp_hour{ $cache[5] };
- $peak_udp_hour{ $cache[5] } += $cache[4];
- $peak_udp_hour_size{ $cache[7] } = 0
- unless defined $peak_udp_hour_size{ $cache[7] };
- $peak_udp_hour_size{ $cache[7] } += $cache[6];
- if ( $peak_tcp_sec < $cache[8] ) {
- $peak_tcp_sec = $cache[8];
- $peak_tcp_sec_time = $cache[9];
- }
- if ( $peak_tcp_min < $cache[10] ) {
- $peak_tcp_min = $cache[10];
- $peak_tcp_min_time = $cache[11];
- }
- $peak_tcp_hour{ $cache[13] } = 0
- unless defined $peak_tcp_hour{ $cache[13] };
- $peak_tcp_hour{ $cache[13] } += $cache[12];
- $peak_tcp_hour_size{ $cache[15] } = 0
- unless defined $peak_tcp_hour_size{ $cache[15] };
- $peak_tcp_hour_size{ $cache[15] } += $cache[14];
- if ( $peak_all_sec < $cache[16] ) {
- $peak_all_sec = $cache[16];
- $peak_all_sec_time = $cache[17];
- }
- if ( $peak_all_min < $cache[18] ) {
- $peak_all_min = $cache[18];
- $peak_all_min_time = $cache[19];
- }
- $peak_all_hour{ $cache[21] } = 0
- unless defined $peak_all_hour{ $cache[21] };
- $peak_all_hour{ $cache[21] } += $cache[20];
- $peak_all_hour_size{ $cache[23] } = 0
- unless defined $peak_all_hour_size{ $cache[23] };
- $peak_all_hour_size{ $cache[23] } += $cache[22];
- } elsif ( $x eq 'C' and $#cache == 3 ) {
- $y = shift (@cache);
- $method{$y} = $method_size{$y} = $method_time{$y} = 0
- unless defined $method{$y};
- $method{$y} += $cache[0];
- $method_size{$y} += $cache[1];
- $method_time{$y} += $cache[2];
- } elsif ( $x eq 'D' and $#cache == 3 ) {
- $y = shift (@cache);
- $udp_hit{$y} = $udp_hit_size{$y} = $udp_hit_time{$y} = 0
- unless defined $udp_hit{$y};
- $udp_hit{$y} += $cache[0];
- $udp_hit_size{$y} += $cache[1];
- $udp_hit_time{$y} += $cache[2];
- } elsif ( $x eq 'E' and $#cache == 3 ) {
- $y = shift (@cache);
- $udp_miss{$y} = $udp_miss_size{$y} = $udp_miss_time{$y} = 0
- unless defined $udp_miss{$y};
- $udp_miss{$y} += $cache[0];
- $udp_miss_size{$y} += $cache[1];
- $udp_miss_time{$y} += $cache[2];
- } elsif ( $x eq 'F' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_hit{$y} = $tcp_hit_size{$y} = $tcp_hit_time{$y} = 0
- unless defined $tcp_hit{$y};
- $tcp_hit{$y} += $cache[0];
- $tcp_hit_size{$y} += $cache[1];
- $tcp_hit_time{$y} += $cache[2];
- } elsif ( $x eq 'G' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_miss{$y} = $tcp_miss_size{$y} = $tcp_miss_time{$y} = 0
- unless defined $tcp_miss{$y};
- $tcp_miss{$y} += $cache[0];
- $tcp_miss_size{$y} += $cache[1];
- $tcp_miss_time{$y} += $cache[2];
- } elsif ( $x eq 'H' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_miss_none{$y} = $tcp_miss_none_size{$y} =
- $tcp_miss_none_time{$y} = 0
- unless defined $tcp_miss_none{$y};
- $tcp_miss_none{$y} += $cache[0];
- $tcp_miss_none_size{$y} += $cache[1];
- $tcp_miss_none_time{$y} += $cache[2];
- } elsif ( $x eq 'I' and $#cache == 3 ) {
- $y = shift (@cache);
- $hier_direct{$y} = $hier_direct_size{$y} = $hier_direct_time{$y} = 0
- unless defined $hier_direct{$y};
- $hier_direct{$y} += $cache[0];
- $hier_direct_size{$y} += $cache[1];
- $hier_direct_time{$y} += $cache[2];
- } elsif ( $x eq 'J' and $#cache == 3 ) {
- $y = shift (@cache);
- $hier_sibling{$y} = $hier_sibling_size{$y} = $hier_sibling_time{$y} =
- 0
- unless defined $hier_sibling{$y};
- $hier_sibling{$y} += $cache[0];
- $hier_sibling_size{$y} += $cache[1];
- $hier_sibling_time{$y} += $cache[2];
- } elsif ( $x eq 'K' and $#cache == 3 ) {
- $y = shift (@cache);
- $hier_parent{$y} = $hier_parent_size{$y} = $hier_parent_time{$y} = 0
- unless defined $hier_parent{$y};
- $hier_parent{$y} += $cache[0];
- $hier_parent_size{$y} += $cache[1];
- $hier_parent_time{$y} += $cache[2];
- } elsif ( $x eq 'L' and $#cache == 3 ) {
- $y = shift (@cache);
- $hier_neighbor{$y} = $hier_neighbor_size{$y} =
- $hier_neighbor_time{$y} = 0
- unless defined $hier_neighbor{$y};
- $hier_neighbor{$y} += $cache[0];
- $hier_neighbor_size{$y} += $cache[1];
- $hier_neighbor_time{$y} += $cache[2];
- } elsif ( $x eq 'M' and $#cache == 4 ) {
- $y = shift (@cache);
- $z = shift (@cache);
- $hier_neighbor_status{$y}{$z} = $hier_neighbor_status_size{$y}{$z} =
- $hier_neighbor_status_time{$y}{$z} = 0
- unless defined $hier_neighbor_status{$y}{$z};
- $hier_neighbor_status{$y}{$z} += $cache[0];
- $hier_neighbor_status_size{$y}{$z} += $cache[1];
- $hier_neighbor_status_time{$y}{$z} += $cache[2];
- } elsif ( $x eq 'N' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_urlhost{$y} = $tcp_urlhost_size{$y} = $tcp_hit_urlhost{$y} = 0
- unless defined $tcp_urlhost{$y};
- $tcp_urlhost{$y} += $cache[0];
- $tcp_urlhost_size{$y} += $cache[1];
- $tcp_hit_urlhost{$y} += $cache[2];
- } elsif ( $x eq 'O' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_urltld{$y} = $tcp_urltld_size{$y} = $tcp_hit_urltld{$y} = 0
- unless defined $tcp_urltld{$y};
- $tcp_urltld{$y} += $cache[0];
- $tcp_urltld_size{$y} += $cache[1];
- $tcp_hit_urltld{$y} += $cache[2];
- } elsif ( $x eq 'P' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_urlprot{$y} = $tcp_urlprot_size{$y} = $tcp_hit_urlprot{$y} = 0
- unless defined $tcp_urlprot{$y};
- $tcp_urlprot{$y} += $cache[0];
- $tcp_urlprot_size{$y} += $cache[1];
- $tcp_hit_urlprot{$y} += $cache[2];
- } elsif ( $x eq 'Q' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_content{$y} = $tcp_content_size{$y} = $tcp_hit_content{$y} = 0
- unless defined $tcp_content{$y};
- $tcp_content{$y} += $cache[0];
- $tcp_content_size{$y} += $cache[1];
- $tcp_hit_content{$y} += $cache[2];
- } elsif ( $x eq 'R' and $#cache == 3 ) {
- $y = shift (@cache);
- $tcp_urlext{$y} = $tcp_urlext_size{$y} = $tcp_hit_urlext{$y} = 0
- unless defined $tcp_urlext{$y};
- $tcp_urlext{$y} += $cache[0];
- $tcp_urlext_size{$y} += $cache[1];
- $tcp_hit_urlext{$y} += $cache[2];
- } elsif ( $x eq 'S' and $#cache == 5 ) {
- $y = shift (@cache);
- $udp_requester{$y} = $udp_requester_size{$y} =
- $udp_requester_time{$y} = $udp_hit_requester{$y} =
- $udp_hit_requester_size{$y} = 0
- unless defined $udp_requester{$y};
- $udp_requester{$y} += $cache[0];
- $udp_requester_size{$y} += $cache[1];
- $udp_requester_time{$y} += $cache[2];
- $udp_hit_requester{$y} += $cache[3];
- $udp_hit_requester_size{$y} += $cache[4];
- } elsif ( $x eq 'T' and $#cache == 5 ) {
- $y = shift (@cache);
- $tcp_requester{$y} = $tcp_requester_size{$y} =
- $tcp_requester_time{$y} = $tcp_hit_requester{$y} =
- $tcp_hit_requester_size{$y} = 0
- unless defined $tcp_requester{$y};
- $tcp_requester{$y} += $cache[0];
- $tcp_requester_size{$y} += $cache[1];
- $tcp_requester_time{$y} += $cache[2];
- $tcp_hit_requester{$y} += $cache[3];
- $tcp_hit_requester_size{$y} += $cache[4];
- } elsif ( $x eq 'U' and $#cache == 13 ) {
- $y = shift (@cache);
- ( $perf_counter{$y}, $perf_size{$y}, $perf_time{$y},
- $perf_tcp_hit_size{$y}, $perf_tcp_hit_time{$y},
- $perf_tcp_miss_size{$y}, $perf_tcp_miss_time{$y},
- $perf_hier_direct_size{$y}, $perf_hier_direct_time{$y},
- $perf_hier_sibling_size{$y}, $perf_hier_sibling_time{$y},
- $perf_hier_parent_size{$y}, $perf_hier_parent_time{$y} )
- = @cache;
- # This is for a stupid bug I brought in... it should save older Cache-Files,
- # and put them in so that we can work with them.. remove at end of 2003
- } elsif ( $x eq 'U' and $#cache == 12 ) {
- $cache_warn1 = 'The Performance data misses the Cache-Hits-value';
- $y = shift (@cache);
- ( $perf_counter{$y}, $perf_size{$y}, $perf_time{$y},
- $perf_tcp_hit_size{$y}, $perf_tcp_miss_size{$y},
- $perf_tcp_miss_time{$y}, $perf_hier_direct_size{$y},
- $perf_hier_direct_time{$y}, $perf_hier_sibling_size{$y},
- $perf_hier_sibling_time{$y}, $perf_hier_parent_size{$y},
- $perf_hier_parent_time{$y} )
- = @cache;
- # stupid, yes...
- # I set this to 0/.000001 so removezerotime prints a - in the report.
- $perf_tcp_hit_size{$y} = 0;
- $perf_tcp_hit_time{$y} = .000001;
- # End of stupid bug-workaround
- } elsif ( $x eq 'V' and $#cache == 6 ) {
- $y = shift (@cache);
- $z = shift (@cache);
- $udp_requester_urlhost{$y}{$z} = $udp_requester_urlhost_size{$y}{$z} =
- $udp_requester_urlhost_time{$y}{$z} =
- $udp_hit_requester_urlhost{$y}{$z} =
- $udp_hit_requester_urlhost_size{$y}{$z} = 0
- unless defined $udp_requester_urlhost{$y}{$z};
- $udp_requester_urlhost{$y}{$z} += $cache[0];
- $udp_requester_urlhost_size{$y}{$z} += $cache[1];
- $udp_requester_urlhost_time{$y}{$z} += $cache[2];
- $udp_hit_requester_urlhost{$y}{$z} += $cache[3];
- $udp_hit_requester_urlhost_size{$y}{$z} += $cache[4];
- # Start of Bug-fix - remove this at end of 2003
- } elsif ( $x eq 'V' and $#cache == 5 ) {
- $cache_warn2 = 'The UDP-Requester-Report misses the other-value';
- $y = '<other>' unless ($y);
- $z = shift (@cache);
- $udp_requester_urlhost{$y}{$z} = $udp_requester_urlhost_size{$y}{$z} =
- $udp_requester_urlhost_time{$y}{$z} =
- $udp_hit_requester_urlhost{$y}{$z} =
- $udp_hit_requester_urlhost_size{$y}{$z} = 0
- unless defined $udp_requester_urlhost{$y}{$z};
- $udp_requester_urlhost{$y}{$z} += $cache[0];
- $udp_requester_urlhost_size{$y}{$z} += $cache[1];
- $udp_requester_urlhost_time{$y}{$z} += $cache[2];
- $udp_hit_requester_urlhost{$y}{$z} += $cache[3];
- $udp_hit_requester_urlhost_size{$y}{$z} += $cache[4];
- # End of Bug-fix
- } elsif ( $x eq 'W' and $#cache == 6 ) {
- $y = shift (@cache);
- $z = shift (@cache);
- $tcp_requester_urlhost{$y}{$z} = $tcp_requester_urlhost_size{$y}{$z} =
- $tcp_requester_urlhost_time{$y}{$z} =
- $tcp_hit_requester_urlhost{$y}{$z} =
- $tcp_hit_requester_urlhost_size{$y}{$z} = 0
- unless defined $tcp_requester_urlhost{$y}{$z};
- $tcp_requester_urlhost{$y}{$z} += $cache[0];
- $tcp_requester_urlhost_size{$y}{$z} += $cache[1];
- $tcp_requester_urlhost_time{$y}{$z} += $cache[2];
- $tcp_hit_requester_urlhost{$y}{$z} += $cache[3];
- $tcp_hit_requester_urlhost_size{$y}{$z} += $cache[4];
- # Start of Bugfix - remove at end of 2003
- } elsif ( $x eq 'W' and $#cache == 5 ) {
- $cache_warn3 = 'The TCP-Requester-Report misses the other-value';
- $y = '<other>' unless ($y);
- $z = shift (@cache);
- $tcp_requester_urlhost{$y}{$z} = $tcp_requester_urlhost_size{$y}{$z} =
- $tcp_requester_urlhost_time{$y}{$z} =
- $tcp_hit_requester_urlhost{$y}{$z} =
- $tcp_hit_requester_urlhost_size{$y}{$z} = 0
- unless defined $tcp_requester_urlhost{$y}{$z};
- $tcp_requester_urlhost{$y}{$z} += $cache[0];
- $tcp_requester_urlhost_size{$y}{$z} += $cache[1];
- $tcp_requester_urlhost_time{$y}{$z} += $cache[2];
- $tcp_hit_requester_urlhost{$y}{$z} += $cache[3];
- $tcp_hit_requester_urlhost_size{$y}{$z} += $cache[4];
- # End of Bugfix
- } elsif ( $x eq 'X' and $#cache == 5 ) {
- $y = shift (@cache);
- $tcp_distribution{$y} = $tcp_distribution_size{$y} =
- $tcp_distribution_time{$y} = $tcp_hit_distribution{$y} =
- $tcp_hit_distribution_size{$y} = 0
- unless defined $tcp_distribution{$y};
- $tcp_distribution{$y} += $cache[0];
- $tcp_distribution_size{$y} += $cache[1];
- $tcp_distribution_time{$y} += $cache[2];
- $tcp_hit_distribution{$y} += $cache[3];
- $tcp_hit_distribution_size{$y} += $cache[4];
- } else {
- print STDERR "can't parse cache-line: \"$x @cache\"\n";
- }
- }
- close(CACHE);
- }
- }
- unless ($opt_z) {
- while ( defined( $line = <> ) ) {
- if ( not defined $opt_f or $opt_f eq 'auto' ) {
- if ( $line =~
- m#^\d+\.\d+\s+\d+\s+[\w\-\.:]+\s+\w+/\d+\s+\d+\s+\w+\s+\S+\s+\S+\s+\w+/\S+\s+\S+$#
- )
- {
- $opt_f = 'squid';
- print STDERR "guessing... using '-f squid'\n" if ($opt_v);
- last;
- } elsif ( $line =~
- m#^\d+\s+\d+\s+[\w\-\.:]+\s+\w+/\d+\s+\d+\s+\w+\s+\S+\s+\S+\s+\w+/\S+\s+\S+$#
- )
- {
- $opt_f = 'its';
- print STDERR "guessing... using '-f its'\n" if ($opt_v);
- last;
- } elsif ( $line =~
- m#^\d+\.\d+\s+\d+\s+[\w\-\.:]+\s+\w+/\d+\s+\d+\s+\w+\s+\S+\s+\S+\s+\w+/\S+$#
- )
- {
- $opt_f = 'squid-old';
- print STDERR "guessing... using '-f squid-old'\n" if ($opt_v);
- last;
- } elsif ( $line =~
- m#^\d+\.\d+\s+\d+\s+[\w\-\.:]+\s+\w+/\d+\s+\d+\s+\w+\s+\S+\s+\S+\s+\w+/\S+\s+(\S|; c)+\s+\S+$#
- )
- {
- $opt_f = 'nc';
- print STDERR "guessing... using '-f nc'\n" if ($opt_v);
- last;
- } elsif ( $line =~ s/^\s*#\s*Fields:\s*// ) {
- $opt_f = 'elff';
- print STDERR "guessing... using '-f elff'\n" if ($opt_v);
- last;
- } elsif ( $line =~ s/^\s*format=\s*// ) {
- $opt_f = 'nse';
- print STDERR "guessing... using '-f nse'\n" if ($opt_v);
- last;
- } elsif ( $line =~
- m#^\d+\.\d+\s+\d+\s+[\w\-\.:]+\s+\w+/\-?\d+\s+\d+\s+\w+\s+\S+\s+\S+\s+\w+/\S+\s+\S+\s+#
- )
- {
- $opt_f = 'squid-extended';
- print STDERR "guessing... using '-f squid-extended'\n" if ($opt_v);
- last;
- } elsif ( $line =~
- m#^[\w\-\.:]+\s+\S+\s+\S+\s+\[.+\]\s+\"\w+\s+\S+\s+\S+\"\s+\d+\s+\d+\s+\S+(\s+\[.*\]\s+\[.*\])?$#
- )
- {
- print STDERR "$0: The first line of the input looks to me as if you
- switched 'emulate_httpd_log' to on. I can't parse that format. Please read
- the README on this.\n\n";
- exit(1);
- } elsif ( $line =~ m/^\s*(#|$)/ ) {
- print STDERR "skipping: $line\n" if ($opt_v);
- next;
- } else {
- print STDERR "$0: I don't know this input format. Please check the
- input. If you\'re sure that the following line is NOT corrupt and the error
- also occurs with the recent version of Calamaris (see the README for pointers
- and known bugs) then report it with the following line to
- <Calamaris-bug\@Cord.de>. Thank You.\n\n$line\n\n";
- exit(1);
- }
- } else {
- last;
- }
- }
- if ( not defined $opt_f or $opt_f eq 'auto' ) {
- print "\nno requests found\n";
- exit(0);
- }
- print STDERR "print a hash-sign for each $opt_b lines:\n" if ($opt_b);
- $loop = '
- for ( ; $line ; $line = <> ) {';
- if ( $opt_f eq 'squid' ) {
- $loop .= '
- ( $log_date, $log_reqtime, $log_requester, $log_status, $log_size,
- $log_method, $log_url, $log_ident, $log_hier, $log_content, $foo )
- = split ( /\s+/, $line );
- if ( not defined $foo
- or not defined $log_content
- or $foo ne \'\'
- or $log_content eq \'\'
- or $log_reqtime < 0
- or $log_date !~ m#^\d+\.\d{3}$# )
- {';
- } elsif ( $opt_f eq 'squid-extended' ) {
- $loop .= '
- $line =~ s/ \[[^\[\]]*\]//g;
- ( $log_date, $log_reqtime, $log_requester, $log_status, $log_size,
- $log_method, $log_url, $log_ident, $log_hier, $log_content)
- = split ( /\s+/, $line, 10 );
- chomp($log_content);
- if ( not defined $log_content
- or $log_content eq \'\'
- or $log_reqtime < 0
- or $log_date !~ m#^\d+\.\d{3}$# )
- {';
- } elsif ( $opt_f eq 'its' ) {
- $loop .= '
- ( $log_date, $log_reqtime, $log_requester, $log_status, $log_size,
- $log_method, $log_url, $log_ident, $log_hier, $log_content, $foo ) =
- split ( /\s+/, $line );
- if ( not defined $foo
- or not defined $log_content
- or $foo ne \'\'
- or $log_content eq \'\'
- or $log_reqtime < 0
- or $log_date !~ m#^\d+$# )
- {';
- } elsif ( $opt_f eq 'squid-old' ) {
- $loop .= '
- ( $log_date, $log_reqtime, $log_requester, $log_status, $log_size,
- $log_method, $log_url, $log_ident, $log_hier, $foo )
- = split ( /\s+/, $line );
- unless ( not defined $foo
- or not defined $log_hier
- or $foo ne \'\'
- or $log_hier eq \'\'
- or $log_reqtime < 0
- or $log_date !~ m#^\d+\.\d{3}$# )
- {';
- } elsif ( $opt_f eq 'nc' ) {
- $loop .= '
- $line =~ s#\; c#\;_c#og; # Hack to handle buggy logfiles of NetCache V3.2.x
- ( $log_date, $log_reqtime, $log_requester, $log_status, $log_size,
- $log_method, $log_url, $log_ident, $log_hier, $log_content, $log_abort,
- $foo )
- = split ( /\s+/, $line );
- if ( not defined $foo or not defined $log_abort or $foo ne \'\' or
- $log_abort eq \'\' or $log_reqtime < 0 or
- $log_date !~ m#^\d+\.\d{3}$# )
- {';
- } elsif ( $opt_f eq 'elff' ) {
- @fields = split ( /\s+/, $line );
- $loop .= '
- use Time::Local;
- if ( $line =~
- m#^';
- foreach (@fields) {
- $tmpline1 .= '\s+' if ($tmpline1);
- if ( $_ eq 'date' ) {
- $tmpline1 .= '(\d+)-(\d+)-(\d+)';
- $tmpline2 .= '
- $log_year = $' . ++$offset . ';
- $log_month = $' . ++$offset . ';
- $log_day = $' . ++$offset . ';';
- } elsif ( $_ eq 'time' ) {
- $tmpline1 .= '(\d+):(\d+):(\d+)';
- $tmpline2 .= '
- $log_hour = $' . ++$offset . ';
- $log_min = $' . ++$offset . ';
- $log_sec = $' . ++$offset . ';';
- } elsif ( $_ eq 'x-timestamp' ) {
- $tmpline1 .= '(\d+\.\d+)';
- $tmpline2 .= '
- $log_date = $' . ++$offset . ';';
- $log_date = 0;
- } elsif ( $_ eq 'c-ip' ) {
- $tmpline1 .= '([\w\-\.:]+)';
- $tmpline2 .= '
- $log_requester = $' . ++$offset . ';';
- } elsif ( $_ eq 'cs-authname'
- or $_ eq 'x-remote-id'
- or $_ eq 'x-username'
- or $_ eq 'cs-username' )
- {
- $tmpline1 .= '(\S+)';
- $tmpline2 .= '
- $log_ident = $' . ++$offset . ';';
- } elsif ( $_ eq 's-ip' or $_ eq 's-sitename' ) {
- $tmpline1 .= '[\w\-\.]+';
- } elsif ( $_ eq 'cs-method' ) {
- $tmpline1 .= '([\w\-]+)';
- $tmpline2 .= '
- $log_method = $' . ++$offset . ';';
- } elsif ( $_ eq 'cs-uri' ) {
- $tmpline1 .= '(\S+)';
- $tmpline2 .= '
- $log_url = $' . ++$offset . ';';
- } elsif ( $_ eq 'cs-uri-stem'
- or $_ eq 'cs-uri-query'
- or $_ eq 'x-note' )
- {
- $tmpline1 .= '\S+';
- } elsif ( $_ eq 'c-version' ) {
- $tmpline1 .= '\w+/[\d\.]+';
- } elsif ( $_ eq 'sc-status' ) {
- $tmpline1 .= '(\d+)';
- $tmpline2 .= '
- $log_code = $' . ++$offset . ';';
- } elsif ( $_ eq 'sc-bytes' or $_ eq 'bytes' ) {
- $tmpline1 .= '(\d+)';
- $tmpline2 .= '
- $log_size = $' . ++$offset . ';';
- } elsif ( $_ eq 'cs-bytes' ) {
- $tmpline1 .= '\d+';
- } elsif ( $_ eq 'x-elapsed-milliseconds' ) {
- $tmpline1 .= '(\d+)';
- $tmpline2 .= '
- $log_reqtime = $' . ++$offset . ';';
- } elsif ( $_ eq 'time-taken' ) {
- $tmpline1 .= '([\d\.]+)';
- $tmpline2 .= '
- $log_reqtime = $' . ++$offset . ' * 1000;';
- } elsif ( $_ eq 'cs(User-Agent)'
- or $_ eq 'cs(Cookie)'
- or $_ eq 'cs(Referer)'
- or $_ eq 'sc(Referer)'
- or $_ eq 'cs(X-Forwarded-For)'
- or $_ eq 'x-hiername' )
- {
- $tmpline1 .= '(\"[^\"]*\"|-)';
- $tmpline2 .= '
- ++$offset;';
- } elsif ( $_ eq 'cached' ) {
- $tmpline1 .= '(\d+)';
- $tmpline2 .= '
- $log_cached = $' . ++$offset . ';';
- } elsif ( $_ eq 'x-transaction' ) {
- $tmpline1 .= '(\w+\/[\d\-]+)';
- $tmpline2 .= '
- $log_status = $' . ++$offset . ';';
- $log_status = 0;
- } elsif ( $_ eq 'x-fill-proxy-ip' ) {
- $tmpline1 .= '([\w\-\.]+)';
- $tmpline2 .= '
- $log_proxy_ip = $' . ++$offset . ';';
- } elsif ( $_ eq 'x-origin-ip' ) {
- $tmpline1 .= '([\w\-\.]+)';
- $tmpline2 .= '
- $log_origin_ip = $' . ++$offset . ';';
- } elsif ( $_ eq 'x-hiercode' ) {
- $tmpline1 .= '([\w\-\.\/]+)';
- $tmpline2 .= '
- $log_hier = $' . ++$offset . ';';
- $log_hier = 0;
- } elsif ( $_ eq 'rs(Content-Type)' ) {
- $tmpline1 .= '\"[^\"]*\"';
- } else {
- print STDERR "$0: I don't know this input format. Please check the
- input. If you\'re sure that the following is NOT corrupt and the error also
- occurs with the recent version of Calamaris (see the README for pointers and
- known bugs) then report it with the following line to <Calamaris-bug\@Cord.de>.
- Thank You.\n\n$_\n\n";
- exit(1);
- }
- }
- foreach $pattern ( qw(date time c-ip cs-method cs-uri sc-status sc-bytes
- time-taken cached x-fill-proxy-ip x-origin-ip)
- )
- {
- unless ( grep $pattern, @fields ) {
- print STDERR "$0: Your input file format is missing at least the field
- \'$pattern\'. I can\'t parse it. Sorry. If you think that this field isn't
- important to you, please report this error to <Calamaris-bug\@Cord.de>.
- Thank You.\n\n";
- exit(1);
- }
- }
- foreach $pattern (qw(cs-authname)) {
- unless ( grep $pattern, @fields ) {
- if ( $pattern eq 'cs-authname' ) {
- $tmpline2 .= '
- $log_ident = "-";';
- }
- }
- }
- $loop .= $tmpline1 . '.?$#
- )
- {' . $tmpline2;
- $loop .= '
- $log_date = timegm(
- $log_sec, $log_min, $log_hour, $log_day, $log_month - 1,
- $log_year - 1900
- );' unless defined $log_date;
- $loop .= '
- $log_status = "$log_cached/$log_code";' unless defined $log_status;
- $loop .= '
- if ( $log_origin_ip ne \'-\' ) {
- $log_hier = "DIRECT/$log_origin_ip";
- } elsif ( $log_proxy_ip ne \'-\' ) {
- if ( $log_cached eq \'0\' ) {
- $log_hier = "PARENT_MISS/$log_proxy_ip";
- } else {
- $log_hier = "PARENT_HIT/$log_proxy_ip";
- }
- } else {
- $log_hier = "NONE/-";
- }' unless defined $log_hier;
- $loop .= '
- } else {';
- while ( defined( $line = <> ) ) {
- last unless $line =~ /^\s*#/;
- }
- } elsif ( $opt_f eq 'nse' ) {
- $line =~ s#^format=##og;
- @fields = split ( /\s+/, $line );
- $loop .= '
- use Time::Local;
- if ( $line =~
- m#^';
- foreach (@fields) {
- $tmpline1 .= '\s+' if ($tmpline1);
- if ( $_ eq '[%SYSDATE%]' ) {
- $tmpline1 .= '\[(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s+\S+\]';
- $tmpline2 .= '
- $log_day = $' . ++$offset . ';
- $log_month = $' . ++$offset . ';
- $log_year = $' . ++$offset . ';
- $log_hour = $' . ++$offset . ';
- $log_min = $' . ++$offset . ';
- $log_sec = $' . ++$offset . ';';
- } elsif ( $_ eq '%Ses->client.ip%' ) {
- $tmpline1 .= '([\w\-\.:]+)';
- $tmpline2 .= '
- $log_requester = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->vars.pauth-user%' ) {
- $tmpline1 .= '(\S+)';
- $tmpline2 .= '
- $log_ident = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->vars.remote-status%'
- or $_ eq '%Req->vars.r2p-cl%'
- or $_ eq '%Req->vars.cli-status%'
- or $_ eq '%Req->vars.svr-status%'
- or $_ eq '%Req->vars.cch-status%' )
- {
- $tmpline1 .= '[\w\-\.]+';
- } elsif ( $_ eq '"%Req->reqpb.proxy-request%"' ) {
- $tmpline1 .= '\"(\w+)\s+([^\"]+)\s+\S+\"';
- $tmpline2 .= '
- $log_method = $' . ++$offset . ';
- $log_url = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->srvhdrs.clf-status%' ) {
- $tmpline1 .= '([\d\-]+)';
- $tmpline2 .= '
- $log_code = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->vars.p2c-cl%' ) {
- $tmpline1 .= '([\d\-]+)';
- $tmpline2 .= '
- $log_size = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->headers.content-length%'
- or $_ eq '%Req->vars.p2r-cl%'
- or $_ eq '%Req->vars.c2p-hl%'
- or $_ eq '%Req->vars.p2c-hl%'
- or $_ eq '%Req->vars.p2r-hl%'
- or $_ eq '%Req->vars.r2p-hl%' )
- {
- $tmpline1 .= '[\d\-]+';
- } elsif ( $_ eq '%Req->vars.xfer-time%' ) {
- $tmpline1 .= '([\d\.]+)';
- $tmpline2 .= '
- $log_reqtime = $' . ++$offset . ' * 1000;';
- } elsif ( $_ eq '%route%' ) {
- $tmpline1 .= '(\d+)';
- $tmpline2 .= '
- $log_cached = $' . ++$offset . ';';
- } elsif ( $_ eq '%Req->vars.actual-route%' ) {
- $tmpline1 .= '(\S+)';
- $tmpline2 .= '
- $log_hier = $' . ++$offset . ';
- if ( $log_hier =~ m#[\(\)]# ) {
- $log_hier =~ s#^(\w+)(\((\S+)\))?$#$1/$3#;
- $log_status = \'TCP_MISS/-\';
- } elsif ( $log_hier =~ m#-# ) {
- $log_hier = $log_hier . \'/-\';
- $log_status = \'TCP_HIT/-\';
- } else {
- $log_hier = $log_hier . \'/-\';
- $log_status = \'TCP_MISS/-\';
- }';
- } elsif ( $_ eq '-' ) {
- $tmpline1 .= '-';
- } else {
- print STDERR "$0: I don't know this input format. Please check the
- input. If you\'re sure that the following is NOT corrupt and the error also
- occurs with the recent version of Calamaris (see the README for pointers and
- known bugs) then report it with the following line to <Calamaris-bug\@Cord.de>.
- Thank You.\n\n$_\n\n";
- exit(1);
- }
- }
- foreach $pattern ( qw(%Ses->client.ip% [%SYSDATE%]
- %Req->reqpb.proxy-request% %Req->srvhdrs.clf-status%
- %Req->vars.p2c-cl% %Req->vars.xfer-time%)
- )
- {
- unless ( grep $pattern, @fields ) {
- print STDERR "$0: Your input file format is missing at least the field
- \'$pattern\'. I can\'t parse it. Sorry. If you think that this field isn't
- important to you, please report this error to <Calamaris-bug\@Cord.de>.
- Thank You.\n\n";
- exit(1);
- }
- }
- foreach $pattern (qw(%Req->vars.pauth-user% %Req->vars.actual-route%)) {
- unless ( grep /$pattern/, @fields ) {
- if ( $pattern eq '%Req->vars.pauth-user%' ) {
- $tmpline2 .= '
- $log_ident = "-";';
- } elsif ( $pattern eq '%Req->vars.actual-route%' ) {
- $tmpline2 .= '
- $log_hier = \'-/-\';
- $log_status = \'-/-\';';
- }
- }
- }
- $loop .= $tmpline1 . '.?$#
- )
- {' . $tmpline2 . '
- $monthcount = -1;
- foreach $month (@months) {
- $monthcount++;
- last if ( $month eq $log_month );
- }
- $log_date = timegm(
- $log_sec, $log_min, $log_hour, $log_day, $monthcount,
- $log_year - 1900
- );
- } else {';
- while ( defined( $line = <> ) ) {
- last unless $line =~ /^\s*#/;
- }
- } else {
- print STDERR "$0: unknown value at -f -option: \"$opt_f\"\n\n$USAGE\n\n";
- exit 1;
- }
- $loop .= '
- chomp($line);';
- $loop .= '
- warn(\'invalid line: "\' . $line . "\"\n");' if $opt_v;
- $loop .= '
- $invalid++;
- next;
- }';
- $loop .= '
- if (($log_date < $interval_begin) or ($log_date > $interval_end)) {
- $skipped++;
- next;
- }
- ' if $opt_I;
- $loop .= '
- $log_reqtime = .1 if $log_reqtime == 0;
- ( $log_hitfail, $log_code ) = split \'/\', $log_status;
- $log_size = 0 if ( $log_size eq \'-\' );
- $log_url =~ s/\?.*$/?/;
- @url = split m#[/\\\]#o, $log_url;
- ( $urlprot, $urlhost, $urlext ) = (@url)[ 0, 2, $#url ];
- $urlext = \'.<none>\' if $#url <= 2;
- if ( $#url <= -1 ) {
- $urlext = \'.<error>\';
- $urlprot = $urlhost = \'<error>\';
- }
- $urlext = \'.<dynamic>\'
- if ( $urlext =~ m#[\?;&\$,!@=|%]#o or $log_method eq \'POST\' );
- unless ( defined $urlhost ) {
- $urlhost = $urlprot;';
- $loop .= '
- $log_content = '
- unless $opt_f eq 'squid-old'
- or $opt_f eq 'elff'
- or $opt_f eq 'nse';
- $loop .= '
- $urlprot = \'<secure>\';
- $urlext = \'.<secure>\';
- }
- $urlhost =~ s#^.*@##o;
- $urlhost =~ s#[:\?].*$##o;
- $urlhost =~ tr/A-Z/a-z/;
- @urlext = split \'\.\', $urlext;
- $urlext = (@urlext)[$#urlext];
- $urlext = \'<none>\' if $#urlext <= 0;';
- $loop .= '
- $urlext =~ tr/A-Z/a-z/;' if $opt_c;
- $loop .= '
- if ( $urlhost =~ m#^(([0-9][0-9]{0,2}\.){3})[0-9][0-9]{0,2}$#o ) {';
- $loop .= '
- $urlhost = $1 . \'*\';' unless $opt_N == -1;
- $loop .= '
- $urltld = \'<unresolved>\';
- } elsif ( $urlhost =~ m#^(.*\.([^\.]+\.)?)?([^\.]+\.([^\.]+))\.?$#o ) {
- @list = split \'\.\', $urlhost;
- $urltld = pop @list;';
- if ( $opt_N != -1 ) {
- $loop .= '
- $urlhost = $urltld;';
- for ( $i = $opt_N ; $i != 1 ; $i-- ) {
- $loop .= '
- $urlhost = pop (@list) . \'.\' . $urlhost if $#list >= 0;';
- }
- $loop .= '
- $urlhost = pop (@list) . \'.\' . $urlhost
- if ( $urltld =~
- m#^(a[rtu]|br|c[no]|hk|i[dlm]|jp|kr|l[by]|m[oxy]|nz|p[elnry]|sg|t[hrw]|u[aks]|ve|yu|za)$#o
- and $#list >= 0 );
- $urlhost = \'*.\' . $urlhost if $#list >= 0;
- $urltld = \'*.\' . $urltld;';
- }
- $loop .= '
- } elsif ( $urlhost =~ m#([!a-z0-9\.\-]|\.\.)#o ) {
- $urlhost = $urltld = $urlext = $urlprot = \'<error>\';
- } else {
- $urltld = $urlhost;
- }';
- if ($opt_n) {
- $loop .= '
- $requester = $log_requester;';
- } else {
- $loop .= '
- $requester = getfqdn($log_requester);';
- }
- $loop .= '
- $requester = $log_ident . \'@\' . $requester if $log_ident ne \'-\';'
- if $opt_u;
- $loop .= '
- ( $log_hier_method, $log_hier_host ) = ( split \'/\', $log_hier )[ 0, 1 ];';
- $loop .= '
- $log_content = \'<unknown>\' if $log_content eq \'-\';
- $log_content =~ tr/A-Z/a-z/;
- $log_content = '
- unless $opt_f eq 'squid-old'
- or $opt_f eq 'elff'
- or $opt_f eq 'nse';
- $loop .= '
- $urlhost = $urltld = $urlext = $urlprot = \'<error>\'
- if ( $log_code =~ m#^[45]#o );';
- $loop .= "
- print STDERR '#' if (0 == (\$counter % $opt_b));" if $opt_b;
- $loop .= '
- $counter++;
- $size += $log_size;
- $time += $log_reqtime;
- $method{$log_method} = $method_size{$log_method} =
- $method_time{$log_method} = 0
- unless defined $method{$log_method};
- $method{$log_method}++;
- $method_size{$log_method} += $log_size;
- $method_time{$log_method} += $log_reqtime;
- $time_begin = $log_date if $log_date < $time_begin;
- $time_end = $log_date if $log_date > $time_end;';
- if ( defined $opt_p ) {
- $loop .= '
- if ( defined(@peak_all) ) {
- if ( $log_date < $peak_all[$#peak_all] ) {
- $peak_warn =
- \'Peak values are most likely wrong due to unsorted input!\';
- undef(@peak_all);
- undef(@peak_udp);
- undef(@peak_tcp);
- $peak_all_min_pointer = $peak_all_sec_pointer = $peak_tcp_min_pointer =
- $peak_tcp_sec_pointer = $peak_udp_min_pointer =
- $peak_udp_sec_pointer = 0;
- chomp($line);
- warn( \'unsorted input: "\' . $line . "\"\n" ) if $opt_v;
- }
- }';
- if ( $opt_p eq 'old' ) {
- $loop .= '
- $peak_all_sec_pointer++;
- $peak_all_min_pointer++;
- unshift ( @peak_all, $log_date );
- $peak_all_sec_pointer--
- while $peak_all[ $peak_all_sec_pointer - 1 ] < ( $log_date - 1 );
- $peak_all_min_pointer--
- while $peak_all[ $peak_all_min_pointer - 1 ] < ( $log_date - 60 );
- pop (@peak_all) while $peak_all[$#peak_all] < ( $log_date - 3600 );
- if ( $peak_all_hour < @peak_all ) {
- $peak_all_hour = @peak_all;
- $peak_all_hour_time = $log_date - 3600;
- }
- if ( $peak_all_min < $peak_all_min_pointer ) {
- $peak_all_min = $peak_all_min_pointer;
- $peak_all_min_time = $log_date - 60;
- }
- if ( $peak_all_sec < $peak_all_sec_pointer ) {
- $peak_all_sec = $peak_all_sec_pointer;
- $peak_all_sec_time = $log_date - 1;
- }';
- } elsif ( $opt_p eq 'new' ) {
- $loop .= '
- $date_hour = int( ( $log_date - ( $log_reqtime / 1000 ) ) / 3600 ) * 3600;
- foreach $i ( ( $date_hour / 3600 ) .. int( $log_date / 3600 ) ) {
- $peak_all_hour{ $i * 3600 } = $peak_all_hour_size{ $i * 3600 } = 0
- unless defined $peak_all_hour{ $i * 3600 };
- $peak_all_hour{ $i * 3600 }++;
- $peak_all_hour_size{ $i * 3600 } +=
- $log_size / int( $log_reqtime / 3600000 + 1 );
- }
- $peak_all_sec_pointer++;
- unshift ( @peak_all, $log_date );
- $peak_all_sec_pointer--
- while $peak_all[ $peak_all_sec_pointer - 1 ] < ( $log_date - 1 );
- pop (@peak_all) while $peak_all[$#peak_all] < ( $log_date - 60 );
- if ( $peak_all_min < @peak_all ) {
- $peak_all_min = @peak_all;
- $peak_all_min_time = $log_date - 60;
- }
- if ( $peak_all_sec < $peak_all_sec_pointer ) {
- $peak_all_sec = $peak_all_sec_pointer;
- $peak_all_sec_time = $log_date - 1;
- }';
- } else {
- print STDERR
- "$0: unknown value at -p -option: \"$opt_p\"\n\n$USAGE\n\n";
- exit 1;
- }
- }
- $loop .= '
- if ( ( $log_method =~ m#^ICP_#o ) or ( $log_status =~ m#^ICP#o ) ) {
- $udp++;
- $udp_size += $log_size;
- $udp_time += $log_reqtime;';
- $loop .= '
- $udp_requester{$requester} = $udp_requester_size{$requester} =
- $udp_requester_time{$requester} = $udp_hit_requester{$requester} =
- $udp_hit_requester_size{$requester} = 0
- unless defined $udp_requester{$requester};
- $udp_requester{$requester}++;
- $udp_requester_size{$requester} += $log_size;
- $udp_requester_time{$requester} += $log_reqtime;' if ($opt_r);
- $loop .= '
- $udp_requester_urlhost{$requester}{$urlhost} =
- $udp_requester_urlhost_size{$requester}{$urlhost} =
- $udp_requester_urlhost_time{$requester}{$urlhost} =
- $udp_hit_requester_urlhost{$requester}{$urlhost} =
- $udp_hit_requester_urlhost_size{$requester}{$urlhost} = 0
- unless defined $udp_requester_urlhost{$requester}{$urlhost};
- $udp_requester_urlhost{$requester}{$urlhost}++;
- $udp_requester_urlhost_size{$requester}{$urlhost} += $log_size;
- $udp_requester_urlhost_time{$requester}{$urlhost} += $log_reqtime;'
- if ($opt_R);
- if ( not defined $opt_p ) {
- } elsif ( $opt_p eq 'old' ) {
- $loop .= '
- $peak_udp_sec_pointer++;
- $peak_udp_min_pointer++;
- unshift ( @peak_udp, $log_date );
- $peak_udp_sec_pointer--
- while $peak_udp[ $peak_udp_sec_pointer - 1 ] < ( $log_date - 1 );
- $peak_udp_min_pointer--
- while $peak_udp[ $peak_udp_min_pointer - 1 ] < ( $log_date - 60 );
- pop @peak_udp while $peak_udp[$#peak_udp] < ( $log_date - 3600 );
- if ( $peak_udp_hour < @peak_udp ) {
- $p…