PageRenderTime 52ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 1ms

/elsa/node/elsa.pl

http://enterprise-log-search-and-archive.googlecode.com/
Perl | 556 lines | 454 code | 64 blank | 38 comment | 39 complexity | ff217796947f61c01d7ccd3fa14832e6 MD5 | raw file
Possible License(s): GPL-2.0
  1. #!/usr/bin/perl
  2. use strict;
  3. use Data::Dumper;
  4. use IO::Handle;
  5. use POSIX;
  6. use Config::JSON;
  7. use Getopt::Std;
  8. use String::CRC32;
  9. use Log::Log4perl;
  10. use DBI;
  11. use Socket qw(inet_aton);
  12. use FindBin;
  13. # Include the directory this script is in
  14. use lib $FindBin::Bin;
  15. use Indexer;
  16. my %Opts;
  17. getopts('onc:f:', \%Opts);
  18. $| = 1;
  19. my $pipes = {};
  20. my $conf_file = $Opts{c} ? $Opts{c} : '/etc/elsa_node.conf';
  21. my $Conf = Config::JSON->new( $conf_file );
  22. $Conf = $Conf->{config}; # native hash is 10x faster than using Config::JSON->get()
  23. # Setup logger
  24. my $logdir = $Conf->{logdir};
  25. my $debug_level = $Conf->{debug_level};
  26. my $l4pconf = qq(
  27. log4perl.category.ELSA = $debug_level, File
  28. log4perl.appender.File = Log::Log4perl::Appender::File
  29. log4perl.appender.File.filename = $logdir/node.log
  30. log4perl.appender.File.syswrite = 1
  31. log4perl.appender.File.recreate = 1
  32. log4perl.appender.File.layout = Log::Log4perl::Layout::PatternLayout
  33. log4perl.appender.File.layout.ConversionPattern = * %p [%d] %F (%L) %M %P %m%n
  34. log4perl.filter.ScreenLevel = Log::Log4perl::Filter::LevelRange
  35. log4perl.filter.ScreenLevel.LevelMin = $debug_level
  36. log4perl.filter.ScreenLevel.LevelMax = ERROR
  37. log4perl.filter.ScreenLevel.AcceptOnMatch = true
  38. log4perl.appender.Screen = Log::Log4perl::Appender::Screen
  39. log4perl.appender.Screen.Filter = ScreenLevel
  40. log4perl.appender.Screen.stderr = 1
  41. log4perl.appender.Screen.layout = Log::Log4perl::Layout::PatternLayout
  42. log4perl.appender.Screen.layout.ConversionPattern = * %p [%d] %F (%L) %M %P %m%n
  43. );
  44. Log::Log4perl::init( \$l4pconf ) or die("Unable to init logger\n");
  45. my $Log = Log::Log4perl::get_logger("ELSA") or die("Unable to init logger\n");
  46. my $Dbh = DBI->connect(($Conf->{database}->{dsn} or 'dbi:mysql:database=syslog;'),
  47. $Conf->{database}->{username},
  48. $Conf->{database}->{password},
  49. {
  50. InactiveDestroy => 1,
  51. PrintError => 0,
  52. mysql_auto_reconnect => 1,
  53. HandleError => \&_sql_error_handler,
  54. })
  55. or die 'connection failed ' . $! . ' ' . $DBI::errstr;
  56. my $num_children = $Conf->{num_log_readers} or die("undefined config for num_log_readers");
  57. my $continue = 1;
  58. my $Run = 1;
  59. my $Missing_field_tolerance = 1;
  60. my $Default_class_id = 1;
  61. my $Proto_map = {
  62. 'ICMP' => 1,
  63. 'icmp' => 1,
  64. 'TCP' => 6,
  65. 'tcp' => 6,
  66. 'UDP' => 17,
  67. 'udp' => 17,
  68. };
  69. use constant FIELD_TIMESTAMP => 0;
  70. use constant FIELD_HOST => 1;
  71. use constant FIELD_PROGRAM => 2;
  72. use constant FIELD_CLASS_ID => 3;
  73. use constant FIELD_MSG => 4;
  74. use constant FIELD_I0 => 5;
  75. use constant FIELD_I1 => 6;
  76. use constant FIELD_I2 => 7;
  77. use constant FIELD_I3 => 8;
  78. use constant FIELD_I4 => 9;
  79. use constant FIELD_I5 => 10;
  80. use constant FIELD_S0 => 11;
  81. use constant FIELD_S1 => 12;
  82. use constant FIELD_S2 => 13;
  83. use constant FIELD_S3 => 14;
  84. use constant FIELD_S4 => 15;
  85. use constant FIELD_S5 => 16;
  86. my $Class_info = _get_class_info();
  87. my $Cache = {};
  88. _init_cache();
  89. unless (-f $Conf->{sphinx}->{config_file}){
  90. _create_sphinx_conf();
  91. }
  92. unless ($Opts{n}){
  93. print "Validating directory...\n";
  94. my $indexer = new Indexer(log => $Log, conf => Config::JSON->new( $conf_file ), class_info => $Class_info);
  95. $indexer->initial_validate_directory();
  96. }
  97. if ($Opts{f}){
  98. print "Processing file $Opts{f}...\n";
  99. _process_batch($Opts{f});
  100. exit;
  101. }
  102. if ($Opts{o}){
  103. print "Running once\n";
  104. $Run = 0;
  105. }
  106. $SIG{TERM} = sub { $Run = 0; $Log->warn('Shutting down'); };
  107. $SIG{CHLD} = 'IGNORE'; # will do the wait() so we don't create zombies
  108. my $total_processed = 0;
  109. do {
  110. $Log->debug("Starting process_batch");
  111. eval {
  112. my $num_processed = _process_batch();
  113. $total_processed += $num_processed;
  114. $Log->debug("Processed $num_processed records");
  115. sleep 1 unless $num_processed; # avoid batch-bombing if our parent handle closes
  116. };
  117. if ($@) {
  118. my $e = $@;
  119. $Log->error($e);
  120. sleep 1 if $Run; # to avoid errmsg flooding
  121. }
  122. $Conf = Config::JSON->new( $conf_file )->{config} if $Run; # reload the config in case it has changed on disk
  123. } while ($Run);
  124. $Log->info('Exiting after processing ' . $total_processed . ' records');
  125. exit;
  126. sub _sql_error_handler {
  127. my $errstr = shift;
  128. my $dbh = shift;
  129. my $query = $dbh->{Statement};
  130. my $full_errstr = 'SQL_ERROR: ' . $errstr . ', query: ' . $query;
  131. $Log->error($full_errstr);
  132. #return 1; # Stops RaiseError
  133. die($full_errstr);
  134. }
  135. sub _create_sphinx_conf {
  136. my $indexer = new Indexer(log => $Log, conf => Config::JSON->new( $conf_file ), class_info => $Class_info);
  137. open(FH, '>' . $Conf->{sphinx}->{config_file}) or die("Cannot open config file for writing: $!");
  138. print FH $indexer->get_sphinx_conf();
  139. close(FH);
  140. print 'Wrote new config to file ' . $Conf->{sphinx}->{config_file} . "\n";
  141. }
  142. sub _process_batch {
  143. my $filename = shift;
  144. # Grab a fresh copy of class info in case there has been an update
  145. $Class_info = _get_class_info();
  146. my $args = { run => 1 };
  147. my $fh = \*STDIN;
  148. if ($filename){
  149. open($fh, $filename) or die 'Unable to open file: ' . $!;
  150. $Log->debug('Reading from file ' . $filename);
  151. $args->{offline_processing} = 1;
  152. $args->{offline_processing_start} = time();
  153. $args->{offline_processing_end} = 0;
  154. }
  155. $fh->autoflush(1);
  156. $fh->blocking(1);
  157. die "Non-existent buffer_dir: " . $Conf->{buffer_dir}
  158. unless -d $Conf->{buffer_dir};
  159. # $Log->debug("Starting up with batch_id: $batch_id, first_id: $first_id");
  160. $args->{start_time} = Time::HiRes::time();
  161. $args->{tempfile} = File::Temp->new( DIR => $Conf->{buffer_dir}, UNLINK => 0 );
  162. unless ($args->{tempfile}){
  163. $Log->error('Unable to create tempfile: ' . $!);
  164. return 0;
  165. }
  166. $args->{tempfile}->autoflush(1);
  167. $args->{batch_counter} = 0;
  168. $args->{error_counter} = 0;
  169. # Reset the miss cache
  170. $args->{cache_add} = {};
  171. # End the loop after index_interval seconds
  172. local $SIG{ALRM} = sub {
  173. $Log->trace("ALARM");
  174. $args->{run} = 0;
  175. # safety in case we don't receive any logs, we'll still do post_proc and restart loop
  176. $fh->blocking(0);
  177. };
  178. unless ($args->{offline_processing}){
  179. alarm $Conf->{sphinx}->{index_interval};
  180. }
  181. while (<$fh>){
  182. eval {
  183. $args->{tempfile}->print(join("\t", @{ _parse_line($args, $_) }) . "\n");
  184. $args->{batch_counter}++;
  185. };
  186. if ($@){
  187. my $e = $@;
  188. $args->{error_counter}++;
  189. if ($Conf->{log_parse_errors}){
  190. $Log->error($e)
  191. }
  192. }
  193. last unless $args->{run};
  194. }
  195. # Update args to be results
  196. $args->{file} = $args->{tempfile}->filename();
  197. $args->{start} = $args->{offline_processing} ? $args->{offline_processing_start} : $args->{start_time};
  198. $args->{end} = $args->{offline_processing} ? $args->{offline_processing_end} : Time::HiRes::time();
  199. $args->{total_processed} = $args->{batch_counter};
  200. $args->{total_errors} = $args->{error_counter};
  201. # Report back that we've finished
  202. $Log->debug("Finished job process_batch with cache hits: $args->{batch_counter} and " . (scalar keys %{ $args->{cache_add} }) . ' new programs');
  203. $Log->debug('Total errors: ' . $args->{error_counter} . ' (%' . (($args->{error_counter} / $args->{batch_counter}) * 100) . ')' ) if $args->{batch_counter};
  204. my ($query, $sth);
  205. if (scalar keys %{ $args->{cache_add} }){
  206. $Log->trace('Adding programs: ' . Dumper($args->{cache_add}));
  207. $query = 'INSERT INTO programs (id, program) VALUES(?,?) ON DUPLICATE KEY UPDATE id=?';
  208. $sth = $Dbh->prepare($query);
  209. $query = 'REPLACE INTO class_program_map (class_id, program_id) VALUES(?,?)';
  210. my $sth_map = $Dbh->prepare($query);
  211. foreach my $program (keys %{ $args->{cache_add} }){
  212. $sth->execute($args->{cache_add}->{$program}->{id}, $program, $args->{cache_add}->{$program}->{id});
  213. if ($sth->rows){ # this was not a duplicate, proceed with the class map insert
  214. $sth_map->execute($args->{cache_add}->{$program}->{class_id}, $args->{cache_add}->{$program}->{id});
  215. }
  216. else {
  217. $Log->error('Duplicate CRC found for ' . $program . ' with CRC ' . $args->{cache_add}->{$program}->{id});
  218. }
  219. }
  220. $args->{cache_add} = {};
  221. }
  222. if ($args->{batch_counter}){
  223. $query = 'INSERT INTO buffers (filename) VALUES (?)';
  224. $sth = $Dbh->prepare($query);
  225. $sth->execute($args->{file});
  226. $Log->trace('inserted filename ' . $args->{file} . ' with batch_counter ' . $args->{batch_counter});
  227. }
  228. # Reset the run marker
  229. $args->{run} = 1;
  230. # Fork our post-batch processor
  231. return $args->{batch_counter} unless $args->{batch_counter};
  232. my $pid = fork();
  233. if ($pid){
  234. # Parent
  235. return $args->{batch_counter};
  236. }
  237. # Child
  238. $Log->trace('Child started');
  239. eval {
  240. my $indexer = new Indexer(log => $Log, conf => Config::JSON->new( $conf_file ), class_info => $Class_info);
  241. $indexer->load_buffers();
  242. };
  243. if ($@){
  244. $Log->error('Child encountered error: ' . $@);
  245. }
  246. $Log->trace('Child finished');
  247. exit; # done with child
  248. }
  249. sub _parse_line {
  250. my $args = shift;
  251. my $raw_line = shift;
  252. chomp($raw_line);
  253. # Escape any backslashes
  254. $raw_line =~ s/\\/\\\\/g;
  255. my @line = split(/\t/, $raw_line);
  256. # Fix class_id for "unknown"
  257. if ($line[FIELD_CLASS_ID] eq 'unknown'){
  258. $line[FIELD_CLASS_ID] = $Default_class_id;
  259. }
  260. # If we're configured to do so, we'll tolerate missing a missing field and make up a default
  261. if ($Missing_field_tolerance){
  262. my $missing_fields = 0;
  263. # Make sure that we've got the basics--things we don't want to fake
  264. unless ($line[FIELD_HOST] and $line[FIELD_MSG]){
  265. die "Unable to parse log line: $raw_line. Only parsed into:\n" . Dumper(\@line);
  266. }
  267. unless ($line[FIELD_TIMESTAMP]){
  268. $line[FIELD_TIMESTAMP] = time();
  269. $Log->warn('Missing required field timestamp') if $Conf->{log_parse_errors};
  270. $missing_fields++;
  271. }
  272. unless ($line[FIELD_PROGRAM]){
  273. # Check to see if this is a dumb situation in which Cisco put program in msg
  274. $line[FIELD_PROGRAM] = 'unknown';
  275. $Log->warn('Missing required field program') if $Conf->{log_parse_errors};
  276. $missing_fields++;
  277. }
  278. unless ($line[FIELD_CLASS_ID]){
  279. $line[FIELD_CLASS_ID] = '1';
  280. $Log->warn('Missing required field class id') if $Conf->{log_parse_errors};
  281. $missing_fields++;
  282. }
  283. if ($missing_fields > $Missing_field_tolerance){
  284. die "Unable to parse log line $raw_line: not enough fields. Only parsed into:\n" . Dumper(\@line);
  285. }
  286. }
  287. else {
  288. # No tolerance for any missing fields
  289. unless ($line[FIELD_TIMESTAMP] and $line[FIELD_CLASS_ID] and $line[FIELD_HOST] and
  290. $line[FIELD_PROGRAM] and $line[FIELD_MSG]){
  291. die "Unable to parse log line $raw_line: no tolerance for missing fields. Only parsed into:\n" . Dumper(\@line);
  292. }
  293. }
  294. unless ($Class_info->{classes_by_id}->{ $line[FIELD_CLASS_ID] }){
  295. die "Unable to parse valid class id from log line $raw_line. Only parsed into:\n" . Dumper(\@line);
  296. }
  297. # Fix weird programs that may be wrong
  298. if ($line[FIELD_PROGRAM] =~ /^\d+$/){
  299. # $Log->debug("ALL NUMBER PROG: " . $line[FIELD_PROGRAM] . ", raw_line: $raw_line");
  300. $line[FIELD_PROGRAM] = 'unknown';
  301. }
  302. # Normalize program name to be all lowercase
  303. $line[FIELD_PROGRAM] = lc($line[FIELD_PROGRAM]);
  304. # Normalize program name to swap any weird chars with underscores
  305. #$line[FIELD_PROGRAM] =~ s/[^a-zA-Z0-9\_\-]/\_/g;
  306. # Host gets the int version of itself
  307. $line[FIELD_HOST] = unpack('N*', inet_aton($line[FIELD_HOST]));
  308. # Perform a crc32 conversion of the program and store it in the cache for later recording
  309. if ($Cache->{ $line[FIELD_PROGRAM] }){
  310. $line[FIELD_PROGRAM] = $Cache->{ $line[FIELD_PROGRAM] };
  311. }
  312. else {
  313. my $program = $line[FIELD_PROGRAM];
  314. $line[FIELD_PROGRAM] = crc32( $program );
  315. $args->{cache_add}->{ $program } = { id => $line[FIELD_PROGRAM], class_id => $line[FIELD_CLASS_ID] };
  316. $Cache->{ $program } = $line[FIELD_PROGRAM];
  317. }
  318. if ($line[FIELD_CLASS_ID] ne 1){ #skip default since there aren't any fields
  319. # Convert any IP fields as necessary
  320. foreach my $field_order (keys %{ $Class_info->{field_conversions}->{ $line[FIELD_CLASS_ID] }->{'IPv4'} }){
  321. $line[$field_order] = unpack('N', inet_aton($line[$field_order]));
  322. }
  323. # Convert any proto fields as necessary
  324. foreach my $field_order (keys %{ $Class_info->{field_conversions}->{ $line[FIELD_CLASS_ID] }->{PROTO} }){
  325. $line[$field_order] = $Proto_map->{ $line[$field_order] };
  326. }
  327. }
  328. # Update start/end times if necessary
  329. if ($args->{offline_processing}){
  330. if ($line[FIELD_TIMESTAMP] < $args->{offline_processing_start}){
  331. $args->{offline_processing_start} = $line[FIELD_TIMESTAMP];
  332. }
  333. if ($line[FIELD_TIMESTAMP] > $args->{offline_processing_end}){
  334. $args->{offline_processing_end} = $line[FIELD_TIMESTAMP];
  335. }
  336. }
  337. # Push our auto-inc dummy val on
  338. unshift(@line, '0');
  339. return \@line;
  340. }
  341. sub _get_class_info {
  342. my $ret = { classes => {}, classes_by_id => {}, fields => [], field_conversions => {}, };
  343. my ($query, $sth);
  344. # Get classes
  345. $query = "SELECT id, class FROM classes";
  346. $sth = $Dbh->prepare($query);
  347. $sth->execute;
  348. while (my $row = $sth->fetchrow_hashref){
  349. $ret->{classes_by_id}->{ $row->{id} } = $row->{class};
  350. }
  351. # Get fields
  352. $query = "SELECT DISTINCT field, class, field_type, input_validation, field_id, class_id, field_order,\n" .
  353. "IF(class!=\"\", CONCAT(class, \".\", field), field) AS fqdn_field, pattern_type\n" .
  354. "FROM fields\n" .
  355. "JOIN fields_classes_map t2 ON (fields.id=t2.field_id)\n" .
  356. "JOIN classes t3 ON (t2.class_id=t3.id)\n";
  357. $sth = $Dbh->prepare($query);
  358. $sth->execute;
  359. while (my $row = $sth->fetchrow_hashref){
  360. push @{ $ret->{fields} }, {
  361. field => $row->{field},
  362. fqdn_field => $row->{fqdn_field},
  363. class => $row->{class},
  364. value => $row->{field},
  365. text => uc($row->{field}),
  366. field_id => $row->{field_id},
  367. class_id => $row->{class_id},
  368. field_order => $row->{field_order},
  369. field_type => $row->{field_type},
  370. input_validation => $row->{input_validation},
  371. pattern_type => $row->{pattern_type},
  372. };
  373. }
  374. # Find unique classes;
  375. foreach my $class_id (keys %{ $ret->{classes_by_id} }){
  376. $ret->{classes}->{ $ret->{classes_by_id}->{$class_id} } = $class_id;
  377. }
  378. # Find unique field conversions
  379. foreach my $field_hash (@{ $ret->{fields} }){
  380. $ret->{field_conversions}->{ $field_hash->{class_id} } ||= {};
  381. if ($field_hash->{pattern_type} eq 'IPv4'){
  382. $ret->{field_conversions}->{ $field_hash->{class_id} }->{IPv4} ||= {};
  383. $ret->{field_conversions}->{ $field_hash->{class_id} }->{IPv4}->{ $field_hash->{field_order} } = $field_hash->{field};
  384. }
  385. elsif ($field_hash->{field} eq 'proto' and $field_hash->{pattern_type} eq 'QSTRING'){
  386. $ret->{field_conversions}->{ $field_hash->{class_id} }->{PROTO} ||= {};
  387. $ret->{field_conversions}->{ $field_hash->{class_id} }->{PROTO}->{ $field_hash->{field_order} } = $field_hash->{field};
  388. }
  389. elsif ($field_hash->{field} eq 'country_code' and $field_hash->{pattern_type} eq 'QSTRING'){
  390. $ret->{field_conversions}->{ $field_hash->{class_id} }->{COUNTRY_CODE} ||= {};
  391. $ret->{field_conversions}->{ $field_hash->{class_id} }->{COUNTRY_CODE}->{ $field_hash->{field_order} } = $field_hash->{field};
  392. }
  393. }
  394. # Find fields by arranged by order
  395. $ret->{fields_by_order} = {};
  396. foreach my $field_hash (@{ $ret->{fields} }){
  397. $ret->{fields_by_order}->{ $field_hash->{class_id} } ||= {};
  398. $ret->{fields_by_order}->{ $field_hash->{class_id} }->{ $field_hash->{field_order} } = $field_hash;
  399. }
  400. # Find fields by arranged by short field name
  401. $ret->{fields_by_name} = {};
  402. foreach my $field_hash (@{ $ret->{fields} }){
  403. $ret->{fields_by_name}->{ $field_hash->{field} } ||= [];
  404. push @{ $ret->{fields_by_name}->{ $field_hash->{field} } }, $field_hash;
  405. }
  406. return $ret;
  407. }
  408. sub _init_cache {
  409. my ($query, $sth);
  410. $query = 'SELECT id, program FROM programs';
  411. $sth = $Dbh->prepare($query);
  412. $sth->execute();
  413. while (my $row = $sth->fetchrow_hashref){
  414. $Cache->{ $row->{program} } = $row->{id};
  415. }
  416. }
  417. __END__
  418. for my $i ( 0 .. ( $num_children - 1 ) ) {
  419. my $fh;
  420. my $pid = open( $fh, "|-" ); # fork and send to child's STDIN
  421. $fh->autoflush(1);
  422. die("Couldn't fork: $!") unless defined $pid;
  423. if ($pid) {
  424. $pipes->{$i} = { pid => $pid, fh => $fh, counter => 0 };
  425. $SIG{TERM} = sub {
  426. $Run = 0;
  427. #$Log->info("indexer.pl is shutting down");
  428. # Shut down all children
  429. foreach my $i ( keys %{$pipes} ) {
  430. my $pid = $pipes->{$i}->{pid};
  431. #$Log->debug("Sending SIGALRM to $pid");
  432. kill SIGALRM, $pid; # send SIGALRM to each so they finish up
  433. # Send SIGTERM so they stop their continue loops
  434. #$Log->debug("Sending SIGTERM to $pid");
  435. kill SIGTERM, $pid;
  436. exit;
  437. }
  438. $SIG{CHLD} = sub{
  439. #$Log->debug("Got SIGCHLD");
  440. exit;
  441. }
  442. };
  443. }
  444. else {
  445. # child worker
  446. $Log = Log::Log4perl::get_logger("ELSA") or die("Unable to init logger\n"); # get a new logger to not conflict with parent
  447. $pipes = undef; # to avoid any confusion
  448. $SIG{TERM} = sub {
  449. $Log->info("Worker $$ is shutting down");
  450. $continue = 0;
  451. };
  452. OUTER_LOOP: while ($continue) {
  453. eval {
  454. INNER_LOOP: while ($continue) {
  455. $Log->debug("Starting process_batch");
  456. #my $num_processed = $kid_writer->process_batch();
  457. my $num_processed = _process_batch();
  458. $Log->debug("Processed $num_processed records");
  459. sleep 1 unless $num_processed; # avoid batch-bombing if our parent handle closes
  460. }
  461. };
  462. if ($@) {
  463. my $e = $@;
  464. $Log->error($e);
  465. sleep 1; # to avoid errmsg flooding
  466. }
  467. }
  468. exit;
  469. }
  470. }
  471. my $status_check_limit = 10_000;
  472. # Still parent down here
  473. while (<>) {
  474. last unless $Run;
  475. # child id will be line number (total records read) modulo number of sweatshop workers
  476. $pipes->{ ( $. % $num_children ) }->{fh}->print($_);
  477. $pipes->{ ( $. % $num_children ) }->{counter}++;
  478. if ( $. % $status_check_limit == 0 ) {
  479. foreach my $kid ( sort keys %{$pipes} ) {
  480. printf( "Worker %d processed %d logs\n", $kid, $pipes->{$kid}->{counter} );
  481. }
  482. }
  483. }
  484. foreach my $kid ( sort keys %{$pipes} ) {
  485. printf( "Worker %d processed %d logs\n", $kid, $pipes->{$kid}->{counter} );
  486. }