
/agent/StreamWriter.pm

http://streamdb.googlecode.com/
Possible License(s): GPL-2.0
package StreamWriter;
use strict;
use Data::Dumper;
use File::Slurp qw(slurp);
use DBI;
use IO::File;
use Config::JSON;
use Socket;
use Log::Log4perl;
use Net::Server::Daemonize qw(daemonize);

our $Prefix = 'streams_';
our $Num_tables = 100;

$SIG{CHLD} = 'IGNORE'; # will do the wait() so we don't create zombies

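# Constructor: loads the Config::JSON config (or creates a default one under /tmp),
# initializes Log::Log4perl, connects to MySQL, finds the highest existing
# streams_<id> data file, computes the table rollover interval, locates the vortex
# binary, and optionally daemonizes.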
sub new {
    my $class = shift;
    my $args = shift;

    my $conf;
    if ($args->{config_file}){
        $conf = Config::JSON->new($args->{config_file}) or die('Unable to open config file ' . $args->{config_file});
    }
    else {
        $conf = Config::JSON->create('/tmp/streamdb.conf');
    }

    my $self = { _ID => $conf->get('id') ? $conf->get('id') : 0, _CONF => $conf };
    bless $self, $class;

    $self->{_RETENTION_SIZE} = $self->conf->get('retention/size') ? $self->conf->get('retention/size') : 2 * 2**32; # 8GB

    my $debug_level = $self->conf->get('debug_level') ? $self->conf->get('debug_level') : 'INFO';

    # Setup logger
    my $log_conf;
    if ($self->conf->get('logdir')){
        my $log_file = $self->conf->get('logdir') . '/streamdb.log';
        $log_conf = qq(
            log4perl.category.StreamDB = $debug_level, File, Screen
            log4perl.appender.File = Log::Log4perl::Appender::File
            log4perl.appender.File.filename = $log_file
            log4perl.appender.File.syswrite = 1
            log4perl.appender.File.recreate = 1
            log4perl.appender.File.layout = Log::Log4perl::Layout::PatternLayout
            log4perl.appender.File.layout.ConversionPattern = * %p [%d] %F (%L) %M %P %m%n
            log4perl.filter.ScreenLevel = Log::Log4perl::Filter::LevelRange
            log4perl.filter.ScreenLevel.LevelMin = TRACE
            log4perl.filter.ScreenLevel.LevelMax = ERROR
            log4perl.filter.ScreenLevel.AcceptOnMatch = true
            log4perl.appender.Screen = Log::Log4perl::Appender::Screen
            log4perl.appender.Screen.Filter = ScreenLevel
            log4perl.appender.Screen.stderr = 1
            log4perl.appender.Screen.layout = Log::Log4perl::Layout::PatternLayout
            log4perl.appender.Screen.layout.ConversionPattern = * %p [%d] %F (%L) %M %P %m%n
        );
    }
    else {
        $log_conf = qq(
            log4perl.category.StreamDB = $debug_level, Screen
            log4perl.filter.ScreenLevel = Log::Log4perl::Filter::LevelRange
            log4perl.filter.ScreenLevel.LevelMin = TRACE
            log4perl.filter.ScreenLevel.LevelMax = ERROR
            log4perl.filter.ScreenLevel.AcceptOnMatch = true
            log4perl.appender.Screen = Log::Log4perl::Appender::Screen
            log4perl.appender.Screen.Filter = ScreenLevel
            log4perl.appender.Screen.stderr = 1
            log4perl.appender.Screen.layout = Log::Log4perl::Layout::PatternLayout
            log4perl.appender.Screen.layout.ConversionPattern = * %p [%d] %F (%L) %M %P %m%n
        );
    }
    Log::Log4perl::init( \$log_conf ) or die("Unable to init logger\n");
    $self->{_LOGGER} = Log::Log4perl::get_logger('StreamDB') or die("Unable to init logger\n");

    my $host = $conf->get('db/host') ? $conf->get('db/host') : '127.0.0.1';
    my $port = $conf->get('db/port') ? $conf->get('db/port') : 3306;
    $self->{_DB_NAME} = $args->{database} ? $args->{database} : $conf->get('db/database') ? $conf->get('db/database') : 'test';
    my $username = $conf->get('db/username') ? $conf->get('db/username') : 'root';
    my $password = $conf->get('db/password') ? $conf->get('db/password') : '';
    $self->{_DBH} = DBI->connect("dbi:mysql:host=$host;port=$port;database=$self->{_DB_NAME}",
        $username, $password, { InactiveDestroy => 1 }) or die($DBI::errstr);
    $self->{_DBH}->{mysql_auto_reconnect} = 1; # we will auto-reconnect on disconnect
    $self->{_DBH}->{HandleError} = \&_sql_error_handler;

    # Find our starting data file id
    my ($query, $sth, $row);
    $self->{_DATA_FILE_ID} = 0;
    opendir(DIR, $self->conf->get('data_dir'));
    while (my $short_file = readdir(DIR)){
        if ($short_file =~ /^$Prefix(\d+)$/o){
            my $part_id = $1;
            if ($part_id > $self->{_DATA_FILE_ID}){
                $self->{_DATA_FILE_ID} = $part_id;
            }
        }
    }
    closedir(DIR);
    $self->log->debug('Initial file id: ' . $self->{_DATA_FILE_ID});

    $self->{_TABLE_ID_ROLLOVER} = int($self->{_RETENTION_SIZE} / 2**32);
    if ($self->{_TABLE_ID_ROLLOVER} > $Num_tables){
        $self->{_TABLE_ID_ROLLOVER} = int($self->{_TABLE_ID_ROLLOVER} / $Num_tables);
    }
    else {
        $Num_tables = $self->{_TABLE_ID_ROLLOVER};
    }
    $self->log->debug('retention size: ' . $self->{_RETENTION_SIZE} . ' div 4gb: ' . int($self->{_RETENTION_SIZE} / 2**32));
    $self->log->debug("Using a table id rollover of $self->{_TABLE_ID_ROLLOVER} and Num_tables $Num_tables");
    $self->_init_db();

    if ($conf->get('vortex') and -f $conf->get('vortex')){
        $self->{_VORTEX} = $conf->get('vortex');
    }
    else {
        $self->{_VORTEX} = '/usr/local/bin/vortex';
        if (-f $self->{_VORTEX}){
            $self->log->warn('No vortex configured, defaulting to /usr/local/bin/vortex');
        }
        else {
            die('Unable to find a vortex executable.');
        }
    }

    $self->{_BATCH_SIZE} = 100;
    if ($self->conf->get('batch_size')){
        $self->{_BATCH_SIZE} = int( $self->conf->get('batch_size') );
    }

    if ($conf->get('interface')){
        $self->{_INTERFACE} = $conf->get('interface');
    }
    elsif ($args->{interface}){
        $self->{_INTERFACE} = $args->{interface};
    }
    else {
        $self->log->warn('Defaulting to interface eth1, this may not be what you want!');
        $self->{_INTERFACE} = 'eth1';
    }

    if (defined $args->{read_file}){
        if (-f $args->{read_file}){
            $self->{_READ_FILE} = $args->{read_file};
            $self->log->debug('Reading from pcap file ' . $self->{_READ_FILE});
        }
        else {
            die('Cannot find file: ' . $args->{read_file});
        }
    }

    my $collect_limit = 200_000;
    if ($self->conf->get('collect_limit')){
        $collect_limit = $self->conf->get('collect_limit');
    }
    $self->{_OPTIONS} = '-e -l -k -S ' . $collect_limit . ' -C ' . $collect_limit . ' -T 10 -E 10 ';
    if ($conf->get('options')){
        $self->{_OPTIONS} .= $conf->get('options');
    }
    if ($self->{_READ_FILE}){
        $self->{_OPTIONS} .= " -r $self->{_READ_FILE}";
    }
    else {
        $self->{_OPTIONS} .= " -i $self->{_INTERFACE}";
    }

    if ($args->{buffer_dir}){
        $self->{_BUFFER_DIR} = $args->{buffer_dir};
    }
    elsif ($conf->get('buffer_dir')){
        $self->{_BUFFER_DIR} = $conf->get('buffer_dir');
    }
    else {
        $self->log->warn('Defaulting to the current directory as buffer_dir.');
        $self->{_BUFFER_DIR} = './';
    }

    if ($args->{data_dir}){
        $self->{_DATA_DIR} = $args->{data_dir};
    }
    elsif ($conf->get('data_dir')){
        $self->{_DATA_DIR} = $conf->get('data_dir');
    }
    else {
        $self->log->warn('Defaulting to the current directory as data_dir.');
        $self->{_DATA_DIR} = './';
    }

    $self->{_ROLLOVER_CHECK} = $self->conf->get('rollover_check_frequency') ? $self->conf->get('rollover_check_frequency') : 10;

    if (($args->{daemonize} or $self->conf->get('daemonize'))){
        my $user = $self->conf->get('user') ? $self->conf->get('user') : 'root';
        my $group = $self->conf->get('group') ? $self->conf->get('group') : 'root';
        my $pid_file = $self->conf->get('pid_file') ? $self->conf->get('pid_file') : '/var/run/streamdb_' . $self->{_ID} . '.pid';
        print "Daemonizing...\n";
        daemonize($user, $group, $pid_file);
    }

    return $self;
}

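# _init_db: makes sure the working "streams" table exists in the configured
# database, creating it (MyISAM, keyed on offset+file_id) if it does not.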
sub _init_db {
    my $self = shift;
    my ($query, $sth, $row);

    # Verify the streams table exists
    $query = 'SELECT table_name FROM INFORMATION_SCHEMA.tables WHERE table_schema=? AND table_name="streams"';
    $sth = $self->db->prepare($query);
    $sth->execute($self->{_DB_NAME});
    $row = $sth->fetchrow_hashref;
    unless ($row){
        $self->log->warn("Creating streams table");
        $query = <<EOT;
CREATE TABLE streams (
	offset INT UNSIGNED NOT NULL,
	file_id SMALLINT UNSIGNED NOT NULL,
	length INT UNSIGNED NOT NULL,
	srcip INT UNSIGNED NOT NULL,
	srcport MEDIUMINT UNSIGNED NOT NULL,
	dstip INT UNSIGNED NOT NULL,
	dstport MEDIUMINT UNSIGNED NOT NULL,
	timestamp INT UNSIGNED NOT NULL,
	duration INT UNSIGNED NOT NULL,
	reason ENUM ('c', 'r', 't', 'e', 'l', 'i') NOT NULL,
	direction ENUM ('s', 'c') NOT NULL,
	PRIMARY KEY (offset, file_id),
	KEY (srcip),
	KEY (dstip),
	KEY (timestamp)
) ENGINE=MyISAM
EOT
        $self->db->do($query) or die('Unable to create table');
    }
    return 1;
}

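# _open_data_fh: opens (append/create) the current streams_<id> data file under
# _DATA_DIR, switches the handle to binary mode with autoflush, writes a leading
# NUL byte, and checks whether the streams table needs to be rolled.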
sub _open_data_fh {
    my $self = shift;
    $self->{_DATA_FILE_NAME} = $self->{_DATA_DIR} . '/' . $Prefix . $self->{_DATA_FILE_ID};
    my $fh = IO::File->new();
    $fh->open($self->{_DATA_FILE_NAME}, O_WRONLY|O_APPEND|O_CREAT) or die($!);
    $fh->binmode;
    $fh->autoflush(1);
    $fh->print("\0");
    $self->{_DATA_FH} = $fh;
    $self->log->debug('Using data file ' . $self->{_DATA_FILE_NAME});
    $self->_check_table();
    return 1;
}

sub conf {
    my $self = shift;
    return $self->{_CONF};
}

sub log {
    my $self = shift;
    return $self->{_LOGGER};
}

sub db {
    my $self = shift;
    return $self->{_DBH};
}

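# run: main loop. Cleans out any leftover vortex buffer files, opens the data
# file, then reads vortex output line by line: each completed stream file is
# appended to the data file and queued for a batched INSERT into the streams
# table; VORTEX_ERRORS lines are checked for ring drops; table/file rollover is
# re-checked periodically.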
sub run {
    my $self = shift;
    my ($query, $sth);

    # Delete any buffer files left over from a previous run
    opendir(DIR, $self->{_BUFFER_DIR});
    while (my $short_file = readdir(DIR)){
        my $file = $self->{_BUFFER_DIR} . '/' . $short_file;
        if ($short_file =~ /^(tcp\-(\d+)\-(\d+)\-(\d+)\-(\w)\-(\d+)\-(\d+\.\d+\.\d+\.\d+)\:(\d+)\w(\d+\.\d+\.\d+\.\d+)\:(\d+))/){
            #TODO load existing buffers instead of discarding them
            $self->log->warn('Deleting leftover buffer ' . $file);
            unlink $file;
        }
    }
    closedir(DIR);

    $self->_check_rollover();
    $self->_open_data_fh();

    my $last_ring_errors = 0;
    my @to_insert;
    $| = 1;

    my $cmd = "$self->{_VORTEX} $self->{_OPTIONS} -t $self->{_BUFFER_DIR} 2>&1";
    $self->log->debug("cmd: $cmd");
    open(FH, "-|", "$cmd") or die("Unable to run vortex: $!");
    while (<FH>){
        my $line_num = $.;
        chomp;
        my $file = $_;
        #VORTEX_STATS PCAP_RECV: 674814420 PCAP_DROP: 0 VTX_BYTES: 153363091517 VTX_EST: 8245678 VTX_WAIT: 0 VTX_CLOSE_TOT: 8242666 VTX_CLOSE: 5127749 VTX_LIMIT: 1728 VTX_POLL: 0 VTX_TIMOUT: 5 VTX_IDLE: 1355256 VTX_RST: 1757928 VTX_EXIT: 0 VTX_BSF: 0
        if ($file =~ /^VORTEX_STATS/){
            #TODO implement stats someday
        }
        #VORTEX_ERRORS TOTAL: 119275 IP_SIZE: 0 IP_FRAG: 0 IP_HDR: 0 IP_SRCRT: 0 TCP_LIMIT: 5 TCP_HDR: 0 TCP_QUE: 119270 TCP_FLAGS: 0 UDP_ALL: 0 SCAN_ALL: 0 VTX_RING: 0 VTX_IO: 0 VTX_MEM: 0 OTHER: 0
        elsif ($file =~ /^VORTEX_ERRORS.*VTX_RING: (\d+)/){
            my $errors = $1;
            if ($errors > $last_ring_errors){
                my $new_errors = $errors - $last_ring_errors;
                $self->log->error('Dropped ' . $new_errors . ' connections because we could not process them fast enough.');
            }
            $last_ring_errors = $errors;
        }
        #{proto}-{connection_serial_number}-{connection_start_time}-{connection_end_time}-{connection_end_reason}-{connection_size}-{client_ip}:{client_port}{direction}{server_ip}:{server_port}
        elsif (my ($header, $serial_number, $start, $end, $reason, $length, $srcip, $srcport, $direction, $dstip, $dstport) =
            $file =~ /(tcp\-(\d+)\-(\d+)\-(\d+)\-(\w)\-(\d+)\-(\d+\.\d+\.\d+\.\d+)\:(\d+)(\w)(\d+\.\d+\.\d+\.\d+)\:(\d+))/){
            my $offset = $self->{_DATA_FH}->tell or die($!);
            #my $buf = $header . ' ' . slurp($file);
            my $buf = slurp($file);
            # Check if we are going to exceed a 4GB offset, which can't be stored in a 32-bit column in MySQL
            if ($offset > ((2**32) - 1)){
                $self->_data_file_rollover();
                $offset = $self->{_DATA_FH}->tell or die($!);
            }
            $self->{_DATA_FH}->print($buf) or die($!);
            push @to_insert, [ $offset, $self->{_DATA_FILE_ID}, length($buf),
                unpack('N*', inet_aton($srcip)), $srcport, unpack('N*', inet_aton($dstip)), $dstport, $start, ($end-$start), '"' . $reason . '"', '"' . $direction . '"' ];
            unlink($file);
        }
        else {
            $self->log->warn('Unknown input: ' . $file);
        }

        # Check to see if it's time to batch insert
        if (scalar @to_insert >= $self->{_BATCH_SIZE}){
            $self->_insert(\@to_insert);
            @to_insert = ();
        }
        if ($line_num % ($self->{_BATCH_SIZE} * $self->{_ROLLOVER_CHECK}) == 0){ # This is expensive, so we only do it every ROLLOVER_CHECK times
            $self->_check_rollover();
            #$self->log->trace("line num: $line_num, batch size: $self->{_BATCH_SIZE}, rollover check: $self->{_ROLLOVER_CHECK}, test: " . $. % ($self->{_BATCH_SIZE} * $self->{_ROLLOVER_CHECK}));
        }
    }
    close(FH);

    if (scalar @to_insert){
        $self->_insert(\@to_insert);
    }
}

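# _insert: takes an arrayref of pre-formatted row arrayrefs and writes them to
# the streams table in a single multi-row INSERT statement.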
sub _insert {
    my $self = shift;
    my $records = shift;
    my ($query, $sth);
    $query = 'INSERT INTO streams (offset, file_id, length, srcip, srcport, dstip, dstport, timestamp, duration, reason, direction) VALUES';
    foreach my $record (@$records){
        $query .= '(' . join(',', @$record) . '),';
    }
    chop($query); # removes the last comma
    my $rows = $self->db->do($query);
    #$self->log->trace('Inserted ' . $rows . ' rows.');
}

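# _sql_error_handler: DBI HandleError callback. Logs the error along with the
# offending statement and returns true so DBI's default RaiseError is suppressed.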
sub _sql_error_handler {
    my $errstr = shift;
    my $dbh = shift;
    my $query = $dbh->{Statement};
    my $logger = Log::Log4perl::get_logger('StreamDB'); # same category as the rest of the module
    $errstr = sprintf("SQL ERROR: %s\nQuery: %s\n", $errstr, $query);
    $logger->error($errstr);
    return 1; # Stops default RaiseError from happening
}

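# _check_table: when the data file id crosses a rollover boundary, renames the
# live streams table to streams_<previous boundary id> and creates a fresh,
# identically structured streams table in its place.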
sub _check_table {
    my $self = shift;
    my ($query, $sth);
    # Check to see if we need to create a new table and rollover
    if ($self->{_DATA_FILE_ID} and $self->{_DATA_FILE_ID} % $self->{_TABLE_ID_ROLLOVER} == 0){
        my $cur_file_id = ($self->{_DATA_FILE_ID} - $self->{_TABLE_ID_ROLLOVER});
        my $rolled_table = $Prefix . $cur_file_id;
        $query = 'RENAME TABLE streams TO ' . $rolled_table;
        $self->log->debug('Rolling streams table to table name : ' . $rolled_table);
        $self->db->do($query);
        # instantiate the shiny new streams table
        $query = 'CREATE TABLE streams LIKE ' . $rolled_table;
        $self->db->do($query);
    }
}

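# _data_file_rollover: closes the current data file and opens the next
# streams_<id+1> file, wrapping the id back to 1 before it would overflow the
# SMALLINT file_id column.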
sub _data_file_rollover {
    my $self = shift;
    my ($query, $sth);

    # See if we need a new data file
    $self->log->info('Rolling over data file.');
    $self->{_DATA_FILE_ID}++;

    # Make sure we're under SMALLINT
    if ($self->{_DATA_FILE_ID} >= 2**16){
        $self->log->info(q{Congratulations! You've created 65535 files, wrapping around to 1!});
        $self->{_DATA_FILE_ID} = 1;
    }

    # Start the new fh
    $self->{_DATA_FH}->close();
    $self->_open_data_fh();
    return 1;
}

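# _check_rollover: if the number of streams_<N> archive tables has grown past
# $Num_tables, drops the oldest table and unlinks the data files belonging to
# its rollover window.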
sub _check_rollover {
    my $self = shift;
    my ($query, $sth);
    #$self->log->trace('checking rollover');

    # Do we need to delete the oldest?
    $query = 'SELECT COUNT(*) AS count FROM INFORMATION_SCHEMA.tables WHERE table_schema=? AND table_name LIKE "' . $Prefix . '%"';
    $sth = $self->db->prepare($query);
    $sth->execute($self->{_DB_NAME});
    my $row = $sth->fetchrow_hashref;
    my $num_tables = $row->{count};
    if ($num_tables > $Num_tables){
        $query = 'SELECT table_name FROM INFORMATION_SCHEMA.tables WHERE table_schema=? ' . "\n" .
            'AND table_name LIKE "' . $Prefix . '%" ORDER BY create_time ASC LIMIT 1';
        $sth = $self->db->prepare($query);
        $sth->execute($self->{_DB_NAME});
        $row = $sth->fetchrow_hashref;
        my $table_name = $row->{table_name};
        $query = 'DROP TABLE ' . $table_name;
        $self->db->do($query);
        $self->log->info('Dropped oldest table ' . $table_name);

        # Drop corresponding data files
        $table_name =~ /^$Prefix(\d+)$/;
        my $file_id = $1;
        opendir(DIR, $self->conf->get('data_dir'));
        while (my $short_file = readdir(DIR)){
            if ($short_file =~ /^$Prefix(\d+)$/o){
                my $part_id = $1;
                if ($part_id >= $file_id and $part_id < ($file_id + $self->{_TABLE_ID_ROLLOVER})){
                    my $data_file_name = $self->conf->get('data_dir') . '/' . $Prefix . $part_id;
                    $self->log->info('Dropping data file ' . $data_file_name);
                    unlink $data_file_name;
                }
            }
        }
        closedir(DIR);
    }
}

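# _get_db_size: returns the on-disk size (data + indexes) of the live streams
# table, as reported by INFORMATION_SCHEMA.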
sub _get_db_size {
    my $self = shift;
    my ($query, $sth);
    $query = 'SELECT data_length+index_length AS size FROM INFORMATION_SCHEMA.tables WHERE table_schema=? AND table_name="streams"';
    $sth = $self->db->prepare($query);
    $sth->execute($self->{_DB_NAME});
    my $row = $sth->fetchrow_hashref;
    return $row->{size};
}

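# _get_files_size: returns the combined size in bytes of all streams_<N> data
# files under _DATA_DIR.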
sub _get_files_size {
    my $self = shift;
    my $files_size = 0;
    opendir(DIR, $self->{_DATA_DIR});
    while (my $short_file = readdir(DIR)){
        next unless $short_file =~ /^$Prefix/;
        my $file = $self->{_DATA_DIR} . '/' . $short_file;
        $files_size += -s $file;
    }
    closedir(DIR);
    return $files_size;
}

1;

__END__
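=head1 CONFIGURATION

StreamWriter reads its settings with Config::JSON. The sketch below is only an
illustration assembled from the conf->get() calls in this module; every value
shown is an assumption rather than a shipped default, so adjust paths and
credentials for your own install.

    {
        "interface"  : "eth0",
        "data_dir"   : "/var/lib/streamdb",
        "buffer_dir" : "/dev/shm/streamdb",
        "db" : {
            "host"     : "127.0.0.1",
            "port"     : 3306,
            "database" : "streamdb",
            "username" : "root",
            "password" : ""
        },
        "retention"  : { "size" : 8589934592 },
        "logdir"     : "/var/log/streamdb",
        "debug_level" : "INFO",
        "vortex"     : "/usr/local/bin/vortex",
        "batch_size" : 100,
        "rollover_check_frequency" : 10,
        "daemonize"  : 1,
        "user"       : "root",
        "group"      : "root",
        "pid_file"   : "/var/run/streamdb_0.pid"
    }

=cut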