PageRenderTime 59ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/isdn4k-utils-CVS-2010-05-01-patched/isdnlog/tools/zone/redzone

#
Perl | 546 lines | 500 code | 11 blank | 35 comment | 24 complexity | ffc43b667aae45ad2b4f085a5dcac37a MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0, CC-BY-SA-3.0
  1. #!/usr/bin/perl
  2. # redzone - reduce a zone file
  3. # Copyright 1999 by Leopold Toetsch <lt@toetsch.at>
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 2, or (at your option)
  8. # any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. use strict;
  15. use Getopt::Std;
  16. # some global options vars
  17. my ($inf, $outf, $rc,$verbose, $normalize, $newinf, $oldinf, $keep_files, $LEN);
  18. my ($opt_only);
  19. # statistics
  20. my (@rem, $tot);
  21. my($OP) = 1;
  22. my $LINK = 127; # maxzone
  23. &getargs;
  24. $| = 1;
  25. &go;
  26. 1;
  27. #======= subs
  28. # get commandline args
  29. #
  30. sub getargs {
  31. my(%opt);
  32. push(@ARGV,'-V');
  33. $normalize = 0; # defaultvalues to keep the compiler happy
  34. $keep_files = 0;
  35. $rc = 4;
  36. $verbose = 2;
  37. $LEN = 4;
  38. getopt('z:r:v:nl:ko', \%opt);
  39. $inf = $opt{'z'};
  40. $outf = $opt{'r'};
  41. $verbose = $opt{'v'} if($opt{'v'} ne '');
  42. $normalize=1 if($opt{'n'});
  43. $LEN = $opt{'l'} if ($opt{'l'});
  44. $keep_files=1 if(defined $opt{'k'});
  45. $opt_only=1 if(defined $opt{'o'});
  46. $rc = $LEN-1;
  47. &usage unless($inf && $outf);
  48. }
  49. # don't remember, what this sub is for
  50. sub usage {
  51. print "$0 -z infile -r outfile [ -v verboselevel ] [ -n ] [ -l len ] [ -k ]\n";
  52. print "\tDefaults:\n";
  53. print "\tverboselevel = 2\n\tnormalize = NO (aussume is already)\n\tlen = 4\n\tkeep inetermediate files = NO\n";
  54. exit 1;
  55. }
  56. # main routine
  57. sub go {
  58. #
  59. # first check, we can read and write
  60. #
  61. my ($i);
  62. for ($i=0; $i<20; $i++) {
  63. $rem[$i] = 0;
  64. }
  65. open(IN, "$inf") or die("Can't read $inf");
  66. open(OUT, ">$outf") or die("Can't write $outf");
  67. #
  68. # if data are not normalized i.e. sorted ascending and from < to
  69. # then we do it here
  70. #
  71. if ($normalize) {
  72. print "Normalizing ...\n" if ($verbose);
  73. &normalize;
  74. }
  75. else {
  76. $tot = `wc --lines $inf`;
  77. $tot =~ /(\d+)\s/;
  78. $tot = $1;
  79. }
  80. printf "%d initial records\n", $tot if($verbose);
  81. #
  82. # data are prepared now, let's do the real work
  83. #
  84. if($opt_only) {
  85. &optimize;
  86. }
  87. else {
  88. &reduce;
  89. &optimize;
  90. }
  91. &clean_up unless($keep_files);
  92. if ($verbose) {
  93. my $rem = $rem[$OP + 10];
  94. my $redt = $tot-$rem;
  95. my $perc = $tot?($redt)/$tot*100:0;
  96. print "Finito:\t$redt of $tot data where eliminated\n";
  97. printf "\tThis is a reduction of %5.2f %%\n", $perc;
  98. if ($verbose > 1) {
  99. my ($ab, $r);
  100. print "\nDetails\n";
  101. printf "Total records\t%6d\n", $tot;
  102. foreach $ab ('b','o') {
  103. for ($i=1; $i<=$rc; $i++) {
  104. $r = $rem[$i + 10*($ab eq 'o')];
  105. printf "Pass %s-%d\t%6d\n", $ab, $i, $r;
  106. last if($ab eq 'o' and $i >= $OP);
  107. }
  108. }
  109. printf "Remaing recs\t%6d\n", $r;
  110. }
  111. }
  112. }
  113. sub clean_up {
  114. system("rm $inf.{a,b,n}* t1~ t2~ 2>/dev/null");
  115. }
  116. ## sort data correctly
  117. #
  118. sub normalize {
  119. my($from, $to, $z, $i);
  120. $i=0;
  121. while (<IN>) {
  122. chomp;
  123. ($from, $to, $z) = split(/\s+/);
  124. $from .= 'X' x ($LEN-length($from));
  125. $to .= 'X' x ($LEN-length($to));
  126. ($to, $from) = ($from, $to) if ($from gt $to);
  127. print OUT "$from $to $z\n";
  128. print STDERR "$i\r" if ($verbose >= 2 && $i%1000==0);
  129. $i++;
  130. }
  131. $tot=$i;
  132. sort_them('n0');
  133. }
  134. ## open a new infile, outfile
  135. # also deletes previous infile
  136. #
  137. sub open_new {
  138. my $f = shift;
  139. close(IN);
  140. close(OUT);
  141. unlink($oldinf) if ($oldinf && ! $keep_files);
  142. $oldinf = $newinf;
  143. $newinf = $f;
  144. unlink($newinf);
  145. rename($outf, $newinf) || die ("Can't rename $outf => $newinf");
  146. open(IN, "$newinf") || die("Can't read $newinf");
  147. open(OUT, ">$outf") or die("Can't write $outf");
  148. print "mv $outf $newinf\t open(r,$newinf)\t open(w,$outf)\n" if($verbose>2);
  149. }
  150. # reducing looks like this
  151. # a b z
  152. # 1234 2345 1
  153. # 1234 2346 1
  154. # 1234 2347 2
  155. #
  156. # 1234 2347 2
  157. # 1234 234X 1
  158. sub sort_them {
  159. my($x) = $_[0];
  160. open_new("$inf.$x.sort");
  161. close(IN);
  162. close(OUT);
  163. my($pass)=substr($x,1);
  164. print "Sorting ...\n" if ($verbose);
  165. if ($pass eq $rc) {
  166. system(qq(export TMPDIR=.;sort < $newinf | sed -e"s/X\\+//g" > $outf));
  167. }
  168. else {
  169. if ($x eq 'n0') {
  170. system(qq(export TMPDIR=.;sort < $newinf | uniq > $outf));
  171. }
  172. else {
  173. system(qq(export TMPDIR=.;sort < $newinf > $outf));
  174. }
  175. }
  176. open_new("$inf.$x.sorted");
  177. }
  178. sub reduce {
  179. my ($pass);
  180. for ($pass = 1; $pass <= $rc; $pass++) {
  181. &reduce_2($pass);
  182. sort_them('b'.$pass);
  183. }
  184. }
  185. sub reduce_2 {
  186. my ($pass) = $_[0];
  187. my($from, $to, $z, $i, $old, $olda, $j, $k, $rem);
  188. my (@from, @to, @z, %zc, $redc, $eof, $line, $oldto);
  189. my ($which) = $LEN-$pass;
  190. print "Starting Pass b-$pass ...\n" if ($verbose);
  191. $old = $olda = '';
  192. my $XXX = 'X' x $pass;
  193. $rem = 0;
  194. $i=0;
  195. while (1) {
  196. print STDERR "$i $rem\r" if ($verbose == 2 && $i%1000==0);
  197. $i++;
  198. if (!$eof) {
  199. $eof = 1 unless defined ($line = <IN>);
  200. }
  201. if (!$eof) {
  202. chomp($line);
  203. ($from, $to, $z) = split(/\s+/, $line);
  204. print "R: '$from' '$to' '$z'\n" if($verbose>=4);
  205. $to .= 'X' x ($LEN-length($to)) if($pass==1);
  206. #
  207. # read one bunch with same <pass> digs at <LEN-pass> for constant <a>
  208. #
  209. # if ($pass > 1 && $to !~ /${XXX}$/) { # exception
  210. # print OUT "$from $to $z\n";
  211. # $rem++;
  212. # next;
  213. # }
  214. if ((substr($to, $which-1, $pass+1) =~ /^$old$/ || $old eq '') &&
  215. ($olda eq $from || $olda eq '') &&
  216. ($LEN-$pass-1 <= 0 ||
  217. substr($to, 0, $LEN-$pass-1) eq substr($oldto,0, $LEN-$pass-1) ||
  218. $oldto eq '')) {
  219. push(@from, $from);
  220. push(@to, $to);
  221. push(@z, $z);
  222. $old = substr($to, $which-1, 1) . '[X\d]' x $pass;
  223. $olda = $from;
  224. $oldto = $to;
  225. next;
  226. }
  227. } # not eof
  228. #
  229. # now we have some data, what is the most used zone in them
  230. #
  231. my $n = @to;
  232. last unless $n;
  233. %zc = ();
  234. foreach (@z) {
  235. $zc{$_}++;
  236. }
  237. $redc = ((sort {$zc{$b} <=> $zc{$a} } (keys(%zc)))[0]);
  238. print "Got $n: ($from[0] $to[0] - $to[$#to]) Red $redc Old '$old'\n" if ($verbose >= 3);
  239. print "There are ",scalar(keys(%zc))," zones\n" if($verbose>=3);
  240. #if there is a shorter one than this is the default
  241. if (scalar(keys(%zc)) == 1) { # write shortcut
  242. $k=0;
  243. substr($to[$k], $which, $pass) = 'X' x $pass;
  244. print OUT "$from[$k] $to[$k] $z[$k]\n";
  245. $rem++;
  246. }
  247. else {
  248. my ($l);
  249. $l = $LEN-$pass+1;
  250. for ($j=1 ;$j < $n; $j++) {
  251. $to[$j] =~ /^\d+/;
  252. if (length($&) == $LEN-$pass) {
  253. $l = length($&);
  254. $redc = $z[$j];
  255. print "But '$from[$j] $to[$j]' is shorter Red $redc now\n" if ($verbose >= 3);
  256. }
  257. }
  258. $k=-1;
  259. for ($j=0 ;$j < $n; $j++) {
  260. $to[$j] =~ /^\d+/;
  261. if ($z[$j] == $redc && length($&) == $l) {
  262. $k=$j;
  263. next;
  264. }
  265. print OUT "$from[$j] $to[$j] $z[$j]\n";
  266. $rem++;
  267. }
  268. # now write the default
  269. if ($k >= 0) {
  270. substr($to[$k], $which, $pass) = 'X' x $pass;
  271. print OUT "$from[$k] $to[$k] $z[$k]\n";
  272. $rem++;
  273. }
  274. }
  275. # clean up & init for next bunch
  276. @from = @to = @z = ();
  277. push(@from, $from); # these we have already read
  278. push(@to, $to);
  279. push(@z, $z);
  280. if (length($to) > $which) {
  281. $old = substr($to, $which-1, 1) . '[X\d]' x $pass;
  282. }
  283. $olda = $from;
  284. $oldto = $to;
  285. # are we ready?
  286. last if ($eof);
  287. } # while
  288. $rem[$pass] = $rem;
  289. print "Pass b-$pass: $rem remaining\n" if ($verbose);
  290. }
  291. sub optimize {
  292. my ($pass);
  293. for ($pass = 1; $pass <= $OP; $pass++) {
  294. &optimize_2($pass);
  295. open_new("$inf.a-${pass}p");
  296. &sort_opt($pass);
  297. open_new("$inf.a-${pass}s") if ($pass < $OP);
  298. }
  299. }
  300. sub sort_opt {
  301. my($pass) = $_[0];
  302. my ($from, $to, $z);
  303. print "Sorting ...\n" if($verbose);
  304. while (<IN>) {
  305. chomp;
  306. ($from, $to, $z) = split(/ /);
  307. $from .= 'X' x ($LEN-length($from)); # sort shorter after others
  308. $to .= 'X' x ($LEN-length($to));
  309. $to = "X$to" if ($z eq $LINK); # sort link after others
  310. print OUT "$from $to $z\n";
  311. }
  312. close(IN);
  313. close(OUT);
  314. $newinf = "$inf.a-${pass}q";
  315. rename($outf, $newinf);
  316. system(qq(sort < $newinf |uniq | sed -e"s/X\\+//g" > $outf));
  317. my ($rem, $wc);
  318. $wc = `wc --lines $outf`;
  319. $wc =~ /(\d+)\s/;
  320. $rem = $1;
  321. print "Pass o-$pass: $rem remaining\n" if ($verbose);
  322. $rem[$pass + 10] = $rem;
  323. }
  324. sub optimize_2 {
  325. my ($pass) = $_[0];
  326. my ($from, $to, $z, $i, $old, $oldfr1, $oldfr2, $j, $rem, $k, $jj);
  327. my (@from, @to, @z, %zc, $redc, $eof, $line, $stopped);
  328. my (@fr1, @to1, @z1);
  329. my (@fr2, @to2, @z2, %used1, %used2, %toprint);
  330. print "Starting Pass o-$pass ...\n" if ($verbose);
  331. $old = $oldfr1 = $oldfr2 = '';
  332. $rem = 0;
  333. $i = 0;
  334. while (1) {
  335. print STDERR "$i $rem\r" if ($verbose == 2 && $i%100==0);
  336. $i++;
  337. if (!$eof) {
  338. $eof = 1 unless defined ($line = <IN>);
  339. }
  340. if (!$eof) {
  341. chomp($line);
  342. ($from, $to, $z) = split(/\s+/, $line);
  343. #
  344. # read one bunch with same digs at begin and same len
  345. #
  346. if ($old eq '' ||
  347. (substr($from, 0 ,length($old)) eq $old &&
  348. length($from)==length($old)+1)) {
  349. push(@from, $from);
  350. push(@to, $to);
  351. push(@z, $z);
  352. $toprint{"$from $to $z"}=1;
  353. $old = substr($from,0, length($from)-1);
  354. next;
  355. }
  356. } # not eof
  357. $oldfr1 = $from[0];
  358. my ($next1, $next2, %udif, %short);
  359. $stopped = 0;
  360. $next1 = 0;
  361. push(@from,'end'); # for the loop to finish
  362. OUTER:
  363. for ($jj = 0; $jj < @from; $jj++) {
  364. if ($from[$jj] eq $oldfr1) {
  365. if ($from[$jj] ne 'end') {
  366. push(@fr1, substr($from[$jj],0,length($old)));
  367. push(@to1, $to[$jj]);
  368. push(@z1, $z[$jj]);
  369. }
  370. }
  371. else {
  372. $next2 = $jj;
  373. $oldfr2 = $from[$next2];
  374. print "Now Outer $from[$next1]\n" if($verbose==4);
  375. goto N1 if ($used2{$from[$next1]}); # this has already a link
  376. open(T1, ">t1~") || die("cant write t1~");
  377. for ($k=0; $k < @to1; $k++) {
  378. print T1 "$fr1[$k] $to1[$k] $z1[$k]\n";
  379. }
  380. close(T1);
  381. INNER:
  382. for ($j = $next2; $j < @from; $j++) {
  383. if ($from[$j] eq $oldfr2) {
  384. if ($from[$j] ne 'end') {
  385. push(@fr2, substr($from[$j],0,length($old)));
  386. push(@to2, $to[$j]);
  387. push(@z2, $z[$j]);
  388. }
  389. }
  390. else {
  391. print "Now Inner $from[$next2]\n" if($verbose==4);
  392. print "Diffs $oldfr1 - $oldfr2=\n" if($verbose==3);
  393. print "Diffs $oldfr1 - $oldfr2=\n1:@to1\n2:@to2\n" if($verbose==4);
  394. $oldfr2 = $from[$j];
  395. open(T2, ">t2~") || die("cant write t2~");
  396. for ($k=0; $k<@to2; $k++) {
  397. print T2 "$fr2[$k] $to2[$k] $z2[$k]\n";
  398. }
  399. close(T2);
  400. my(@difls) = `diff -U0 t1~ t2~`;
  401. print "Are:@difls\n" if($verbose==4);
  402. my($add);
  403. $add=1;
  404. foreach (@difls) {
  405. my($l1, $c1, $l2, $c2, $l, $p);
  406. if (/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?/) {
  407. ($l1, $c1, $l2, $c2) = ($1,$3,$4,$6);
  408. $c1 = !defined $c1 ? 1 : $c1;
  409. $c2 = !defined $c2 ? 1 : $c2;
  410. foreach ($l=-1; $l<$c1-1; $l++) {
  411. # now look, what zone is this in @to2
  412. # and write it
  413. my ($m, $n, $t, $q, $o);
  414. $t = $to[$next1+$l1+$l];
  415. # $add++;
  416. NL: while(length($t)) {
  417. for ($m=0; $m<@to2; $m++) {
  418. if ($to2[$m] eq $t) {
  419. $q="$from[$next2] $to[$next1+$l1+$l] $z2[$m]";
  420. $udif{$q}=1 if($z2[$m] ne $z[$next1+$l1+$l]);
  421. # and all longer that match
  422. $t = $to[$next1+$l1+$l];
  423. for ($o = 0; $o < @to2; $o++) {
  424. if ($t ne $to2[$o] && $to2[$o] =~ /^$t/) {
  425. $udif{"$from[$next2] $to2[$o] $z2[$o]"}=1;
  426. print "\tAnd longer2 $from[$next2] $to2[$o] $z2[$o]\n" if($verbose==4);
  427. }
  428. }
  429. last NL;
  430. }
  431. } # for m
  432. $t = substr($t,0,length($t)-1);
  433. } # while
  434. if ($verbose==4) {
  435. $p="$from[$next1] $to[$next1+$l1+$l] $z[$next1+$l1+$l]";
  436. print "\t$p = $q\n";
  437. }
  438. }
  439. if($c2) {
  440. foreach ($l=-1; $l<$c2-1; $l++) {
  441. my ($m, $t);
  442. $t = $to[$next2+$l2+$l];
  443. $udif{"$from[$next2] $t $z[$next2+$l2+$l]"}=1;
  444. if ($verbose==4) {
  445. $p="$from[$next2] $t $z[$next2+$l2+$l]";
  446. print "\t$p\n";
  447. }
  448. for ($m = 0; $m < @to2; $m++) {
  449. if ($t ne $to2[$m] && $to2[$m] =~ /^$t/) {
  450. $udif{"$from[$next2] $to2[$m] $z2[$m]"}=1;
  451. print "\tAnd longer $from[$next2] $to2[$m] $z2[$m]\n" if($verbose==4);
  452. }
  453. }
  454. # also write all longer matching
  455. } #foreach
  456. } #if c2
  457. } #if
  458. } # foreach difls
  459. if (keys(%udif)+$add < @fr2) {
  460. print "Used $from[$next1] $from[$next2]\n" if($verbose==4);
  461. my %found;
  462. my $p;
  463. if (!$used1{$from[$next1]}) {
  464. for ($k=0; $k < @fr1; $k++) {
  465. $p="$from[$next1+$k] $to1[$k] $z1[$k]";
  466. print OUT "$p\n";
  467. $found{$to1[$k]}=1;
  468. delete $toprint{$p};
  469. }
  470. }
  471. $used1{$from[$next1]}=1;
  472. $used2{$from[$next2]}=1;
  473. for ($k=0; $k < @fr2; $k++) {
  474. delete $toprint{"$from[$next2+$k] $to2[$k] $z2[$k]"};
  475. }
  476. print OUT "$from[$next2] $from[$next1] $LINK\n";
  477. foreach $k (keys(%udif)) {
  478. print OUT "$k\n";
  479. }
  480. #goto N1;
  481. }
  482. else {
  483. print "Too many diffs $from[$next1] $from[$next2]\n" if($verbose>2);
  484. }
  485. N2:
  486. %udif = ();
  487. @fr2 = @to2 = @z2 = ();
  488. push(@fr2, substr($from[$j],0,length($old)));
  489. push(@to2, $to[$j]);
  490. push(@z2, $z[$j]);
  491. $next2 = $j;
  492. } # else
  493. } # for $j
  494. N1:
  495. %udif = ();
  496. foreach $k (keys(%toprint)) {
  497. print OUT "$k\n";
  498. $rem++;
  499. }
  500. %toprint = ();
  501. @fr2 = @to2 = @z2 = ();
  502. @fr1 = @to1 = @z1 = ();
  503. if ($from[$jj] ne 'end') {
  504. push(@fr1, substr($from[$jj],0,length($old)));
  505. push(@to1, $to[$jj]);
  506. push(@z1, $z[$jj]);
  507. }
  508. $oldfr1 = $from[$jj];
  509. $next1 = $jj;
  510. } # else
  511. } # for jj
  512. @fr2 = @to2 = @z2 = ();
  513. @fr1 = @to1 = @z1 = ();
  514. %used1 = %used2 = %toprint = ();
  515. $oldfr1 = $oldfr2 = '';
  516. # clean up & init for next bunch
  517. @from = @to = @z = ();
  518. push(@from, $from); # these we have already red
  519. push(@to, $to);
  520. push(@z, $z);
  521. $toprint{"$from $to $z"}=1;
  522. $old = '';
  523. last if($eof);
  524. } # while
  525. } # optimize