PageRenderTime 52ms CodeModel.GetById 6ms RepoModel.GetById 0ms app.codeStats 0ms

/perl/attic/writer.pl

http://github.com/quartzjer/TeleHash
Perl | 451 lines | 342 code | 41 blank | 68 comment | 78 complexity | 06a3f37a2cdc090f9b7f7190e4572521 MD5 | raw file
  1. #!/usr/bin/perl
  2. # a prototype telehash writer
  3. # Jer 1/2010
  4. use Digest::SHA1 qw(sha1_hex);
  5. use Data::Dumper;
  6. use IO::Select;
  7. use Socket;
  8. use JSON::DWIW;
  9. my $json = JSON::DWIW->new;
  10. require "./bixor.pl"; # temp testing hack
  11. # defaults to listen on any ip and random port
  12. my $port = $ARGV[0]||0;
  13. my $ip = "0.0.0.0";
  14. my $seed = $ARGV[1]||"telehash.org:42424";
  15. $iaddr = gethostbyname($ip);
  16. $proto = getprotobyname('udp');
  17. $paddr = sockaddr_in($port, $iaddr);
  18. socket(SOCKET, PF_INET, SOCK_DGRAM, $proto) or die "socket: $!";
  19. bind(SOCKET, $paddr) or die "bind: $!";
  20. $sel = IO::Select->new();
  21. $sel->add(\*SOCKET);
  22. # resolve our seed to its ip:port
  23. my($seedhost,$seedport) = split(":",$seed);
  24. my $seedip = gethostbyname($seedhost);
  25. my $seedipp = sprintf("%s:%d",inet_ntoa($seedip),$seedport);
  26. my %lines; # static line assignments per writer
  27. my %ends; # any end hashes that we've handled
  28. my @history; # last however many telexes (containing signals)
  29. my %forwards; # writers with active .fwd going
  30. my $buff;
  31. $|++;
  32. my $ipp, $ipphash;
  33. my $connected=undef;
  34. # track ip/ports to prevent excessive flooding
  35. my %fw_ips; # track ips;
  36. my $fw_tps = 20; # max telex per sec
  37. my $fw_window = 5; # seconds per window
  38. my $fw_last = int(time);
  39. my %fw_karma; # whitelisting
  40. my $buckets = bucket_init(); # big array of buckets
  41. my $lastloop = int(time);
  42. while(1)
  43. {
  44. # if we're not online, attempt to talk to the seed
  45. bootstrap($seedipp) if(!$connected);
  46. # wait for event or timeout loop
  47. my $newmsg = scalar $sel->can_read(10);
  48. if($newmsg == 0 || $lastloop+10 < int(time))
  49. {
  50. printf "LOOP\n";
  51. $lastloop = int(time);
  52. tscan();
  53. next if($newmsg == 0); # timeout loop
  54. }
  55. # must be a telex waiting for us
  56. $connected = 1;
  57. my $caddr = recv(SOCKET, $buff, 8192, 0) || die("recv $!");
  58. # figure out who sent it
  59. ($cport, $addr) = sockaddr_in($caddr);
  60. my $writer = sprintf("%s:%d",inet_ntoa($addr),$cport);
  61. # drop if this sender is flooding us
  62. next if(floodwall($writer));
  63. printf "RECV[%s]\t%s\n",$writer,$buff;
  64. # json parse check
  65. my $j = $json->from_json($buff) || next;
  66. # FIRST, if we're bootstrapping, discover our own ip:port
  67. if(!$ipp && $j->{"_to"})
  68. {
  69. printf "SELF[%s]\n",$j->{"_to"};
  70. $ipp = $j->{"_to"};
  71. $ipphash = sha1_hex($ipp);
  72. # WE are the seed, haha, remove our own line and skip
  73. if($ipp eq $writer)
  74. {
  75. delete $lines{$ipp};
  76. next;
  77. }
  78. }
  79. # if this is a writer we know, check a few things
  80. my $line = getline($writer);
  81. # keep track of when we've last got stuff from them
  82. $line->{"last"} = time();
  83. # check to see if the _line matches or the _ring matches
  84. if($line->{"open"} > 0 && ($line->{"open"} != $j->{"_line"} || ($j->{"_ring"} > 0 && $line->{"open"} % $j->{"_ring"} != 0)))
  85. {
  86. print "LINE MISMATCH!\n";
  87. next;
  88. }
  89. # todo, should have lineto and linefrom open semantics for status checking on cmds
  90. if(!$line->{"open"} && ($j->{"_line"} || $j->{"_ring"}))
  91. {
  92. $line->{"open"} = $j->{"_line"} if($j->{"_line"} % $line->{"ring"} == 0); # verify their line is a product of our ring
  93. $line->{"open"} = int($j->{"_ring"} * $line->{"ring"}) if($j->{"_ring"}); # create a new line as a product of our ring
  94. bucket_see($writer,$buckets) if($line->{"open"}); # make sure they get added to a bucket too?
  95. }
  96. # first process all commands
  97. # they want recent telexes matching these signals (at least one matching signal required)
  98. if($j->{".hist"} && scalar grep(/^[[:alnum:]]+/, keys %$j) > 0)
  99. {
  100. my $hist = $j->{".hist"};
  101. # sanitize hist request
  102. my %hists = map {$_ => $hist->{$_}} grep(/^[[:alnum:]]+/, keys %$hist);
  103. # loop through all history new to old to find any matches
  104. for my $t (@history)
  105. {
  106. # first make sure any of the requested signals exist
  107. my @sigs = grep($t->{$_},keys %hists);
  108. next unless(scalar @sigs > 0);
  109. next unless(tmatch($j,$t));
  110. # deduct from the hist request
  111. for my $sig (grep(/^[[:alnum:]]+/, keys %$t))
  112. {
  113. $hists{$sig}-- if($hists{$sig});
  114. delete $hists{$sig} if($hists{$sig} <= 0);
  115. }
  116. # send them a copy
  117. tsend(tnew($writer,$t));
  118. # see if their request is used up
  119. last unless(scalar keys %hists > 0);
  120. }
  121. }
  122. # a request to send a .nat to a writer that we should know (and only from writers we have a line to)
  123. if($j->{".natr"} && $lines{$j->{".natr"}} && $j->{"_line"})
  124. {
  125. my $jo = tnew($j->{".natr"});
  126. $jo->{".nat"} = $writer;
  127. tsend($jo);
  128. }
  129. # we're asked to send something to this ip:port to open a nat
  130. if($j->{".nat"} && $j->{"_line"})
  131. {
  132. tsend(tnew($j->{".nat"}));
  133. }
  134. # we've been told to talk to these writers
  135. if($j->{".see"} && $line->{"open"})
  136. {
  137. # loop through and establish lines to them (just being dumb for now and trying everyone)
  138. for my $seeipp (@{$j->{".see"}})
  139. {
  140. next if($seeipp eq $ipp); # skip ourselves :)
  141. next if($lines{$seeipp}); # skip if we know them already
  142. # XXX if we're dialing we'd want to use any of these closer to that end
  143. # also check to see if we want them in a bucket
  144. if(bucket_see($seeipp,$buckets))
  145. {
  146. tsend(tnew($seeipp)); # send direct (should open our outgoing to them)
  147. # send nat request back to the writer who .see'd us in case the new one is behind a nat
  148. my $jo = tnew($writer);
  149. $jo->{".natr"} = $seeipp;
  150. tsend($jo);
  151. }
  152. }
  153. }
  154. # handle a fwd command, must be verified
  155. if($j->{".fwd"} && $j->{"_line"})
  156. {
  157. # sanitize, clean the .fwd and create a telex of just the signals and .fwd to store
  158. my $fwd = $j->{".fwd"};
  159. my %fwds = map {$_ => ($fwd->{$_}>100)?100:int($fwd->{$_})} grep(/^[[:alnum:]]+/, keys %$fwd);
  160. my %t = map { $_ => $j->{$_} } grep(/^[[:alnum:]]+/, keys %$j);
  161. # only accept it if there's at least one signal to filter on
  162. if(scalar keys %t > 0)
  163. {
  164. $t{".fwd"} = \%fwds;
  165. $forwards{$writer} = \%t; # always replace any existing
  166. my $jo = tnew($writer);
  167. $jo->{"fwds"} = \%fwds; # just confirm whatever they sent for now
  168. tsend($jo);
  169. }
  170. }
  171. # now process signals, if any
  172. next unless(grep(/^[[:alnum:]]+/,keys %$j));
  173. # a request to find other writers near this end hash
  174. if($j->{"end"})
  175. {
  176. # get writers from buckets near to this end
  177. my $cipps = bucket_near($j->{"end"},$buckets);
  178. my $jo = tnew($writer);
  179. $jo->{".see"} = $cipps;
  180. tsend($jo);
  181. # if we're the closest, we should try to cache this in the history longer
  182. if(bix_str($ckeys[0]) eq $ipphash)
  183. {
  184. printf("closest!\n");
  185. }
  186. }
  187. # check for any active forwards (todo: optimize the matching, this is just brute force)
  188. for my $w (keys %forwards)
  189. {
  190. my $t = $forwards{$w};
  191. print Dumper($t);
  192. next unless(tmatch($t,$j));
  193. print "1";
  194. my $fwd = $t->{".fwd"};
  195. my @sigs = grep($j->{$_},keys %$fwd);
  196. next unless(scalar @sigs > 0);
  197. # deduct from the fwd
  198. print "2";
  199. for my $sig (grep(/^[[:alnum:]]+/, keys %$j))
  200. {
  201. $fwd->{$sig}-- if($fwd->{$sig});
  202. delete $fwd->{$sig} if($fwd->{$sig} <= 0);
  203. }
  204. print "3";
  205. # send them a copy
  206. my $jo = tnew($w,$j);
  207. $jo->{".fwd"} = $t->{".fwd"};
  208. tsend($jo);
  209. # see if the .fwd is used up
  210. if(scalar keys %$fwd == 0)
  211. {
  212. delete $forwards{$w};
  213. }
  214. }
  215. # cache in history if there's any signals, max 1000
  216. next unless(scalar grep(/^[[:alnum:]]+/, keys %$j) > 0);
  217. $j->{"at"} = time() unless($j->{"at"}); # make sure an at signal is set
  218. unshift(@history,$j);
  219. @history = splice(@history,0,1000);
  220. }
  221. # for creating and tracking lines to writers
  222. sub getline
  223. {
  224. my $writer = shift;
  225. if(!$lines{$writer})
  226. {
  227. printf "LINE[%s]\n",$writer;
  228. $lines{$writer} = { "ring" => int(rand(32768)), "first" => time(), "last" => time() };
  229. }
  230. return $lines{$writer};
  231. }
  232. # create a new telex
  233. sub tnew
  234. {
  235. my $to = shift;
  236. my $clone = shift;
  237. my $js = {};
  238. # if there's a telex sent, clone all signals from it
  239. for my $sig (grep(/^[[:alnum:]]+/, keys %$clone))
  240. {
  241. $js->{$sig} = $clone->{$sig};
  242. }
  243. my $line = getline($to);
  244. # if a line is open use that, else send a ring
  245. if($line->{"open"})
  246. {
  247. $js->{"_line"} = int($line->{"open"});
  248. }else{
  249. $js->{"_ring"} = int($line->{"ring"});
  250. }
  251. $js->{"_to"} = $to;
  252. return $js;
  253. }
  254. # actually send a telex to its writer
  255. sub tsend
  256. {
  257. my $j = shift;
  258. my($ip,$port) = split(":",$j->{"_to"});
  259. my $wip = gethostbyname($ip);
  260. return unless($wip); # bad ip?
  261. my $waddr = sockaddr_in($port,$wip);
  262. return unless($waddr); # bad port?
  263. my $js = $json->to_json($j);
  264. printf "SEND[%s]\t%s\n",$j->{"_to"},$js;
  265. if(!defined(send(SOCKET, $js, 0, $waddr)))
  266. {
  267. $ipp=$connected=undef;
  268. printf "OFFLINE\n";
  269. }
  270. }
  271. # see if the second telex is a match or superset of the first's signals
  272. sub tmatch
  273. {
  274. my $t1 = shift;
  275. my $t2 = shift;
  276. my @sigs = grep(/^[[:alnum:]]+/, keys %$t1);
  277. my @match = grep {$t2->{$_} eq $t1->{$_}} @sigs;
  278. return (scalar @sigs == scalar @match);
  279. }
  280. # scan all known writers to keep any nat's open
  281. sub tscan
  282. {
  283. my $at = time();
  284. my @writers = keys %lines;
  285. for my $writer (@writers)
  286. {
  287. next if($writer eq $ipp); # ??
  288. delete $lines{$writer}->{"open"} if($at - $lines{$writer}->{"last"} > 300); # remove open line status if older than 5min
  289. if($at - $lines{$writer}->{"last"} > 600)
  290. { # remove them if they are stale, timed out
  291. printf "PURGE[%s]\n",$writer;
  292. $lines{$writer} = undef;
  293. delete $lines{$writer};
  294. next;
  295. }
  296. my $jo = tnew($writer);
  297. $jo->{"end"} = sha1_hex($ipp);
  298. tsend($jo);
  299. }
  300. if(scalar keys %lines == 0)
  301. {
  302. $ipp=$connected=undef;
  303. printf "OFFLINE\n";
  304. }
  305. }
  306. sub getend
  307. {
  308. my $hash = shift;
  309. return $ends{$hash} if($ends{$hash});
  310. return $ends{$hash} = {"fwds"=>{}};
  311. }
  312. # send a hello to the seed
  313. sub bootstrap
  314. {
  315. my $seed = shift;
  316. my $jo = tnew($seed);
  317. # make sure the hash is really far away so they .see us back a bunch
  318. $jo->{"end"} = bix_str(bix_far(bix_new(sha1_hex($seed))));
  319. tsend($jo);
  320. # make sure seed is in a bucket
  321. bucket_see($seed,$buckets);
  322. }
  323. # create the array of buckets
  324. sub bucket_init
  325. {
  326. my @ba;
  327. for my $pos (0..159)
  328. {
  329. $ba[$pos] = {};
  330. }
  331. return \@ba;
  332. }
  333. # find active writers
  334. sub bucket_near
  335. {
  336. my $end = shift;
  337. my $buckets = shift;
  338. my $bto = bix_new($end); # convert to format for the big xor for faster sorting
  339. my $bme = bix_new($ipphash);
  340. my $start = bix_sbit(bix_or($bto,$bme));
  341. $start = 0 if($start < 0 || $start > 159); # err, this needs to be handled better or something
  342. my @ret;
  343. # first check all buckets closer
  344. printf "NEAR[%d %s] ",$start,$end;
  345. my $pos = $start+1;
  346. while(--$pos)
  347. {
  348. #printf "%d/%d %s",$pos,scalar @ret,Dumper($buckets->[$pos]);
  349. push @ret,grep($lines{$_}->{"open"},keys %{$buckets->[$pos]}); # only push active writers
  350. last if(scalar @ret >= 5);
  351. }
  352. # the check all buckets further
  353. for my $pos (($start+1) .. 159)
  354. {
  355. #printf "%d/%d ",$pos,scalar @ret;
  356. push @ret,grep($lines{$_}->{"open"},keys %{$buckets->[$pos]}); # only push active writers
  357. last if(scalar @ret >= 5);
  358. }
  359. my %hashes = map {sha1_hex($_)=>$_} @ret;
  360. $hashes{$ipphash} = $ipp; # include ourselves always
  361. my @bixes = map {bix_new($_)} keys %hashes; # pre-bix the array for faster sorting
  362. my @ckeys = sort {bix_cmp(bix_or($bto,$a),bix_or($bto,$b))} @bixes; # sort by closest to the end
  363. printf("from %d writers, closest is %d\n",scalar @ckeys, bix_sbit(bix_or($bto,$ckeys[0])));
  364. @ret = map {$hashes{bix_str($_)}} splice @ckeys, 0, 5; # convert back to ip:ports, top 5
  365. return \@ret;
  366. }
  367. # see if we want to try this writer or not, and maybe prune the bucket
  368. sub bucket_see
  369. {
  370. my $writer = shift;
  371. my $buckets = shift;
  372. my $pos = bix_sbit(bix_or(bix_new($writer),bix_new($ipphash)));
  373. printf "BUCKET[%d %s]\n",$pos,$writer;
  374. return undef if($pos < 0 || $pos > 159); # err!?
  375. $buckets->[$pos]->{$writer}++;
  376. return 1; # for now we're always taking everyone, in future need to be more selective when the bucket is "full"!
  377. }
  378. sub floodwall
  379. {
  380. my $writer = shift;
  381. # first check if it's a karma setting
  382. my $karma = shift;
  383. if($karma)
  384. {
  385. $fw_karma{$writer}+=$karma;
  386. return undef;
  387. }
  388. # if karma saved, decide on own
  389. if($fw_karma{$writer})
  390. {
  391. $fw_karma{$writer}--;
  392. return undef if($fw_karma{$writer} > 0);
  393. delete $fw_karma{$writer};
  394. }
  395. # now, if we're in a new window, reset
  396. my $at = int(time);
  397. if($fw_last+5 < $at)
  398. {
  399. %fw_ips = ();
  400. $fw_last = $at;
  401. }
  402. # count packets per ip
  403. my($ip,$port) = split(":",$writer);
  404. $fw_ips{$ip}++;
  405. # if the IP is OK
  406. return undef if($fw_ips{$ip} < $fw_window*$fw_tps);
  407. # too many, FAIL
  408. printf "FLOODWALL[%s]\n",$writer;
  409. return 1;
  410. }