PageRenderTime 23ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/infobot-old/branches/sungo/infobotcode/lib/Factroid/Module/RSS.pm

#
Perl | 238 lines | 212 code | 25 blank | 1 comment | 7 complexity | 111d173df956bac091cab37f6f362dc5 MD5 | raw file
Possible License(s): LGPL-2.0
  1. package Factroid::Module::RSS;
  2. use strict;
  3. BEGIN {
  4. @Factroid::Module::RSS::ISA = qw(Factroid::Module);
  5. }
  6. use Factroid::Module;
  7. use Factroid::Message;
  8. use HTTP::Request;
  9. use HTTP::Response;
  10. use XML::RSS;
  11. use POE qw(Component::JobQueue);
  12. sub _worker_start {
  13. my ($kernel, $heap, $postback, $request) = @_[KERNEL, HEAP, ARG0, ARG1];
  14. $heap->{postback} = $postback;
  15. $kernel->post ('http', 'request', 'response', $request);
  16. }
  17. sub _worker_response {
  18. my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
  19. my $request = $request_packet->[0];
  20. my $response = $response_packet->[0];
  21. my $postback = $heap->{postback};
  22. if (ref $postback eq 'ARRAY') {
  23. my $session = $kernel->alias_resolve ($postback->[0]);
  24. if (defined $session) {
  25. $postback = $session->postback ($postback->[1], $request);
  26. } else {
  27. $postback = sub { 1 };
  28. }
  29. }
  30. $postback->($response);
  31. }
  32. sub _state_init {
  33. my ($kernel, $session, $heap) = @_[KERNEL, SESSION, HEAP];
  34. my $urlsref = {};
  35. POE::Component::JobQueue->spawn (
  36. Alias => 'fetchrss',
  37. WorkerLimit => 1,
  38. Worker => sub {
  39. my $meta_postback = shift;
  40. my $url = each %$urlsref;
  41. if (defined $url and $url !~ /dummy/) {
  42. my $request = new HTTP::Request ('GET', $url);
  43. my $postback = $meta_postback->($request);
  44. POE::Session->create (inline_states => {
  45. _start => \&_worker_start,
  46. response => \&_worker_response,
  47. }, args => [ $postback, $request ]);
  48. }
  49. },
  50. Active => {
  51. PollInterval => 300,
  52. AckAlias => 'FactroidRSS',
  53. AckState => 'response',
  54. });
  55. $heap->{urls} = $urlsref;
  56. }
  57. sub _push_state_check {
  58. my ($self, $kernel, $sender, $heap, $msg, $subscriber) = @_[OBJECT, KERNEL, SENDER, HEAP, ARG0, ARG1];
  59. unless ($subscriber) {
  60. $kernel->post ($sender, 'db_query_result', $msg);
  61. return;
  62. }
  63. my $re = $self->re;
  64. my ($factoid) = $msg->X =~ /$re/g;
  65. my $dbmsg = new Factroid::DBMessage (type => 'fetch', X => $factoid);
  66. $heap->{$dbmsg} = [$msg, $subscriber];
  67. # FIXME: decide how/if to handle multiple DBs
  68. $kernel->post ('FactroidDBM', $dbmsg->type, $dbmsg);
  69. }
  70. sub _push_state_dbresult {
  71. my ($kernel, $heap, $querymsg) = @_[KERNEL, HEAP, ARG0];
  72. my ($msg, $subscriber) = @{delete $heap->{$querymsg}};
  73. my $Yref = $querymsg->Y;
  74. foreach my $Y (keys %{$Yref}) {
  75. if (my ($url) = $Y =~ /<factroid (?:rss|rdf)\s*=\s*\"?([^\"\s]+)\"?>/ig) {
  76. my $subscribers = ($heap->{urls}->{$url} || []);
  77. push (@{$subscribers}, $subscriber);
  78. $heap->{urls}->{$url} = [$querymsg->X, $subscribers];
  79. }
  80. }
  81. }
  82. sub _push_state_response {
  83. my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
  84. my $request = $request_packet->[0];
  85. my $response = $response_packet->[0];
  86. my $url = $request->uri->as_string;
  87. my ($X, $subscribers) = @{$heap->{urls}->{$url}};
  88. my $repl = "new $X items: ";
  89. if ($response->is_success) {
  90. my (@new);
  91. my $str = $response->content;
  92. my $rss = new XML::RSS;
  93. eval { $rss->parse ($str); };
  94. unless ($@) {
  95. my $most_recent = $rss->{items}->[0]->{title};
  96. if ($most_recent eq $heap->{$url}) {
  97. return;
  98. }
  99. $heap->{$url} = $most_recent;
  100. foreach my $item (@{$rss->{items}}) {
  101. push (@new, $item);
  102. last if ($item->{title} eq $heap->{$url});
  103. }
  104. if ($#new < 2 and defined ($new[0]->{description})) {
  105. foreach my $item (@new) {
  106. my $repl = $item->{'title'} . "(" . $item->{'link'} . ") " . $item->{'description'};
  107. foreach my $subscriber (@$subscribers) {
  108. my ($alias, $channel, $sender) = $subscriber =~ /(.*)\/(.*?)\/(.+)/;
  109. $kernel->post ($alias, 'privmsg', $sender, $repl);
  110. }
  111. }
  112. return;
  113. }
  114. foreach my $item (@new) {
  115. $repl .= $item->{title} . "; ";
  116. }
  117. $repl =~ s/; $//;
  118. foreach my $subscriber (@$subscribers) {
  119. my ($alias, $channel, $sender) = $subscriber =~ /(.*)\/(.*?)\/(.+)/;
  120. $kernel->post ($alias, 'privmsg', $sender, $repl);
  121. }
  122. }
  123. }
  124. }
  125. sub _state_response {
  126. my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
  127. my $request = $request_packet->[0];
  128. my $response = $response_packet->[0];
  129. my ($msg, $uri) = @{$heap->{$request}};
  130. if ($response->is_success) {
  131. my $str = $response->content;
  132. my $rss = new XML::RSS;
  133. eval { $rss->parse ($str); };
  134. unless ($@) {
  135. my $repl;
  136. foreach my $item (@{$rss->{items}}) {
  137. $repl .= $item->{title} . "; ";
  138. }
  139. $repl =~ s/; $//;
  140. my $Yref = $msg->Y;
  141. foreach my $Y (keys %{$Yref}) {
  142. my $oldY = $Y;
  143. if ($Y =~ s/<factroid (?:rss|rdf)\s*=\s*\"?\Q$uri\E\">/$repl/) {
  144. $Yref->{$Y} = delete $Yref->{$oldY};
  145. }
  146. }
  147. } else {
  148. print "error: $@\n";
  149. }
  150. }
  151. #FIXME: only do this when all urls in factoid have been checked.
  152. if (--$heap->{$msg}->{count} == 0) {
  153. delete $heap->{$msg}->{count};
  154. my $sender = delete $heap->{$msg}->{sender};
  155. delete $heap->{$msg};
  156. $kernel->post ($sender, 'after_result', $msg);
  157. }
  158. }
  159. sub _state_check {
  160. my ($kernel, $sender, $heap, $msg) = @_[KERNEL, SENDER, HEAP, ARG0];
  161. my @check_uris = ();
  162. foreach my $Y (keys %{$msg->Y}) {
  163. push (@check_uris, $Y =~ /<factroid (?:rss|rdf)\s*=\s*\"?([^\"\s]+)\"?>/ig);
  164. }
  165. if (@check_uris != ()) {
  166. $heap->{$msg}->{sender} = $sender;
  167. foreach my $uri_str (@check_uris) {
  168. my $request = new HTTP::Request (GET => $uri_str);
  169. $heap->{$msg}->{count}++;
  170. $heap->{$request} = [$msg, $uri_str];
  171. $kernel->post ('http', 'request', 'response', $request)
  172. }
  173. }
  174. unless (defined ($heap->{$msg}->{count})) {
  175. $kernel->post ($sender, 'after_result', $msg);
  176. }
  177. }
  178. sub _push_state_end {
  179. my ($self, $kernel) = @_[OBJECT, KERNEL];
  180. shift;
  181. $kernel->post (fetchrss => 'ZOMBIE');
  182. $self->SUPER::_state_end (@_);
  183. }
  184. sub _state_stop {
  185. }
  186. sub new {
  187. my $class = shift;
  188. my $map = {
  189. re => {
  190. type => 'volatile',
  191. doc => 'the regular expression that has to match to start this module',
  192. value => qr/^RSS subscribe\s+(\S+)/i,
  193. },
  194. };
  195. my $self = $class->SUPER::new ('BOTH', $map, {
  196. init => '_state_init',
  197. after => '_state_check',
  198. response => '_state_response',
  199. push_response => '_push_state_response',
  200. db_query_result => '_push_state_dbresult',
  201. before => '_push_state_check',
  202. _stop => '_state_stop', }, @_);
  203. return $self;
  204. }
  205. 1;