/infobot-old/branches/sungo/infobotcode/lib/Factroid/Module/RSS.pm
Perl | 238 lines | 212 code | 25 blank | 1 comment | 7 complexity | 111d173df956bac091cab37f6f362dc5 MD5 | raw file
Possible License(s): LGPL-2.0
- package Factroid::Module::RSS;
- use strict;
- BEGIN {
- @Factroid::Module::RSS::ISA = qw(Factroid::Module);
- }
- use Factroid::Module;
- use Factroid::Message;
- use HTTP::Request;
- use HTTP::Response;
- use XML::RSS;
- use POE qw(Component::JobQueue);
- sub _worker_start {
- my ($kernel, $heap, $postback, $request) = @_[KERNEL, HEAP, ARG0, ARG1];
- $heap->{postback} = $postback;
- $kernel->post ('http', 'request', 'response', $request);
- }
- sub _worker_response {
- my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
- my $request = $request_packet->[0];
- my $response = $response_packet->[0];
- my $postback = $heap->{postback};
- if (ref $postback eq 'ARRAY') {
- my $session = $kernel->alias_resolve ($postback->[0]);
- if (defined $session) {
- $postback = $session->postback ($postback->[1], $request);
- } else {
- $postback = sub { 1 };
- }
- }
- $postback->($response);
- }
- sub _state_init {
- my ($kernel, $session, $heap) = @_[KERNEL, SESSION, HEAP];
- my $urlsref = {};
- POE::Component::JobQueue->spawn (
- Alias => 'fetchrss',
- WorkerLimit => 1,
- Worker => sub {
- my $meta_postback = shift;
- my $url = each %$urlsref;
- if (defined $url and $url !~ /dummy/) {
- my $request = new HTTP::Request ('GET', $url);
- my $postback = $meta_postback->($request);
- POE::Session->create (inline_states => {
- _start => \&_worker_start,
- response => \&_worker_response,
- }, args => [ $postback, $request ]);
- }
- },
- Active => {
- PollInterval => 300,
- AckAlias => 'FactroidRSS',
- AckState => 'response',
- });
- $heap->{urls} = $urlsref;
- }
- sub _push_state_check {
- my ($self, $kernel, $sender, $heap, $msg, $subscriber) = @_[OBJECT, KERNEL, SENDER, HEAP, ARG0, ARG1];
- unless ($subscriber) {
- $kernel->post ($sender, 'db_query_result', $msg);
- return;
- }
- my $re = $self->re;
- my ($factoid) = $msg->X =~ /$re/g;
- my $dbmsg = new Factroid::DBMessage (type => 'fetch', X => $factoid);
- $heap->{$dbmsg} = [$msg, $subscriber];
- # FIXME: decide how/if to handle multiple DBs
- $kernel->post ('FactroidDBM', $dbmsg->type, $dbmsg);
- }
- sub _push_state_dbresult {
- my ($kernel, $heap, $querymsg) = @_[KERNEL, HEAP, ARG0];
- my ($msg, $subscriber) = @{delete $heap->{$querymsg}};
- my $Yref = $querymsg->Y;
- foreach my $Y (keys %{$Yref}) {
- if (my ($url) = $Y =~ /<factroid (?:rss|rdf)\s*=\s*\"?([^\"\s]+)\"?>/ig) {
- my $subscribers = ($heap->{urls}->{$url} || []);
- push (@{$subscribers}, $subscriber);
- $heap->{urls}->{$url} = [$querymsg->X, $subscribers];
- }
- }
- }
- sub _push_state_response {
- my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
- my $request = $request_packet->[0];
- my $response = $response_packet->[0];
- my $url = $request->uri->as_string;
- my ($X, $subscribers) = @{$heap->{urls}->{$url}};
- my $repl = "new $X items: ";
- if ($response->is_success) {
- my (@new);
- my $str = $response->content;
- my $rss = new XML::RSS;
- eval { $rss->parse ($str); };
- unless ($@) {
- my $most_recent = $rss->{items}->[0]->{title};
- if ($most_recent eq $heap->{$url}) {
- return;
- }
- $heap->{$url} = $most_recent;
- foreach my $item (@{$rss->{items}}) {
- push (@new, $item);
- last if ($item->{title} eq $heap->{$url});
- }
- if ($#new < 2 and defined ($new[0]->{description})) {
- foreach my $item (@new) {
- my $repl = $item->{'title'} . "(" . $item->{'link'} . ") " . $item->{'description'};
- foreach my $subscriber (@$subscribers) {
- my ($alias, $channel, $sender) = $subscriber =~ /(.*)\/(.*?)\/(.+)/;
- $kernel->post ($alias, 'privmsg', $sender, $repl);
- }
- }
- return;
- }
- foreach my $item (@new) {
- $repl .= $item->{title} . "; ";
- }
- $repl =~ s/; $//;
- foreach my $subscriber (@$subscribers) {
- my ($alias, $channel, $sender) = $subscriber =~ /(.*)\/(.*?)\/(.+)/;
- $kernel->post ($alias, 'privmsg', $sender, $repl);
- }
- }
- }
- }
- sub _state_response {
- my ($kernel, $heap, $request_packet, $response_packet) = @_[KERNEL, HEAP, ARG0, ARG1];
- my $request = $request_packet->[0];
- my $response = $response_packet->[0];
- my ($msg, $uri) = @{$heap->{$request}};
- if ($response->is_success) {
- my $str = $response->content;
- my $rss = new XML::RSS;
- eval { $rss->parse ($str); };
- unless ($@) {
- my $repl;
- foreach my $item (@{$rss->{items}}) {
- $repl .= $item->{title} . "; ";
- }
- $repl =~ s/; $//;
- my $Yref = $msg->Y;
- foreach my $Y (keys %{$Yref}) {
- my $oldY = $Y;
- if ($Y =~ s/<factroid (?:rss|rdf)\s*=\s*\"?\Q$uri\E\">/$repl/) {
- $Yref->{$Y} = delete $Yref->{$oldY};
- }
- }
- } else {
- print "error: $@\n";
- }
- }
- #FIXME: only do this when all urls in factoid have been checked.
- if (--$heap->{$msg}->{count} == 0) {
- delete $heap->{$msg}->{count};
- my $sender = delete $heap->{$msg}->{sender};
- delete $heap->{$msg};
- $kernel->post ($sender, 'after_result', $msg);
- }
- }
- sub _state_check {
- my ($kernel, $sender, $heap, $msg) = @_[KERNEL, SENDER, HEAP, ARG0];
- my @check_uris = ();
- foreach my $Y (keys %{$msg->Y}) {
- push (@check_uris, $Y =~ /<factroid (?:rss|rdf)\s*=\s*\"?([^\"\s]+)\"?>/ig);
- }
- if (@check_uris != ()) {
- $heap->{$msg}->{sender} = $sender;
- foreach my $uri_str (@check_uris) {
- my $request = new HTTP::Request (GET => $uri_str);
- $heap->{$msg}->{count}++;
- $heap->{$request} = [$msg, $uri_str];
- $kernel->post ('http', 'request', 'response', $request)
- }
- }
- unless (defined ($heap->{$msg}->{count})) {
- $kernel->post ($sender, 'after_result', $msg);
- }
- }
- sub _push_state_end {
- my ($self, $kernel) = @_[OBJECT, KERNEL];
- shift;
- $kernel->post (fetchrss => 'ZOMBIE');
- $self->SUPER::_state_end (@_);
- }
- sub _state_stop {
- }
- sub new {
- my $class = shift;
- my $map = {
- re => {
- type => 'volatile',
- doc => 'the regular expression that has to match to start this module',
- value => qr/^RSS subscribe\s+(\S+)/i,
- },
- };
- my $self = $class->SUPER::new ('BOTH', $map, {
- init => '_state_init',
- after => '_state_check',
- response => '_state_response',
- push_response => '_push_state_response',
- db_query_result => '_push_state_dbresult',
- before => '_push_state_check',
- _stop => '_state_stop', }, @_);
- return $self;
- }
- 1;