PageRenderTime 595ms CodeModel.GetById 93ms app.highlight 472ms RepoModel.GetById 1ms app.codeStats 1ms

/tags/3.2/lib/MediaWiki/Bot.pm

http://perlwikipedia.googlecode.com/
Perl | 2157 lines | 1832 code | 304 blank | 21 comment | 159 complexity | 3130e4109f8f2d2523d26e9963bc793e MD5 | raw file
Possible License(s): GPL-3.0

Large files are truncated, but you can click here to view the full file

   1package MediaWiki::Bot;
   2# ABSTRACT: a MediaWiki bot framework written in Perl
   3
   4use strict;
   5use warnings;
   6use WWW::Mechanize;
   7use HTML::Entities;
   8use URI::Escape;
   9use XML::Simple;
  10use Carp;
  11use URI::Escape qw(uri_escape_utf8);
  12use Digest::MD5 qw(md5_hex);
  13use Encode qw(encode_utf8);
  14use MediaWiki::API;
  15
  16use Module::Pluggable search_path => [qw(MediaWiki::Bot::Plugin)], 'require' => 1;
# Load every installed MediaWiki::Bot::Plugin:: module (discovered and
# require'd by Module::Pluggable above) and import its subs into this
# class, so plugins transparently extend the bot's public API.
foreach my $plugin (__PACKAGE__->plugins) {

    #print "Found plugin $plugin\n";
    $plugin->import();
}
  22
  23our $VERSION = '3.2.0';
  24
  25=head1 SYNOPSIS
  26
  27    use MediaWiki::Bot;
  28
  29    my $bot = MediaWiki::Bot->new({
  30        useragent   => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)',
  31        assert      => 'bot',
  32        protocol    => 'https',
  33        host        => 'secure.wikimedia.org',
  34        path        => 'wikipedia/meta/w',
  35        login_data  => { username => "Mike's bot account", password => "password" },
  36    });
  37
  38    my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
  39    print "Reverting to $revid\n" if defined($revid);
  40    $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
  41
  42=head1 DESCRIPTION
  43
  44MediaWiki::Bot is a framework that can be used to write bots which interface
  45with the MediaWiki API (L<http://en.wikipedia.org/w/api.php>).
  46
  47=head1 METHODS
  48
  49=head2 new($options_hashref)
  50
  51Calling MediaWiki::Bot->new() will create a new MediaWiki::Bot object.
  52
  53=over 4
  54
  55=item *
  56agent sets a custom useragent
  57
  58=item *
  59assert sets a parameter for the AssertEdit extension (commonly 'bot'). Refer to L<http://mediawiki.org/wiki/Extension:AssertEdit>.
  60
  61=item *
  62operator allows the bot to send you a message when it fails an assert, and will be integrated into the default useragent (which may not be used if you set agent yourself). The message will tell you that $useragent is logged out, so use a descriptive one if you set it.
  63
  64=item *
  65maxlag allows you to set the maxlag parameter (default is the recommended 5s). Please refer to the MediaWiki documentation prior to changing this from the default.
  66
  67=item *
  68protocol allows you to specify 'http' or 'https' (default is 'http'). This is commonly used with the domain and path settings below.
  69
  70=item *
  71host sets the domain name of the wiki to connect to.
  72
  73=item *
  74path sets the path to api.php (with no leading or trailing slash).
  75
  76=item *
  77login_data is a hashref of credentials to pass to login(). See that section for a description.
  78
  79=item *
  80debug is whether to provide debug output. 1 provides only error messages; 2 provides further detail on internal operations.
  81
  82=back
  83
  84For example:
  85
  86    my $bot = MediaWiki::Bot->new({
  87        useragent   => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)',
  88        assert      => 'bot',
  89        protocol    => 'https',
  90        host        => 'secure.wikimedia.org',
  91        path        => 'wikipedia/meta/w',
  92        login_data  => { username => "Mike's bot account", password => "password" },
  93    });
  94
  95For backward compatibility, you can specify up to three parameters:
  96
  97    my $bot = MediaWiki::Bot->new('MediaWiki::Bot 2.3.1 (User:Mike.lifeguard)', $assert, $operator);
  98
  99This deprecated form will never do auto-login or autoconfiguration.
 100
 101=cut
 102
 103sub new {
 104    my $package = shift;
 105    my $agent;
 106    my $assert;
 107    my $operator;
 108    my $maxlag;
 109    my $protocol;
 110    my $host;
 111    my $path;
 112    my $login_data;
 113    my $debug;
 114
 115    if (ref $_[0] eq 'HASH') {
 116        $agent      = $_[0]->{'agent'};
 117        $assert     = $_[0]->{'assert'};
 118        $operator   = $_[0]->{'operator'};
 119        $maxlag     = $_[0]->{'maxlag'};
 120        $protocol   = $_[0]->{'protocol'};
 121        $host       = $_[0]->{'host'};
 122        $path       = $_[0]->{'path'};
 123        $login_data = $_[0]->{'login_data'};
 124        $debug      = $_[0]->{'debug'};
 125    }
 126    else {
 127        $agent    = shift;
 128        $assert   = shift;
 129        $operator = shift;
 130        $maxlag   = shift;
 131        $protocol = shift;
 132        $host     = shift;
 133        $path     = shift;
 134        $debug    = shift;
 135    }
 136
 137    $assert   =~ s/[&?]assert=// if $assert; # Strip out param part, leaving just the value
 138    $operator =~ s/^User://i     if $operator;
 139
 140    # Set defaults
 141    unless ($agent) {
 142        $agent  = "MediaWiki::Bot/$VERSION";
 143        $agent .= " (User:$operator)" if $operator;
 144    }
 145
 146    my $self = bless({}, $package);
 147    $self->{mech} = WWW::Mechanize->new(
 148        cookie_jar  => {},
 149        onerror     => \&Carp::carp,
 150        stack_depth => 1
 151    );
 152    $self->{mech}->agent($agent);
 153    $self->{errstr}   = '';
 154    $self->{assert}   = $assert;
 155    $self->{operator} = $operator;
 156    $self->{'debug'}  = $debug || 0;
 157    $self->{api}      = MediaWiki::API->new();
 158    $self->{api}->{ua}->agent($agent);
 159
 160    # Set wiki (handles setting $self->{host} etc)
 161    $self->set_wiki({
 162            protocol => $protocol,
 163            host     => $host,
 164            path     => $path,
 165    });
 166
 167    $self->{api}->{config}->{max_lag}         = $maxlag || 5;
 168    $self->{api}->{config}->{max_lag_delay}   = 1;
 169    $self->{api}->{config}->{retries}         = 5;
 170    $self->{api}->{config}->{max_lag_retries} = -1;
 171    $self->{api}->{config}->{retry_delay}     = 30;
 172
 173    # Log-in, and maybe autoconfigure
 174    if ($login_data) {
 175        my $success = $self->login($login_data);
 176        if ($success) {
 177            return $self;
 178        }
 179        else {
 180            carp "Couldn't log in with supplied settings" if $self->{'debug'};
 181            return;
 182        }
 183    }
 184
 185    return $self;
 186}
 187
 188=head2 set_wiki($options)
 189
 190Set what wiki to use. Host is the domain name; path is the path before api.php (usually 'w'); protocol is either 'http' or 'https'. For example:
 191
 192    $bot->set_wiki(
 193        protocol    => 'https',
 194        host        => 'secure.wikimedia.org',
 195        path        => 'wikipedia/meta/w',
 196    );
 197
 198For backward compatibility, you can specify up to two parameters in this deprecated form:
 199
 200    $bot->set_wiki($host, $path);
 201
If you don't set any parameter, its previous value is used. If it has never been set, the default settings are 'http', 'en.wikipedia.org' and 'w'.
 203
 204=cut
 205
 206sub set_wiki {
 207    my $self = shift;
 208    my $host;
 209    my $path;
 210    my $protocol;
 211
 212    if (ref $_[0] eq 'HASH') {
 213        $host     = $_[0]->{'host'};
 214        $path     = $_[0]->{'path'};
 215        $protocol = $_[0]->{'protocol'};
 216    }
 217    else {
 218        $host = shift;
 219        $path = shift;
 220    }
 221
 222    # Set defaults
 223    $protocol = $self->{'protocol'} || 'http'             unless defined($protocol);
 224    $host     = $self->{'host'}     || 'en.wikipedia.org' unless defined($host);
 225    $path     = $self->{'path'}     || 'w'                unless defined($path);
 226
 227    # Clean up the parts we will build a URL with
 228    $protocol =~ s,://$,,;
 229    if ($host =~ m,^(http|https)(://)?, && !$protocol) {
 230        $protocol = $1;
 231    }
 232    $host =~ s,^https?://,,;
 233    $host =~ s,/$,,;
 234    $path =~ s,/$,,;
 235
 236    # Invalidate wiki-specific cached data
 237    if (   ((defined($self->{'host'})) and ($self->{'host'} ne $host))
 238        or ((defined($self->{'path'})) and ($self->{'path'} ne $path))
 239        or ((defined($self->{'protocol'})) and ($self->{'protocol'} ne $protocol))
 240    ) {
 241        delete $self->{'ns_data'} if $self->{'ns_data'};
 242    }
 243
 244    $self->{protocol} = $protocol;
 245    $self->{host}     = $host;
 246    $self->{path}     = $path;
 247
 248    $self->{api}->{config}->{api_url} = $path
 249        ? "$protocol://$host/$path/api.php"
 250        : "$protocol://$host/api.php"; # $path is '', so don't use http://domain.com//api.php
 251    warn "Wiki set to " . $self->{api}->{config}{api_url} . "\n" if $self->{'debug'} > 1;
 252
 253    return 1;
 254}
 255
 256=head2 login($login_hashref)
 257
Logs the user $username in, optionally using $password. First, an attempt will be made to use cookies to log in. If this fails, an attempt will be made to use the password provided to log in, if any. If the login was successful, returns true; false otherwise.
 259
 260    $bot->login({
 261        username => $username,
 262        password => $password,
 263    }) or die "Login failed";
 264
 265Once logged in, attempt to do some simple auto-configuration. At present, this consists of:
 266
 267=over 4
 268
 269=item *
 270
 271Warning if the account doesn't have the bot flag, and isn't a sysop account.
 272
 273=item *
 274
 275Setting the use of apihighlimits if the account has that userright.
 276
 277=item *
 278
 279Setting an appropriate default assert.
 280
 281=back
 282
 283You can skip this autoconfiguration by passing C<autoconfig =E<gt> 0>
 284
 285=head3 Single User Login
 286
 287On WMF wikis, C<do_sul> specifies whether to log in on all projects. The default is false. But even when false, you still get a CentralAuth cookie for, and are thus logged in on, all languages of a given domain (*.wikipedia.org, for example). When set, a login is done on each WMF domain so you are logged in on all ~800 content wikis. Since C<*.wikimedia.org> is not possible, we explicitly include meta, commons, incubator, and wikispecies. When C<do_sul> is set, the return is the number of domains that login was successful for. This allows callers to do the following:
 288
 289    $bot->login({
 290        username    => $username,
 291        password    => $password,
 292        do_sul      => 1,
 293    }) or die "SUL failed";
 294
 295For backward compatibility, you can call this as
 296
 297    $bot->login($username, $password);
 298
 299This deprecated form will never do autoconfiguration or SUL login.
 300
 301If you need to supply basic auth credentials, pass a hashref of data as described by L<LWP::UserAgent>:
 302
 303    $bot->login({
 304        username    => $username,
 305        password    => $password,
 306        basic_auth  => {    netloc  => "private.wiki.com:80",
 307                            realm   => "Authentication Realm",
 308                            uname   => "Basic auth username",
 309                            pass    => "password",
 310                        }
 311    }) or die "Couldn't log in";
 312
 313=cut
 314
# Log in, trying cached cookies first and falling back to a password login
# (including the two-step NeedToken handshake). With do_sul set, logs in on
# each major WMF domain and returns the count of successful logins;
# otherwise returns true/false. See the POD above for the full contract.
sub login {
    my $self = shift;
    my $username;
    my $password;
    my $lgdomain;
    my $autoconfig;
    my $basic_auth;
    my $do_sul;
    if (ref $_[0] eq 'HASH') {
        $username   = $_[0]->{'username'};
        $password   = $_[0]->{'password'};
        # autoconfig defaults ON for the hashref form only.
        $autoconfig = defined($_[0]->{'autoconfig'}) ? $_[0]->{'autoconfig'} : 1;
        $basic_auth = $_[0]->{'basic_auth'};
        $do_sul     = $_[0]->{'do_sul'} || 0;
        $lgdomain   = $_[0]->{'lgdomain'};
    }
    else {
        # Deprecated positional form: never autoconfigures, never does SUL.
        $username   = shift;
        $password   = shift;
        $autoconfig = 0;
        $do_sul     = 0;
    }
    $self->{'username'} = $username;    # Remember who we are

    # Handle basic auth first, if needed
    if ($basic_auth) {
        warn "Applying basic auth credentials" if $self->{'debug'} > 1;
        $self->{api}->{ua}->credentials(
            $basic_auth->{'netloc'},
            $basic_auth->{'realm'},
            $basic_auth->{'uname'},
            $basic_auth->{'pass'}
        );
    }
    # SUL makes no sense on the secure gateway: all projects share the
    # secure.wikimedia.org domain (and thus its cookies) anyway.
    # NOTE(review): assumes protocol/host were set via new()/set_wiki(),
    # otherwise these comparisons warn on undef.
    $do_sul = 0 if (
        ($self->{'protocol'} eq 'https') and
        ($self->{'host'} eq 'secure.wikimedia.org') );

    if ($do_sul) {
        my $debug    = $self->{'debug'};   # Remember this for later
        my $host     = $self->{'host'};
        my $path     = $self->{'path'};
        my $protocol = $self->{'protocol'};

        $self->{'debug'} = 0;           # Turn off debugging for these internal calls
        my @logins;                     # Keep track of our successes
        # One login per WMF domain gives CentralAuth cookies covering all
        # languages of each; *.wikimedia.org hosts must be listed explicitly.
        my @WMF_projects = qw(
            en.wikipedia.org
            en.wiktionary.org
            en.wikibooks.org
            en.wikinews.org
            en.wikiquote.org
            en.wikisource.org
            en.wikiversity.org
            meta.wikimedia.org
            commons.wikimedia.org
            species.wikimedia.org
            incubator.wikimedia.org
        );

        # Recurse once per project with do_sul off to avoid infinite loops.
        SUL: foreach my $project (@WMF_projects) {
            print STDERR "Logging in on $project..." if $debug > 1;
            $self->set_wiki({
                host    => $project,
            });
            my $success = $self->login({
                username    => $username,
                password    => $password,
                lgdomain    => $lgdomain,
                do_sul      => 0,
                autoconfig  => 0,
            });
            warn ($success ? " OK\n" : " FAILED\n") if $debug > 1;
            push(@logins, $success);
        }
        $self->set_wiki({           # Switch back to original wiki
            protocol => $protocol,
            host     => $host,
            path     => $path,
        });

        # Return the number of successful logins so callers can treat the
        # result as a boolean or inspect the exact count.
        my $sum = 0;
        $sum += $_ for @logins;
        my $total = scalar @WMF_projects;
        warn "$sum/$total logins succeeded\n" if $debug > 1;
        $self->{'debug'} = $debug; # Reset debug to its old value

        return $sum;
    }

    # Try a cookie-based login before sending the password anywhere.
    my $cookies = ".mediawiki-bot-$username-cookies";
    if (-r $cookies) {
        # Load the jar into both user agents (mech and MediaWiki::API),
        # keeping session cookies past their discard flag.
        $self->{mech}->{cookie_jar}->load($cookies);
        $self->{mech}->{cookie_jar}->{ignore_discard} = 1;
        $self->{api}->{ua}->{cookie_jar}->load($cookies);
        $self->{api}->{ua}->{cookie_jar}->{ignore_discard} = 1;

        my $logged_in = $self->_is_loggedin();
        if ($logged_in) {
            $self->_do_autoconfig() if $autoconfig;
            warn "Logged in successfully with cookies" if $self->{'debug'} > 1;
            return 1; # If we're already logged in, nothing more is needed
        }
    }

    unless ($password) {
        carp "No login cookies available, and no password to continue with authentication" if $self->{'debug'};
        return 0;
    }

    # Password login, step 1: may succeed outright or answer NeedToken.
    my $res = $self->{api}->api({
        action      => 'login',
        lgname      => $username,
        lgpassword  => $password,
        lgdomain    => $lgdomain
    }) or return $self->_handle_api_error();
    # Persist cookies for next time, if the jar file (or cwd) is writable.
    $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
    $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));

    if ($res->{'login'}->{'result'} eq 'NeedToken') {
        # Step 2: repeat the request with the server-issued login token.
        my $token = $res->{'login'}->{'token'};
        $res = $self->{api}->api({
            action      => 'login',
            lgname      => $username,
            lgpassword  => $password,
            lgdomain    => $lgdomain,
            lgtoken     => $token,
        }) or return $self->_handle_api_error();

        $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
        $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));
    }

    if ($res->{'login'}->{'result'} eq 'Success') {
        if ($res->{'login'}->{'lgusername'} eq $self->{'username'}) {
            $self->_do_autoconfig() if $autoconfig;
            warn "Logged in successfully with password" if $self->{'debug'} > 1;
        }
    }

    # True only if the API confirmed success *for the requested username*.
    return (
        (defined($res->{'login'}->{'lgusername'})) and
        (defined($res->{'login'}->{'result'})) and
        ($res->{'login'}->{'lgusername'} eq $self->{'username'}) and
        ($res->{'login'}->{'result'} eq 'Success')
    );
}
 462
 463=head2 set_highlimits($flag)
 464
 465Tells MediaWiki::Bot to start/stop using APIHighLimits for certain queries.
 466
 467    $bot->set_highlimits(1);
 468
 469=cut
 470
 471sub set_highlimits {
 472    my $self       = shift;
 473    my $highlimits = defined($_[0]) ? shift : 1;
 474
 475    $self->{highlimits} = $highlimits;
 476    return 1;
 477}
 478
 479=head2 logout()
 480
 481The logout procedure deletes the login tokens and other browser cookies.
 482
 483    $bot->logout();
 484
 485=cut
 486
 487sub logout {
 488    my $self = shift;
 489
 490    my $hash = {
 491        action => 'logout',
 492    };
 493    $self->{api}->api($hash);
 494    return 1;
 495}
 496
 497=head2 edit($options_hashref)
 498
 499Puts text on a page. If provided, use a specified edit summary, mark the edit as minor, as a non-bot edit, or add an assertion. Set section to edit a single section instead of the whole page. An MD5 hash is sent to guard against data corruption while in transit.
 500
 501    my $text = $bot->get_text('My page');
 502    $text .= "\n\n* More text\n";
 503    $bot->edit({
 504        page    => 'My page',
 505        text    => $text,
 506        summary => 'Adding new content',
 507        section => 'new',
 508    });
 509
 510You can also call this using the deprecated form:
 511
 512    $bot->edit($page, $text, $summary, $is_minor, $assert, $markasbot);
 513
 514=cut
 515
 516sub edit {
 517    my $self = shift;
 518    my $page;
 519    my $text;
 520    my $summary;
 521    my $is_minor;
 522    my $assert;
 523    my $markasbot;
 524    my $section;
 525
 526    if (ref $_[0] eq 'HASH') {
 527        $page      = $_[0]->{'page'};
 528        $text      = $_[0]->{'text'};
 529        $summary   = $_[0]->{'summary'};
 530        $is_minor  = $_[0]->{'is_minor'};
 531        $assert    = $_[0]->{'assert'};
 532        $markasbot = $_[0]->{'markasbot'};
 533        $section   = $_[0]->{'section'};
 534    }
 535    else {
 536        $page      = shift;
 537        $text      = shift;
 538        $summary   = shift;
 539        $is_minor  = shift;
 540        $assert    = shift;
 541        $markasbot = shift;
 542        $section   = shift;
 543    }
 544
 545    # Set defaults
 546    $summary = 'BOT: Changing page text' unless $summary;
 547    if ($assert) {
 548        $assert =~ s/^[&?]assert=//;
 549    }
 550    else {
 551        $assert = $self->{'assert'};
 552    }
 553    $is_minor  = 1 unless defined($is_minor);
 554    $markasbot = 1 unless defined($markasbot);
 555
 556    my ($edittoken, $lastedit, $tokentime) = $self->_get_edittoken($page);
 557    return $self->_handle_api_error() unless $edittoken;
 558    my $hash = {
 559        action         => 'edit',
 560        title          => $page,
 561        token          => $edittoken,
 562        text           => $text,
 563        md5            => md5_hex(encode_utf8($text)),    # Guard against data corruption
 564                                                          # Pass only bytes to md5_hex()
 565        summary        => $summary,
 566        basetimestamp  => $lastedit,                      # Guard against edit conflicts
 567        starttimestamp => $tokentime,                     # Guard against the page being deleted/moved
 568        bot            => $markasbot,
 569        assert         => $assert,
 570        minor          => $is_minor,
 571    };
 572    $hash->{'section'} = $section if defined($section);
 573
 574    my $res = $self->{api}->api($hash); # Check if MediaWiki::API::edit() is good enough
 575    return $self->_handle_api_error() unless $res;
 576    if ($res->{edit}->{result} && $res->{edit}->{result} eq 'Failure') {
 577        if ($self->{mech}->{agent}) {
 578            carp 'Assertion failed as ' . $self->{mech}->{agent} if $self->{'debug'};
 579            if ($self->{operator}) {
 580                my $optalk = $self->get_text('User talk:' . $self->{operator});
 581                if (defined($optalk)) {
 582                    print "Sending warning!\n";
 583                    $self->edit(
 584                        page => "User talk:$self->{operator}",
 585                        text => $optalk
 586                            . "\n\n==Error with "
 587                            . $self->{mech}->{agent} . "==\n"
 588                            . $self->{mech}->{agent}
 589                            . ' needs to be logged in! ~~~~',
 590                        summary  => 'bot issue',
 591                        is_minor => 0,
 592                        assert   => ''
 593                    );
 594                }
 595            }
 596            return;
 597        }
 598        else {
 599            carp 'Assertion failed' if $self->{'debug'};
 600        }
 601    }
 602    return $res;
 603}
 604
 605=head2 move($from, $to, $reason, $options_hashref)
 606
 607This moves a page from $from to $to. If you wish to specify more options (like whether to suppress creation of a redirect), use $options_hashref.
 608
 609=over 4
 610
 611=item *
movetalk specifies whether to attempt to move the associated talk page.
 613
 614=item *
 615noredirect specifies whether to suppress creation of a redirect.
 616
 617=item *
 618movesubpages specifies whether to move subpages, if applicable.
 619
 620=item *
 621watch and unwatch add or remove the page and the redirect from your watchlist.
 622
 623=item *
 624ignorewarnings ignores warnings.
 625
 626=back
 627
 628    my @pages = ("Humor", "Rumor");
 629    foreach my $page (@pages) {
 630        my $to = $page;
 631        $to =~ s/or$/our/;
 632        $bot->move($page, $to, "silly 'merricans");
 633    }
 634
 635=cut
 636
 637sub move {
 638    my $self   = shift;
 639    my $from   = shift;
 640    my $to     = shift;
 641    my $reason = shift;
 642    my $opts   = shift;
 643
 644    my $hash = {
 645        action => 'move',
 646        from   => $from,
 647        to     => $to,
 648        reason => $reason,
 649    };
 650    $hash->{'movetalk'}   = $opts->{'movetalk'}   if defined($opts->{'movetalk'});
 651    $hash->{'noredirect'} = $opts->{'noredirect'} if defined($opts->{'noredirect'});
 652
 653    my $res = $self->{api}->edit($hash);
 654    return $self->_handle_api_error() unless $res;
 655    return $res; # should we return something more useful?
 656}
 657
 658=head2 get_history($pagename[,$limit])
 659
 660Returns an array containing the history of the specified page, with $limit number of revisions. The array structure contains 'revid', 'user', 'comment', 'timestamp_date', and 'timestamp_time'.
 661
 662=cut
 663
 664sub get_history {
 665    my $self      = shift;
 666    my $pagename  = shift;
 667    my $limit     = shift || 5;
 668    my $rvstartid = shift || '';
 669    my $direction = shift;
 670
 671    my @return;
 672    my @revisions;
 673
 674    my $hash = {
 675        action  => 'query',
 676        prop    => 'revisions',
 677        titles  => $pagename,
 678        rvprop  => 'ids|timestamp|user|comment',
 679        rvlimit => $limit
 680    };
 681
 682    $hash->{rvstartid} = $rvstartid if ($rvstartid);
 683    $hash->{direction} = $direction if ($direction);
 684
 685    my $res = $self->{api}->api($hash);
 686    return $self->_handle_api_error() unless $res;
 687    my ($id) = keys %{ $res->{query}->{pages} };
 688    my $array = $res->{query}->{pages}->{$id}->{revisions};
 689
 690    foreach my $hash (@{$array}) {
 691        my $revid = $hash->{revid};
 692        my $user  = $hash->{user};
 693        my ($timestamp_date, $timestamp_time) = split(/T/, $hash->{timestamp});
 694        $timestamp_time =~ s/Z$//;
 695        my $comment = $hash->{comment};
 696        push(
 697            @return,
 698            {
 699                revid          => $revid,
 700                user           => $user,
 701                timestamp_date => $timestamp_date,
 702                timestamp_time => $timestamp_time,
 703                comment        => $comment,
 704            });
 705    }
 706    return @return;
 707}
 708
 709=head2 get_text($pagename,[$revid,$section_number])
 710
 711Returns an the wikitext of the specified page. If $revid is defined, it will return the text of that revision; if $section_number is defined, it will return the text of that section. A blank page will return wikitext of "" (which evaluates to false in Perl, but is defined); a nonexistent page will return undef (which also evaluates to false in Perl, but is obviously undefined). You can distinguish between blank and nonexistent by using defined():
 712
 713    my $wikitext = $bot->get_text('Page title');
 714    print "Wikitext: $wikitext\n" if defined $wikitext;
 715
 716=cut
 717
 718sub get_text {
 719    my $self     = shift;
 720    my $pagename = shift;
 721    my $revid    = shift;
 722    my $section  = shift;
 723
 724    my $hash = {
 725        action => 'query',
 726        titles => $pagename,
 727        prop   => 'revisions',
 728        rvprop => 'content',
 729    };
 730    $hash->{rvstartid} = $revid   if ($revid);
 731    $hash->{rvsection} = $section if ($section);
 732
 733    my $res = $self->{api}->api($hash);
 734    return $self->_handle_api_error() unless $res;
 735    my ($id, $data) = %{ $res->{query}->{pages} };
 736    if ($id == -1) {    # Page doesn't exist
 737        return;
 738    }
 739    else {              # Page exists
 740        my $wikitext = $data->{revisions}[0]->{'*'};
 741        return $wikitext;
 742    }
 743}
 744
 745=head2 get_id($pagename)
 746
 747Returns the id of the specified page. Returns undef if page does not exist.
 748
 749    my $pageid = $bot->get_id("Main Page");
 750    croak "Page doesn't exist\n" if !defined($pageid);
 751
 752=cut
 753
 754sub get_id {
 755    my $self     = shift;
 756    my $pagename = shift;
 757
 758    my $hash = {
 759        action => 'query',
 760        titles => $pagename,
 761    };
 762
 763    my $res = $self->{api}->api($hash);
 764    return $self->_handle_api_error() unless $res;
 765    my ($id, $data) = %{ $res->{query}->{pages} };
 766    if ($id == -1) {
 767        return;
 768    }
 769    else {
 770        return $id;
 771    }
 772}
 773
 774=head2 get_pages(\@pages)
 775
 776Returns the text of the specified pages in a hashref. Content of undef means page does not exist. Also handles redirects or article names that use namespace aliases.
 777
 778    my @pages = ('Page 1', 'Page 2', 'Page 3');
 779    my $thing = $bot->get_pages(\@pages);
 780    foreach my $page (keys %$thing) {
 781        my $text = $thing->{$page};
 782        print "$text\n" if defined($text);
 783    }
 784
 785=cut
 786
# Fetch the text of several pages in one API request. Returns a hashref
# keyed by title; a value of undef means the page does not exist. Titles
# not recognised by the API (e.g. 'WP:CSD'-style alias names) are retried
# one-by-one after expanding a small, enwiki-specific alias table.
sub get_pages {
    my $self  = shift;
    # Accept either an arrayref or a plain list of titles.
    my @pages = (ref $_[0] eq 'ARRAY') ? @{$_[0]} : @_;
    my %return;

    my $hash = {
        action => 'query',
        titles => join('|', @pages),
        prop   => 'revisions',
        rvprop => 'content',
    };

    my $diff;    # Used to track problematic article names
    # Seed every requested title with count 1; counts left at 1 after the
    # loop below are titles the API response did not cover.
    map { $diff->{$_} = 1; } @pages;

    my $res = $self->{api}->api($hash);
    return $self->_handle_api_error() unless $res;

    foreach my $id (keys %{ $res->{query}->{pages} }) {
        my $page = $res->{'query'}->{'pages'}->{$id};
        # Mark this title as answered; skip titles we never asked for
        # (the API may return normalized/extra entries).
        if ($diff->{ $page->{'title'} }) {
            $diff->{ $page->{'title'} }++;
        }
        else {
            next;
        }

        # A 'missing' key means the page does not exist: record undef.
        if (defined($page->{'missing'})) {
            $return{ $page->{'title'} } = undef;
            next;
        }
        if (defined($page->{'revisions'})) {
            my $revisions = @{ $page->{'revisions'} }[0]->{'*'};
            if (!defined $revisions) {
                $return{ $page->{'title'} } = $revisions;
            }
            # Short pages matching the redirect syntax are chased one hop;
            # marked FRAGILE because real redirect detection belongs to the
            # API, not a regex.
            elsif (length($revisions) < 150 && $revisions =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/) {    # FRAGILE!
                my $redirect_to = $1;
                $return{ $page->{'title'} } = $self->get_text($redirect_to);
            }
            else {
                $return{ $page->{'title'} } = $revisions;
            }
        }
    }

    # Based on api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases
    # Should be done on an as-needed basis! This is only correct for enwiki (and
    # it is probably incomplete anyways, or will be eventually).
    my $expand = {
        'WP'         => 'Wikipedia',
        'WT'         => 'Wikipedia talk',
        'Image'      => 'File',
        'Image talk' => 'File talk',
    };

    # Only for those article names that remained after the first part
    # If we're here we are dealing most likely with a WP:CSD type of article name
    for my $title (keys %$diff) {
        if ($diff->{$title} == 1) {
            my @pieces = split(/:/, $title);
            if (@pieces > 1) {
                # Expand a known alias prefix and retry via get_text().
                $pieces[0] = ($expand->{ $pieces[0] } || $pieces[0]);
                my $v = $self->get_text(join ':', @pieces);
                warn "Detected article name that needed expanding $title\n" if $self->{'debug'} > 1;

                $return{$title} = $v;
                # NOTE(review): if the expanded page doesn't exist, $v is
                # undef and this match warns under 'use warnings' — confirm
                # whether that path is reachable in practice.
                if ($v =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/) {
                    $v = $self->get_text($1);
                    $return{$title} = $v;
                }
            }
        }
    }
    return \%return;
}
 863
 864=head2 revert($pagename, $revid[,$summary])
 865
 866Reverts the specified page to $revid, with an edit summary of $summary. A default edit summary will be used if $summary is omitted.
 867
 868    my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
 869    print "Reverting to $revid\n" if defined($revid);
 870    $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
 871
 872
 873=cut
 874
 875sub revert {
 876    my $self     = shift;
 877    my $pagename = shift;
 878    my $revid    = shift;
 879    my $summary  = shift || "Reverting to old revision $revid";
 880
 881    my $text = $self->get_text($pagename, $revid);
 882    my $res = $self->edit($pagename, $text, $summary);
 883    return $res;
 884}
 885
 886=head2 undo($pagename, $revid[,$summary[,$after]])
 887
 888Reverts the specified $revid, with an edit summary of $summary, using the undo function. To undo all revisions from $revid up to but not including this one, set $after to another revid. If not set, just undo the one revision ($revid).
 889
 890=cut
 891
 892sub undo {
 893    my $self    = shift;
 894    my $page    = shift;
 895    my $revid   = shift;
 896    my $summary = shift || "Reverting revision #$revid";
 897    my $after   = shift;
 898    $summary = "Reverting edits between #$revid & #$after" if defined($after);    # Is that clear? Correct?
 899
 900    my ($edittoken, $basetimestamp, $starttimestamp) = $self->_get_edittoken($page);
 901    my $hash = {
 902        action         => 'edit',
 903        title          => $page,
 904        undo           => $revid,
 905        undoafter      => $after,
 906        summary        => $summary,
 907        token          => $edittoken,
 908        starttimestamp => $starttimestamp,
 909        basetimestamp  => $basetimestamp,
 910    };
 911
 912    my $res = $self->{api}->api($hash);
 913    return $self->_handle_api_error() unless $res;
 914    return $res;
 915}
 916
 917=head2 get_last($page, $user)
 918
 919Returns the revid of the last revision to $page not made by $user. undef is returned if no result was found, as would be the case if the page is deleted.
 920
 921    my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
    if (defined($revid)) {
 923        print "Reverting to $revid\n";
 924        $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
 925    }
 926
 927=cut
 928
 929sub get_last {
 930    my $self = shift;
 931    my $page = shift;
 932    my $user = shift;
 933
 934    my $revertto = 0;
 935
 936    my $res = $self->{api}->api({
 937            action        => 'query',
 938            titles        => $page,
 939            prop          => 'revisions',
 940            rvlimit       => 1,
 941            rvprop        => 'ids|user',
 942            rvexcludeuser => $user,
 943    });
 944    return $self->_handle_api_error() unless $res;
 945    my ($id, $data) = %{ $res->{query}->{pages} };
 946    my $revid = $data->{'revisions'}[0]->{'revid'};
 947    return $revid;
 948}
 949
 950=head2 update_rc($limit[,$options_hashref])
 951
 952Returns an array containing the Recent Changes to the wiki Main namespace. The array structure contains 'title', 'revid', 'old_revid', and 'timestamp'. The $options_hashref is the same as described in the section on linksearch().
 953
 954    my @rc = $bot->update_rc(5);
 955    foreach my $hashref (@rc) {
        my $title = $hashref->{'title'};
 957        print "$title\n";
 958    }
 959
 960    # Or, use a callback for incremental processing:
 961    my $options = { hook => \&mysub, };
    $bot->update_rc(5, $options);
 963    sub mysub {
 964        my ($res) = @_;
 965        foreach my $hashref (@$res) {
 966            my $page = $hashref->{'title'};
 967            print "$page\n";
 968        }
 969    }
 970
 971=cut
 972
 973sub update_rc {
 974    my $self    = shift;
 975    my $limit   = shift;
 976    my $options = shift;
 977
 978    my $hash = {
 979        action      => 'query',
 980        list        => 'recentchanges',
 981        rcnamespace => 0,
 982        rclimit     => $limit,
 983    };
 984    $options->{'max'} = 1 unless $options->{'max'};
 985
 986    my $res = $self->{api}->list($hash, $options);
 987    return $self->_handle_api_error() unless $res;
 988    return 1 if (!ref $res);    # Not a ref when using callback
 989    my @rc_table;
 990    foreach my $hash (@{$res}) {
 991        push(
 992            @rc_table,
 993            {
 994                title     => $hash->{'title'},
 995                revid     => $hash->{'revid'},
 996                old_revid => $hash->{'old_revid'},
 997                timestamp => $hash->{'timestamp'},
 998            });
 999    }
1000    return @rc_table;
1001}
1002
1003=head2 what_links_here($page[,$filter[,$ns[,$options]]])
1004
1005Returns an array containing a list of all pages linking to $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() for examples.
1006
1007A typical query:
1008
1009    my @links = $bot->what_links_here("Meta:Sandbox", undef, 1, {hook=>\&mysub});
1010    sub mysub{
1011        my ($res) = @_;
1012        foreach my $hash (@$res) {
1013            my $title = $hash->{'title'};
1014            my $is_redir = $hash->{'redirect'};
1015            print "Redirect: $title\n" if $is_redir;
1016            print "Page: $title\n" unless $is_redir;
1017        }
1018    }
1019
Transclusions are no longer handled by what_links_here() - use list_transclusions() instead.
1021
1022=cut
1023
# List all pages linking to $page via list=backlinks. Returns hashrefs with
# 'title' and a boolean 'redirect'. $filter restricts results to
# all/redirects/nonredirects; $ns is a namespace number or arrayref of them;
# $options is passed to MediaWiki::API::list() (returns 1 when a hook is used).
# Fix: the filter validation is now anchored — the old unanchored match could
# extract 'redirects' from junk like 'xxredirects'; invalid values are now
# dropped instead of being sent to the API verbatim.
sub what_links_here {
    my $self    = shift;
    my $page    = shift;
    my $filter  = shift;
    my $ns      = shift;
    my $options = shift;

    $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');    # Allow array of namespaces

    # Anchored validation of $filter; anything else is ignored.
    if (defined($filter) and $filter =~ m/^(all|redirects|nonredirects)$/) {
        $filter = $1;
    }
    else {
        undef $filter;
    }

    # http://en.wikipedia.org/w/api.php?action=query&list=backlinks&bltitle=template:tlx
    my $hash = {
        action      => 'query',
        list        => 'backlinks',
        bltitle     => $page,
        blnamespace => $ns,
    };
    $hash->{'blfilterredir'} = $filter if $filter;
    $options->{'max'} = 1 unless $options->{'max'};

    my $res = $self->{api}->list($hash, $options);
    return $self->_handle_api_error() unless $res;
    return 1 if (!ref $res);    # When using a callback hook, this won't be a reference

    my @links;
    foreach my $hashref (@$res) {
        push @links, {
            title    => $hashref->{'title'},
            redirect => defined($hashref->{'redirect'}),
        };
    }
    return @links;
}
1058
1059=head2 list_transclusions($page[,$filter[,$ns[,$options]]])
1060
1061Returns an array containing a list of all pages transcluding $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() or what_links_here() for examples.
1062
1063A typical query:
1064
1065    $bot->list_transclusions("Template:Tlx", undef, 4, {hook => \&mysub});
1066    sub mysub{
1067        my ($res) = @_;
1068        foreach my $hash (@$res) {
1069            my $title = $hash->{'title'};
1070            my $is_redir = $hash->{'redirect'};
1071            print "Redirect: $title\n" if $is_redir;
1072            print "Page: $title\n" unless $is_redir;
1073        }
1074    }
1075
1076=cut
1077
# List all pages transcluding $page via list=embeddedin. Returns hashrefs
# with 'title' and a boolean 'redirect'. $filter restricts results to
# all/redirects/nonredirects; $ns is a namespace number or arrayref of them;
# $options is passed to MediaWiki::API::list() (returns 1 when a hook is used).
# Fix: the filter validation is now anchored — the old unanchored match could
# extract 'redirects' from junk like 'xxredirects'; invalid values are now
# dropped instead of being sent to the API verbatim.
sub list_transclusions {
    my $self    = shift;
    my $page    = shift;
    my $filter  = shift;
    my $ns      = shift;
    my $options = shift;

    $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');    # Allow array of namespaces

    # Anchored validation of $filter; anything else is ignored.
    if (defined($filter) and $filter =~ m/^(all|redirects|nonredirects)$/) {
        $filter = $1;
    }
    else {
        undef $filter;
    }

    # http://en.wikipedia.org/w/api.php?action=query&list=embeddedin&eititle=Template:Stub
    my $hash = {
        action      => 'query',
        list        => 'embeddedin',
        eititle     => $page,
        einamespace => $ns,
    };
    $hash->{'eifilterredir'} = $filter if $filter;
    $options->{'max'} = 1 unless $options->{'max'};

    my $res = $self->{api}->list($hash, $options);
    return $self->_handle_api_error() unless $res;
    return 1 if (!ref $res);    # When using a callback hook, this won't be a reference

    my @links;
    foreach my $hashref (@$res) {
        push @links, {
            title    => $hashref->{'title'},
            redirect => defined($hashref->{'redirect'}),
        };
    }
    return @links;
}
1112
1113=head2 get_pages_in_category($category_name[,$options_hashref])
1114
1115Returns an array containing the names of all pages in the specified category (include Category: prefix). Does not recurse into sub-categories.
1116
1117    my @pages = $bot->get_pages_in_category("Category:People on stamps of Gabon");
1118    print "The pages in Category:People on stamps of Gabon are:\n@pages\n";
1119
1120The options hashref is as described in the section on linksearch(). Use { max => 0 } to get all results.
1121
1122=cut
1123
# Return the titles of all direct members of $category (sub-categories are
# not recursed into — see get_all_pages_in_category for that). The category
# name is normalized to carry the localized Category-namespace prefix.
# $options is passed to MediaWiki::API::list(); max => 0 fetches everything.
# Fix: the API-error check now runs *before* the ref test — previously an
# undef (error) result hit `return 1 if (!ref $res)` first and was reported
# as a successful callback-style call.
sub get_pages_in_category {
    my $self     = shift;
    my $category = shift;
    my $options  = shift;

    if ($category =~ m/:/) {    # It might have a namespace name
        my ($cat, $title) = split(/:/, $category, 2);
        if ($cat ne 'Category') {    # 'Category' is a canonical name for ns14
            my $ns_data     = $self->_get_ns_data();
            my $cat_ns_name = $ns_data->{'14'};    # ns14 gives us the localized name for 'Category'
            if ($cat ne $cat_ns_name) {
                $category = "$cat_ns_name:$category";
            }
        }
    }
    else {    # Definitely no namespace name, since there's no colon
        $category = "Category:$category";
    }
    warn "Category to fetch is [[$category]]" if $self->{'debug'} > 1;

    my $hash = {
        action  => 'query',
        list    => 'categorymembers',
        cmtitle => $category,
    };
    $options->{'max'} = 1 unless defined($options->{'max'});
    delete($options->{'max'}) if $options->{'max'} == 0;

    my $res = $self->{api}->list($hash, $options);
    return $self->_handle_api_error() unless $res;    # Errors first...
    return 1 if (!ref $res);    # ...then the not-a-ref (callback hook) case

    my @pages;
    foreach my $item (@$res) {
        push @pages, $item->{'title'};
    }
    return @pages;
}
1162
1163=head2 get_all_pages_in_category($category_name[,$options_hashref])
1164
1165Returns an array containing the names of ALL pages in the specified category (include the Category: prefix), including sub-categories. The $options_hashref is the same as described for get_pages_in_category().
1166
1167=cut
1168
{    # Instead of using the state pragma, use a bare block
    # %data persists across calls (it is closed over by the sub below) and
    # doubles as both the result accumulator and the visited-set that stops
    # infinite recursion on category cycles. Because it is shared, this
    # method is not reentrant/thread-safe across concurrent external calls.
    my %data;

    # Recursively collect every page in $base_category, descending into
    # sub-categories. Returns a flat list of unique titles (keys of %data).
    sub get_all_pages_in_category {
        my $self          = shift;
        my $base_category = shift;
        my $options       = shift;
        $options->{'max'} = 0 unless defined($options->{'max'});

        my @first = $self->get_pages_in_category($base_category, $options);
        %data = () unless $_[0];    # This is a special flag for internal use.
                                    # It marks a call to this method as being
                                    # internal. Since %data is a fake state variable,
                                    # it needs to be cleared for every *external*
                                    # call, but not cleared when the call is recursive.

        # ns14 gives the localized 'Category' namespace name, used to detect
        # which members are themselves categories.
        my $ns_data     = $self->_get_ns_data();
        my $cat_ns_name = $ns_data->{'14'};

        foreach my $page (@first) {
            if ($page =~ m/^$cat_ns_name:/) {
                # A sub-category: recurse only if we haven't seen it yet,
                # so category loops terminate.
                if (!exists($data{$page})) {
                    $data{$page} = '';
                    my @pages = $self->get_all_pages_in_category($page, $options, 1);
                    foreach (@pages) {
                        $data{$_} = '';
                    }
                }
                else {
                    $data{$page} = '';
                }
            }
            else {
                # An ordinary page: just record it.
                $data{$page} = '';
            }
        }
        return keys %data;
    }
}    # This ends the bare block around get_all_pages_in_category()
1208
1209=head2 linksearch($link[,$ns[,$protocol[,$options]]])
1210
1211Runs a linksearch on the specified link and returns an array containing anonymous hashes with keys 'url' for the outbound URL, and 'title' for the page the link is on. $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). You can search by $protocol (http is default). The optional $options hashref is fully documented in MediaWiki::API: Set `max` to limit the number of queries performed. Set `hook` to a subroutine reference to use a callback hook for incremental processing.
1212
1213Set max in $options to get more than one query's worth of results:
1214
1215    my $options = { max => 10, }; # I only want some results
1216    my @links = $bot->linksearch("slashdot.org", 1, undef, $options);
1217    foreach my $hash (@links) {
1218        my $url = $hash->{'url'};
1219        my $page = $hash->{'title'};
1220        print "$page: $url\n";
1221    }
1222
1223You can also specify a callback function in $options:
1224
1225    my $options = { hook => \&mysub, }; # I want to do incremental processing
1226    $bot->linksearch("slashdot.org", 1, undef, $options);
1227    sub mysub {
1228        my ($res) = @_;
1229        foreach my $hashref (@$res) {
1230            my $url  = $hashref->{'url'};
1231            my $page = $hashref->{'title'};
1232            print "$page: $url\n";
1233        }
1234    }
1235
1236=cut
1237
# Run an external-link search for $link (list=exturlusage). Returns a list
# of hashrefs with keys 'url' and 'title'; $ns may be a namespace number or
# an arrayref of them, $prot selects the protocol, and $options is handed to
# MediaWiki::API::list() (1 is returned when a callback hook is in use).
sub linksearch {
    my ($self, $link, $ns, $prot, $options) = @_;

    # Accept an arrayref of namespace numbers.
    $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');

    my $query = {
        action      => 'query',
        list        => 'exturlusage',
        euprop      => 'url|title',
        euquery     => $link,
        eunamespace => $ns,
        euprotocol  => $prot,
    };
    $options->{'max'} = 1 unless $options->{'max'};

    my $res = $self->{api}->list($query, $options);
    return $self->_handle_api_error() unless $res;
    return 1 unless ref $res;    # Callback hook in use; results went to the hook

    # Project each record onto the documented url/title pair.
    return map { { 'url' => $_->{'url'}, 'title' => $_->{'title'} } } @$res;
}
1268
1269=head2 purge_page($pagename)
1270
Purges the server cache of the specified page. Pass an array reference to purge multiple pages. Returns true on success; false on failure. If you really care, a true return value is the number of pages successfully purged. You could check that it is the same as the number you wanted to purge — maybe some pages don't exist, or you passed invalid titles, or you aren't allowed to purge the cache:
1272
1273    my @to_purge = ('Main Page', 'A', 'B', 'C', 'Very unlikely to exist');
1274    my $size = scalar @to_purge;
1275
1276    print "all-at-once:\n";
1277    my $success = $bot->purge_page(\@to_purge);
1278
1279    if ($success == $size) {
1280        print "@to_purge: OK ($success/$size)\n";
1281    }
1282    else {
1283        my $missed = @to_purge - $success;
1284        print "We couldn't purge $missed pages (list was: "
1285            . join(', ', @to_purge)
1286            . ")\n";
1287    }
1288
1289    # OR
1290    print "\n\none-at-a-time:\n";
1291    foreach my $page (@to_purge) {
1292        my $ok = $bot->purge_page($page);
1293        print "$page: $ok\n";
1294    }
1295
1296=cut
1297
# Purge the server cache for one page, or for several when an arrayref of
# titles is given. Returns the number of pages the API reports as purged
# (0 — i.e. false — when nothing was purged).
sub purge_page {
    my ($self, $page) = @_;

    # Accept either a single title or an arrayref of titles.
    my $titles = (ref $page eq 'ARRAY') ? join('|', @$page) : $page;

    my $res = $self->{api}->api({
        action => 'purge',
        titles => $titles,
    });
    return $self->_handle_api_error() unless $res;

    # Count only entries the API flagged as actually purged.
    my $purged = grep { exists $_->{'purged'} } @{ $res->{'purge'} };
    return $purged;
}
1324
1325=head2 get_namespace_names()
1326
1327get_namespace_names returns a hash linking the namespace id, such as 1, to its named equivalent, such as "Talk".
1328
1329=cut
1330
# Return a hash mapping namespace ids to their localized names (1 => 'Talk').
# Retries the siteinfo query up to twice if the result looks incomplete.
# Fix: the retry counter used to be read from $_[0] after the shifts, which
# is undef on the first (external) call and triggered an uninitialized-value
# warning in the `$_[0] > 1` comparison; it is now unpacked with a default.
sub get_namespace_names {
    my $self  = shift;
    my $tries = shift || 0;    # Internal retry counter; external callers pass nothing
    my %return;

    my $res = $self->{api}->api({
            action => 'query',
            meta   => 'siteinfo',
            siprop => 'namespaces'
    });
    return $self->_handle_api_error() unless $res;
    foreach my $id (keys %{ $res->{query}->{namespaces} }) {
        $return{$id} = $res->{query}->{namespaces}->{$id}->{'*'};
    }

    # A usable result contains ns1; otherwise retry (at most twice).
    if ($return{1} or $tries > 1) {
        return %return;
    }
    return $self->get_namespace_names($tries + 1);
}
1350
1351=head2 image_usage($image[,$ns[,$filter,[$options]]])
1352
1353Gets a list of pages which include a certain image. Additional parameters are the namespace number to fetch results from (or an arrayref of multiple namespace numbers); $filter is 'all', 'redirect' (to return only redirects), or 'nonredirects' (to return no redirects). $options is a hashref as described in the section for linksearch().
1354
1355    my @pages = $bot->image_usage("File:Albert Einstein Head.jpg");
1356
1357or, make use of the options hashref to do incremental processing:
1358
1359    $bot->image_usage("File:Albert Einstein Head.jpg", undef, undef, {hook=>\&mysub, max=>5});
1360    sub mysub {
1361        my $res = shift;
1362        foreach my $page (@$res) {
1363            my $title = $page->{'title'};
1364            print "$title\n";
1365        }
1366    }
1367
1368=cut
1369
# Return the titles of pages using $image (list=imageusage). $ns is a
# namespace number or arrayref; $filter is all/redirects/nonredirects;
# $options is passed to MediaWiki::API::list() (1 returned when a hook is
# in use; max => 0 fetches everything).
# Fixes: the prefix test m/^File:|Image:/ anchored '^' only to 'File:', so
# 'Image:' matched anywhere in the title — the grouped form anchors both
# alternatives. The $filter validation is also anchored now, so junk values
# are ignored rather than partially matched.
sub image_usage {
    my $self    = shift;
    my $image   = shift;
    my $ns      = shift;
    my $filter  = shift;
    my $options = shift;

    # Ensure the title carries a file-namespace prefix.
    if ($image !~ m/^(?:File|Image):/) {
        my $ns_data       = $self->_get_ns_data();
        my $image_ns_name = $ns_data->{'6'};    # Localized name for ns6 ('File')
        if ($image !~ m/^\Q$image_ns_name\E:/) {
            $image = "$image_ns_name:$image";
        }
    }

    $options->{'max'} = 1 unless defined($options->{'max'});
    delete($options->{'max'}) if $options->{'max'} == 0;

    $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');    # Allow array of namespaces

    my $hash = {
        action      => 'query',
        list        => 'imageusage',
        iutitle     => $image,
        iunamespace => $ns,
    };
    # Anchored validation; invalid filter values are dropped.
    if (defined($filter) and $filter =~ m/^(all|redirects|nonredirects)$/) {
        $hash->{'iufilterredir'} = $1;
    }

    my $res = $self->{api}->list($hash, $options);
    return $self->_handle_api_error() unless $res;
    return 1 if (!ref $res);    # When using a callback hook, this won't be a reference

    my @pages;
    foreach my $hashref (@$res) {
        push(@pages, $hashref->{'title'});
    }
    return @pages;
}
1410
1411=head2 links_to_image($image)
1412
1413A backward-compatible call to image_usage(). You can provide only the image name.
1414
1415=cut
1416
# Thin backward-compatibility wrapper around image_usage(); accepts only
# the image name and forwards it unchanged.
sub links_to_image {
    my ($self, $image) = @_;
    return $self->image_usage($image);
}
1421
1422=head2 is_blocked($user)
1423
1424Checks if a user is currently blocked.
1425
1426=cut
1427
# Return 1 if $user is currently blocked, 0 if not. With bklimit=1 the API
# can only report zero or one block, so any other count yields undef.
sub is_blocked {
    my ($self, $user) = @_;

    # http://en.wikipedia.org/w/api.php?action=query&meta=blocks&bkusers=$user&bklimit=1&bkprop=id
    my $res = $self->{api}->api({
        action  => 'query',
        list    => 'blocks',
        bkusers => $user,
        bklimit => 1,
        bkprop  => 'id',
    });
    return $self->_handle_api_error() unless $res;

    my $count = scalar @{ $res->{query}->{"blocks"} };    # How many blocks came back
    return 1 if $count == 1;
    return 0 if $count == 0;
    return;    # Can't happen with bklimit => 1
}
1454
1455=head2 test_blocked($user)
1456
1457Retained for backwards compatibility. Use is_blocked($user) for clarity.
1458
1459=cut
1460
# Deprecated alias retained for backwards compatibility; prefer is_blocked().
sub test_blocked {
    return is_blocked(@_);
}
1464
1465=head2 test_image_exists($page)
1466
1467Checks if an image exists at $page. 0 means no, 1 means yes, local, 2
1468means on commons, 3 means doesn't exist but there is text on the page.
1469If you pass in an arrayref of images, you'll get out an arrayref of
1470results.
1471
1472    my $exists = $bot->test_image_exists('File:Albert Einstein Head.jpg');
1473    if ($exists == 0) {
1474        print "Doesn't exist\n";
1475    }
1476    elsif ($exists == 1) {
1477        print "Exists locally\n";
1478    }
1479    elsif ($exists == 2) {
1480        print "Exists on Commons\n";
1481    }
1482
1483=cut
1484
# Classify whether an image exists at the given title(s) via prop=imageinfo:
#   0 = does not exist
#   1 = exists locally
#   2 = exists on a shared repository (e.g. Commons)
#   3 = no image, but a local description page exists
# A single title yields a single scalar; an arrayref of titles yields an
# arrayref of codes.
# NOTE(review): in the multi case, codes are unshifted while iterating hash
# keys, whose order is arbitrary in Perl — the output order is NOT guaranteed
# to match the input order; confirm before relying on it.
sub test_image_exists {
    my $self  = shift;
    my $image = shift;

    my $multi = 0;
    if (ref $image eq 'ARRAY') {
        $image = join('|', @$image);
        $multi = 1; # so we know whether to return an arrayref or a single scalar
    }

    my $res = $self->{api}->api({
        action  => 'query',
        titles  => $image,
        iilimit => 1,
        prop    => 'imageinfo'
    });
    return $self->_handle_api_error() unless $res;

    my @return;
    # use Data::Dumper; print STDERR Dumper($res) and die;
    foreach my $id (keys %{ $res->{query}->{pages} }) {
        my $title = $res->{query}->{pages}->{$id}->{title};
        # 'shared' repository means the file lives on a shared host (Commons).
        if ($res->{query}->{pages}->{$id}->{imagerepository} eq 'shared') {
            if ($multi) {
                unshift @return, 2;
            }
            else {
                return 2;
            }
        }
        # A 'missing' key means the page does not exist at all.
        elsif (exists($res->{query}->{pages}->{$id}->{missing})) {
            if ($multi) {
                unshift @return, 0;
            }
            else {
                return 0;
            }
        }
        # Empty repository: the page exists but holds no image.
        elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq '') {
            if ($multi) {
                unshift @return, 3;
            }
            else {
                return 3;
            }
        }
        # 'local' repository: the image is hosted on this wiki.
        elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq 'local') {
            if ($multi) {
                unshift @return, 1;
            }
            else {
                return 1;
            }
        }
    }

    # use Data::Dumper; print STDERR Dumper(\@return) and die;
    return \@return;
}
1544
1545=head2 get_pages_in_namespace($namespace_id, $page_limit)
1546
1547Returns an array containing the names of all pages in the specified namespace. The $namespace_id must be a number, not a namespace name. Setting $page_limit is optional. If $page_limit is over 500, it will be rounded up to the next multiple of 500.
1548
1549=cut
1550
1551sub get_pages_in_namespace {
1552    my $self      = shift;
1553    my $namespace = shift;
1554    my $limit     = shift || 500;
1555    my $options   = shift;
1556    $limit = 5000 if $self->{'highlimits'};
1557
1558    my $hash = {
1559        action      => 'query',
1560        list        => 'allpages',
1561        apnamespace => $namespace,
1562        aplimit     => $limit,
1563    };
1564    $options->{'max'} = 1 unless $options->{'max'};
1565
1566    my $res = $self->{api}->list($hash, $options);
1567    return $self->_handle_api_error() unless $res;
1568    return 1 if (!ref $res);    # Not a ref when using callback
1569    my @return;
1570    foreach (@{$res}) {
1571        push @return, $_->{title};
1572    }
1573

Large files files are truncated, but you can click here to view the full file