/tags/3.2/lib/MediaWiki/Bot.pm
Perl | 2157 lines | 1832 code | 304 blank | 21 comment | 159 complexity | 3130e4109f8f2d2523d26e9963bc793e MD5 | raw file
Possible License(s): GPL-3.0
Large files are truncated, but you can click here to view the full file
1package MediaWiki::Bot; 2# ABSTRACT: a MediaWiki bot framework written in Perl 3 4use strict; 5use warnings; 6use WWW::Mechanize; 7use HTML::Entities; 8use URI::Escape; 9use XML::Simple; 10use Carp; 11use URI::Escape qw(uri_escape_utf8); 12use Digest::MD5 qw(md5_hex); 13use Encode qw(encode_utf8); 14use MediaWiki::API; 15 16use Module::Pluggable search_path => [qw(MediaWiki::Bot::Plugin)], 'require' => 1; 17foreach my $plugin (__PACKAGE__->plugins) { 18 19 #print "Found plugin $plugin\n"; 20 $plugin->import(); 21} 22 23our $VERSION = '3.2.0'; 24 25=head1 SYNOPSIS 26 27 use MediaWiki::Bot; 28 29 my $bot = MediaWiki::Bot->new({ 30 useragent => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)', 31 assert => 'bot', 32 protocol => 'https', 33 host => 'secure.wikimedia.org', 34 path => 'wikipedia/meta/w', 35 login_data => { username => "Mike's bot account", password => "password" }, 36 }); 37 38 my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard"); 39 print "Reverting to $revid\n" if defined($revid); 40 $bot->revert('User:Mike.lifeguard', $revid, 'rvv'); 41 42=head1 DESCRIPTION 43 44MediaWiki::Bot is a framework that can be used to write bots which interface 45with the MediaWiki API (L<http://en.wikipedia.org/w/api.php>). 46 47=head1 METHODS 48 49=head2 new($options_hashref) 50 51Calling MediaWiki::Bot->new() will create a new MediaWiki::Bot object. 52 53=over 4 54 55=item * 56agent sets a custom useragent 57 58=item * 59assert sets a parameter for the AssertEdit extension (commonly 'bot'). Refer to L<http://mediawiki.org/wiki/Extension:AssertEdit>. 60 61=item * 62operator allows the bot to send you a message when it fails an assert, and will be integrated into the default useragent (which may not be used if you set agent yourself). The message will tell you that $useragent is logged out, so use a descriptive one if you set it. 63 64=item * 65maxlag allows you to set the maxlag parameter (default is the recommended 5s). 
Please refer to the MediaWiki documentation prior to changing this from the default. 66 67=item * 68protocol allows you to specify 'http' or 'https' (default is 'http'). This is commonly used with the domain and path settings below. 69 70=item * 71host sets the domain name of the wiki to connect to. 72 73=item * 74path sets the path to api.php (with no leading or trailing slash). 75 76=item * 77login_data is a hashref of credentials to pass to login(). See that section for a description. 78 79=item * 80debug is whether to provide debug output. 1 provides only error messages; 2 provides further detail on internal operations. 81 82=back 83 84For example: 85 86 my $bot = MediaWiki::Bot->new({ 87 useragent => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)', 88 assert => 'bot', 89 protocol => 'https', 90 host => 'secure.wikimedia.org', 91 path => 'wikipedia/meta/w', 92 login_data => { username => "Mike's bot account", password => "password" }, 93 }); 94 95For backward compatibility, you can specify up to three parameters: 96 97 my $bot = MediaWiki::Bot->new('MediaWiki::Bot 2.3.1 (User:Mike.lifeguard)', $assert, $operator); 98 99This deprecated form will never do auto-login or autoconfiguration. 
100 101=cut 102 103sub new { 104 my $package = shift; 105 my $agent; 106 my $assert; 107 my $operator; 108 my $maxlag; 109 my $protocol; 110 my $host; 111 my $path; 112 my $login_data; 113 my $debug; 114 115 if (ref $_[0] eq 'HASH') { 116 $agent = $_[0]->{'agent'}; 117 $assert = $_[0]->{'assert'}; 118 $operator = $_[0]->{'operator'}; 119 $maxlag = $_[0]->{'maxlag'}; 120 $protocol = $_[0]->{'protocol'}; 121 $host = $_[0]->{'host'}; 122 $path = $_[0]->{'path'}; 123 $login_data = $_[0]->{'login_data'}; 124 $debug = $_[0]->{'debug'}; 125 } 126 else { 127 $agent = shift; 128 $assert = shift; 129 $operator = shift; 130 $maxlag = shift; 131 $protocol = shift; 132 $host = shift; 133 $path = shift; 134 $debug = shift; 135 } 136 137 $assert =~ s/[&?]assert=// if $assert; # Strip out param part, leaving just the value 138 $operator =~ s/^User://i if $operator; 139 140 # Set defaults 141 unless ($agent) { 142 $agent = "MediaWiki::Bot/$VERSION"; 143 $agent .= " (User:$operator)" if $operator; 144 } 145 146 my $self = bless({}, $package); 147 $self->{mech} = WWW::Mechanize->new( 148 cookie_jar => {}, 149 onerror => \&Carp::carp, 150 stack_depth => 1 151 ); 152 $self->{mech}->agent($agent); 153 $self->{errstr} = ''; 154 $self->{assert} = $assert; 155 $self->{operator} = $operator; 156 $self->{'debug'} = $debug || 0; 157 $self->{api} = MediaWiki::API->new(); 158 $self->{api}->{ua}->agent($agent); 159 160 # Set wiki (handles setting $self->{host} etc) 161 $self->set_wiki({ 162 protocol => $protocol, 163 host => $host, 164 path => $path, 165 }); 166 167 $self->{api}->{config}->{max_lag} = $maxlag || 5; 168 $self->{api}->{config}->{max_lag_delay} = 1; 169 $self->{api}->{config}->{retries} = 5; 170 $self->{api}->{config}->{max_lag_retries} = -1; 171 $self->{api}->{config}->{retry_delay} = 30; 172 173 # Log-in, and maybe autoconfigure 174 if ($login_data) { 175 my $success = $self->login($login_data); 176 if ($success) { 177 return $self; 178 } 179 else { 180 carp "Couldn't log in with 
supplied settings" if $self->{'debug'}; 181 return; 182 } 183 } 184 185 return $self; 186} 187 188=head2 set_wiki($options) 189 190Set what wiki to use. Host is the domain name; path is the path before api.php (usually 'w'); protocol is either 'http' or 'https'. For example: 191 192 $bot->set_wiki( 193 protocol => 'https', 194 host => 'secure.wikimedia.org', 195 path => 'wikipedia/meta/w', 196 ); 197 198For backward compatibility, you can specify up to two parameters in this deprecated form: 199 200 $bot->set_wiki($host, $path); 201 202If you don't set any parameter, it's previous value is used. If it has never been set, the default settings are 'http', 'en.wikipedia.org' and 'w'. 203 204=cut 205 206sub set_wiki { 207 my $self = shift; 208 my $host; 209 my $path; 210 my $protocol; 211 212 if (ref $_[0] eq 'HASH') { 213 $host = $_[0]->{'host'}; 214 $path = $_[0]->{'path'}; 215 $protocol = $_[0]->{'protocol'}; 216 } 217 else { 218 $host = shift; 219 $path = shift; 220 } 221 222 # Set defaults 223 $protocol = $self->{'protocol'} || 'http' unless defined($protocol); 224 $host = $self->{'host'} || 'en.wikipedia.org' unless defined($host); 225 $path = $self->{'path'} || 'w' unless defined($path); 226 227 # Clean up the parts we will build a URL with 228 $protocol =~ s,://$,,; 229 if ($host =~ m,^(http|https)(://)?, && !$protocol) { 230 $protocol = $1; 231 } 232 $host =~ s,^https?://,,; 233 $host =~ s,/$,,; 234 $path =~ s,/$,,; 235 236 # Invalidate wiki-specific cached data 237 if ( ((defined($self->{'host'})) and ($self->{'host'} ne $host)) 238 or ((defined($self->{'path'})) and ($self->{'path'} ne $path)) 239 or ((defined($self->{'protocol'})) and ($self->{'protocol'} ne $protocol)) 240 ) { 241 delete $self->{'ns_data'} if $self->{'ns_data'}; 242 } 243 244 $self->{protocol} = $protocol; 245 $self->{host} = $host; 246 $self->{path} = $path; 247 248 $self->{api}->{config}->{api_url} = $path 249 ? 
"$protocol://$host/$path/api.php" 250 : "$protocol://$host/api.php"; # $path is '', so don't use http://domain.com//api.php 251 warn "Wiki set to " . $self->{api}->{config}{api_url} . "\n" if $self->{'debug'} > 1; 252 253 return 1; 254} 255 256=head2 login($login_hashref) 257 258Logs the use $username in, optionally using $password. First, an attempt will be made to use cookies to log in. If this fails, an attempt will be made to use the password provided to log in, if any. If the login was successful, returns true; false otherwise. 259 260 $bot->login({ 261 username => $username, 262 password => $password, 263 }) or die "Login failed"; 264 265Once logged in, attempt to do some simple auto-configuration. At present, this consists of: 266 267=over 4 268 269=item * 270 271Warning if the account doesn't have the bot flag, and isn't a sysop account. 272 273=item * 274 275Setting the use of apihighlimits if the account has that userright. 276 277=item * 278 279Setting an appropriate default assert. 280 281=back 282 283You can skip this autoconfiguration by passing C<autoconfig =E<gt> 0> 284 285=head3 Single User Login 286 287On WMF wikis, C<do_sul> specifies whether to log in on all projects. The default is false. But even when false, you still get a CentralAuth cookie for, and are thus logged in on, all languages of a given domain (*.wikipedia.org, for example). When set, a login is done on each WMF domain so you are logged in on all ~800 content wikis. Since C<*.wikimedia.org> is not possible, we explicitly include meta, commons, incubator, and wikispecies. When C<do_sul> is set, the return is the number of domains that login was successful for. 
This allows callers to do the following:

    $bot->login({
        username => $username,
        password => $password,
        do_sul   => 1,
    }) or die "SUL failed";

For backward compatibility, you can call this as

    $bot->login($username, $password);

This deprecated form will never do autoconfiguration or SUL login.

If you need to supply basic auth credentials, pass a hashref of data as described by L<LWP::UserAgent>:

    $bot->login({
        username   => $username,
        password   => $password,
        basic_auth => { netloc => "private.wiki.com:80",
                        realm  => "Authentication Realm",
                        uname  => "Basic auth username",
                        pass   => "password",
                      }
    }) or die "Couldn't log in";

=cut

sub login {
    my $self = shift;
    my $username;
    my $password;
    my $lgdomain;
    my $autoconfig;
    my $basic_auth;
    my $do_sul;
    if (ref $_[0] eq 'HASH') {
        $username = $_[0]->{'username'};
        $password = $_[0]->{'password'};
        # autoconfig defaults ON for the hashref form only
        $autoconfig = defined($_[0]->{'autoconfig'}) ? $_[0]->{'autoconfig'} : 1;
        $basic_auth = $_[0]->{'basic_auth'};
        $do_sul = $_[0]->{'do_sul'} || 0;
        $lgdomain = $_[0]->{'lgdomain'};
    }
    else {    # Deprecated positional form: no autoconfig, no SUL
        $username = shift;
        $password = shift;
        $autoconfig = 0;
        $do_sul = 0;
    }
    $self->{'username'} = $username;    # Remember who we are

    # Handle basic auth first, if needed
    if ($basic_auth) {
        warn "Applying basic auth credentials" if $self->{'debug'} > 1;
        $self->{api}->{ua}->credentials(
            $basic_auth->{'netloc'},
            $basic_auth->{'realm'},
            $basic_auth->{'uname'},
            $basic_auth->{'pass'}
        );
    }
    # secure.wikimedia.org serves all projects from one domain, so a
    # per-domain SUL pass is pointless there
    $do_sul = 0 if (
        ($self->{'protocol'} eq 'https') and
        ($self->{'host'} eq 'secure.wikimedia.org') );

    if ($do_sul) {
        my $debug    = $self->{'debug'};    # Remember this for later
        my $host     = $self->{'host'};
        my $path     = $self->{'path'};
        my $protocol = $self->{'protocol'};

        $self->{'debug'} = 0;    # Turn off debugging for these internal calls
        my @logins;              # Keep track of our successes
        my @WMF_projects = qw(
            en.wikipedia.org
            en.wiktionary.org
            en.wikibooks.org
            en.wikinews.org
            en.wikiquote.org
            en.wikisource.org
            en.wikiversity.org
            meta.wikimedia.org
            commons.wikimedia.org
            species.wikimedia.org
            incubator.wikimedia.org
        );

        # Recurse into login() once per domain, with do_sul off to avoid
        # infinite recursion; CentralAuth cookies cover sister languages.
        SUL: foreach my $project (@WMF_projects) {
            print STDERR "Logging in on $project..." if $debug > 1;
            $self->set_wiki({
                host => $project,
            });
            my $success = $self->login({
                username   => $username,
                password   => $password,
                lgdomain   => $lgdomain,
                do_sul     => 0,
                autoconfig => 0,
            });
            warn ($success ? " OK\n" : " FAILED\n") if $debug > 1;
            push(@logins, $success);
        }
        $self->set_wiki({    # Switch back to original wiki
            protocol => $protocol,
            host     => $host,
            path     => $path,
        });

        # Return value is the count of successful logins, not a boolean
        my $sum = 0;
        $sum += $_ for @logins;
        my $total = scalar @WMF_projects;
        warn "$sum/$total logins succeeded\n" if $debug > 1;
        $self->{'debug'} = $debug;    # Reset debug to its old value

        return $sum;
    }

    # Try cookie-based login first; cookies are stored per-username in CWD
    my $cookies = ".mediawiki-bot-$username-cookies";
    if (-r $cookies) {
        $self->{mech}->{cookie_jar}->load($cookies);
        $self->{mech}->{cookie_jar}->{ignore_discard} = 1;
        $self->{api}->{ua}->{cookie_jar}->load($cookies);
        $self->{api}->{ua}->{cookie_jar}->{ignore_discard} = 1;

        my $logged_in = $self->_is_loggedin();
        if ($logged_in) {
            $self->_do_autoconfig() if $autoconfig;
            warn "Logged in successfully with cookies" if $self->{'debug'} > 1;
            return 1;    # If we're already logged in, nothing more is needed
        }
    }

    unless ($password) {
        carp "No login cookies available, and no password to continue with authentication" if $self->{'debug'};
        return 0;
    }

    # Password login; save cookies only if the file (or CWD) is writable
    my $res = $self->{api}->api({
        action     => 'login',
        lgname     => $username,
        lgpassword => $password,
        lgdomain   => $lgdomain
    }) or return $self->_handle_api_error();
    $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
    $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));

    # Newer MediaWiki requires a second request echoing back a login token
    if ($res->{'login'}->{'result'} eq 'NeedToken') {
        my $token = $res->{'login'}->{'token'};
        $res = $self->{api}->api({
            action     => 'login',
            lgname     => $username,
            lgpassword => $password,
            lgdomain   => $lgdomain,
            lgtoken    => $token,
        }) or return $self->_handle_api_error();

        $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
        $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));
    }

    if ($res->{'login'}->{'result'} eq 'Success') {
        if ($res->{'login'}->{'lgusername'} eq $self->{'username'}) {
            $self->_do_autoconfig() if $autoconfig;
            warn "Logged in successfully with password" if $self->{'debug'} > 1;
        }
    }

    # True only when the API confirms success for the expected username
    return (
        (defined($res->{'login'}->{'lgusername'})) and
        (defined($res->{'login'}->{'result'})) and
        ($res->{'login'}->{'lgusername'} eq $self->{'username'}) and
        ($res->{'login'}->{'result'} eq 'Success')
    );
}

=head2 set_highlimits($flag)

Tells MediaWiki::Bot to start/stop using APIHighLimits for certain queries.

    $bot->set_highlimits(1);

=cut

sub set_highlimits {
    my $self = shift;
    # Calling with no argument enables highlimits
    my $highlimits = defined($_[0]) ? shift : 1;

    $self->{highlimits} = $highlimits;
    return 1;
}

=head2 logout()

The logout procedure deletes the login tokens and other browser cookies.

    $bot->logout();

=cut

sub logout {
    my $self = shift;

    my $hash = {
        action => 'logout',
    };
    $self->{api}->api($hash);
    return 1;
}

=head2 edit($options_hashref)

Puts text on a page. If provided, use a specified edit summary, mark the edit as minor, as a non-bot edit, or add an assertion. Set section to edit a single section instead of the whole page. An MD5 hash is sent to guard against data corruption while in transit.
500 501 my $text = $bot->get_text('My page'); 502 $text .= "\n\n* More text\n"; 503 $bot->edit({ 504 page => 'My page', 505 text => $text, 506 summary => 'Adding new content', 507 section => 'new', 508 }); 509 510You can also call this using the deprecated form: 511 512 $bot->edit($page, $text, $summary, $is_minor, $assert, $markasbot); 513 514=cut 515 516sub edit { 517 my $self = shift; 518 my $page; 519 my $text; 520 my $summary; 521 my $is_minor; 522 my $assert; 523 my $markasbot; 524 my $section; 525 526 if (ref $_[0] eq 'HASH') { 527 $page = $_[0]->{'page'}; 528 $text = $_[0]->{'text'}; 529 $summary = $_[0]->{'summary'}; 530 $is_minor = $_[0]->{'is_minor'}; 531 $assert = $_[0]->{'assert'}; 532 $markasbot = $_[0]->{'markasbot'}; 533 $section = $_[0]->{'section'}; 534 } 535 else { 536 $page = shift; 537 $text = shift; 538 $summary = shift; 539 $is_minor = shift; 540 $assert = shift; 541 $markasbot = shift; 542 $section = shift; 543 } 544 545 # Set defaults 546 $summary = 'BOT: Changing page text' unless $summary; 547 if ($assert) { 548 $assert =~ s/^[&?]assert=//; 549 } 550 else { 551 $assert = $self->{'assert'}; 552 } 553 $is_minor = 1 unless defined($is_minor); 554 $markasbot = 1 unless defined($markasbot); 555 556 my ($edittoken, $lastedit, $tokentime) = $self->_get_edittoken($page); 557 return $self->_handle_api_error() unless $edittoken; 558 my $hash = { 559 action => 'edit', 560 title => $page, 561 token => $edittoken, 562 text => $text, 563 md5 => md5_hex(encode_utf8($text)), # Guard against data corruption 564 # Pass only bytes to md5_hex() 565 summary => $summary, 566 basetimestamp => $lastedit, # Guard against edit conflicts 567 starttimestamp => $tokentime, # Guard against the page being deleted/moved 568 bot => $markasbot, 569 assert => $assert, 570 minor => $is_minor, 571 }; 572 $hash->{'section'} = $section if defined($section); 573 574 my $res = $self->{api}->api($hash); # Check if MediaWiki::API::edit() is good enough 575 return 
$self->_handle_api_error() unless $res; 576 if ($res->{edit}->{result} && $res->{edit}->{result} eq 'Failure') { 577 if ($self->{mech}->{agent}) { 578 carp 'Assertion failed as ' . $self->{mech}->{agent} if $self->{'debug'}; 579 if ($self->{operator}) { 580 my $optalk = $self->get_text('User talk:' . $self->{operator}); 581 if (defined($optalk)) { 582 print "Sending warning!\n"; 583 $self->edit( 584 page => "User talk:$self->{operator}", 585 text => $optalk 586 . "\n\n==Error with " 587 . $self->{mech}->{agent} . "==\n" 588 . $self->{mech}->{agent} 589 . ' needs to be logged in! ~~~~', 590 summary => 'bot issue', 591 is_minor => 0, 592 assert => '' 593 ); 594 } 595 } 596 return; 597 } 598 else { 599 carp 'Assertion failed' if $self->{'debug'}; 600 } 601 } 602 return $res; 603} 604 605=head2 move($from, $to, $reason, $options_hashref) 606 607This moves a page from $from to $to. If you wish to specify more options (like whether to suppress creation of a redirect), use $options_hashref. 608 609=over 4 610 611=item * 612movetalk specifies whether to attempt to the talk page. 613 614=item * 615noredirect specifies whether to suppress creation of a redirect. 616 617=item * 618movesubpages specifies whether to move subpages, if applicable. 619 620=item * 621watch and unwatch add or remove the page and the redirect from your watchlist. 622 623=item * 624ignorewarnings ignores warnings. 
625 626=back 627 628 my @pages = ("Humor", "Rumor"); 629 foreach my $page (@pages) { 630 my $to = $page; 631 $to =~ s/or$/our/; 632 $bot->move($page, $to, "silly 'merricans"); 633 } 634 635=cut 636 637sub move { 638 my $self = shift; 639 my $from = shift; 640 my $to = shift; 641 my $reason = shift; 642 my $opts = shift; 643 644 my $hash = { 645 action => 'move', 646 from => $from, 647 to => $to, 648 reason => $reason, 649 }; 650 $hash->{'movetalk'} = $opts->{'movetalk'} if defined($opts->{'movetalk'}); 651 $hash->{'noredirect'} = $opts->{'noredirect'} if defined($opts->{'noredirect'}); 652 653 my $res = $self->{api}->edit($hash); 654 return $self->_handle_api_error() unless $res; 655 return $res; # should we return something more useful? 656} 657 658=head2 get_history($pagename[,$limit]) 659 660Returns an array containing the history of the specified page, with $limit number of revisions. The array structure contains 'revid', 'user', 'comment', 'timestamp_date', and 'timestamp_time'. 661 662=cut 663 664sub get_history { 665 my $self = shift; 666 my $pagename = shift; 667 my $limit = shift || 5; 668 my $rvstartid = shift || ''; 669 my $direction = shift; 670 671 my @return; 672 my @revisions; 673 674 my $hash = { 675 action => 'query', 676 prop => 'revisions', 677 titles => $pagename, 678 rvprop => 'ids|timestamp|user|comment', 679 rvlimit => $limit 680 }; 681 682 $hash->{rvstartid} = $rvstartid if ($rvstartid); 683 $hash->{direction} = $direction if ($direction); 684 685 my $res = $self->{api}->api($hash); 686 return $self->_handle_api_error() unless $res; 687 my ($id) = keys %{ $res->{query}->{pages} }; 688 my $array = $res->{query}->{pages}->{$id}->{revisions}; 689 690 foreach my $hash (@{$array}) { 691 my $revid = $hash->{revid}; 692 my $user = $hash->{user}; 693 my ($timestamp_date, $timestamp_time) = split(/T/, $hash->{timestamp}); 694 $timestamp_time =~ s/Z$//; 695 my $comment = $hash->{comment}; 696 push( 697 @return, 698 { 699 revid => $revid, 700 user => 
$user, 701 timestamp_date => $timestamp_date, 702 timestamp_time => $timestamp_time, 703 comment => $comment, 704 }); 705 } 706 return @return; 707} 708 709=head2 get_text($pagename,[$revid,$section_number]) 710 711Returns an the wikitext of the specified page. If $revid is defined, it will return the text of that revision; if $section_number is defined, it will return the text of that section. A blank page will return wikitext of "" (which evaluates to false in Perl, but is defined); a nonexistent page will return undef (which also evaluates to false in Perl, but is obviously undefined). You can distinguish between blank and nonexistent by using defined(): 712 713 my $wikitext = $bot->get_text('Page title'); 714 print "Wikitext: $wikitext\n" if defined $wikitext; 715 716=cut 717 718sub get_text { 719 my $self = shift; 720 my $pagename = shift; 721 my $revid = shift; 722 my $section = shift; 723 724 my $hash = { 725 action => 'query', 726 titles => $pagename, 727 prop => 'revisions', 728 rvprop => 'content', 729 }; 730 $hash->{rvstartid} = $revid if ($revid); 731 $hash->{rvsection} = $section if ($section); 732 733 my $res = $self->{api}->api($hash); 734 return $self->_handle_api_error() unless $res; 735 my ($id, $data) = %{ $res->{query}->{pages} }; 736 if ($id == -1) { # Page doesn't exist 737 return; 738 } 739 else { # Page exists 740 my $wikitext = $data->{revisions}[0]->{'*'}; 741 return $wikitext; 742 } 743} 744 745=head2 get_id($pagename) 746 747Returns the id of the specified page. Returns undef if page does not exist. 
748 749 my $pageid = $bot->get_id("Main Page"); 750 croak "Page doesn't exist\n" if !defined($pageid); 751 752=cut 753 754sub get_id { 755 my $self = shift; 756 my $pagename = shift; 757 758 my $hash = { 759 action => 'query', 760 titles => $pagename, 761 }; 762 763 my $res = $self->{api}->api($hash); 764 return $self->_handle_api_error() unless $res; 765 my ($id, $data) = %{ $res->{query}->{pages} }; 766 if ($id == -1) { 767 return; 768 } 769 else { 770 return $id; 771 } 772} 773 774=head2 get_pages(\@pages) 775 776Returns the text of the specified pages in a hashref. Content of undef means page does not exist. Also handles redirects or article names that use namespace aliases. 777 778 my @pages = ('Page 1', 'Page 2', 'Page 3'); 779 my $thing = $bot->get_pages(\@pages); 780 foreach my $page (keys %$thing) { 781 my $text = $thing->{$page}; 782 print "$text\n" if defined($text); 783 } 784 785=cut 786 787sub get_pages { 788 my $self = shift; 789 my @pages = (ref $_[0] eq 'ARRAY') ? @{$_[0]} : @_; 790 my %return; 791 792 my $hash = { 793 action => 'query', 794 titles => join('|', @pages), 795 prop => 'revisions', 796 rvprop => 'content', 797 }; 798 799 my $diff; # Used to track problematic article names 800 map { $diff->{$_} = 1; } @pages; 801 802 my $res = $self->{api}->api($hash); 803 return $self->_handle_api_error() unless $res; 804 805 foreach my $id (keys %{ $res->{query}->{pages} }) { 806 my $page = $res->{'query'}->{'pages'}->{$id}; 807 if ($diff->{ $page->{'title'} }) { 808 $diff->{ $page->{'title'} }++; 809 } 810 else { 811 next; 812 } 813 814 if (defined($page->{'missing'})) { 815 $return{ $page->{'title'} } = undef; 816 next; 817 } 818 if (defined($page->{'revisions'})) { 819 my $revisions = @{ $page->{'revisions'} }[0]->{'*'}; 820 if (!defined $revisions) { 821 $return{ $page->{'title'} } = $revisions; 822 } 823 elsif (length($revisions) < 150 && $revisions =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/) { # FRAGILE! 
824 my $redirect_to = $1; 825 $return{ $page->{'title'} } = $self->get_text($redirect_to); 826 } 827 else { 828 $return{ $page->{'title'} } = $revisions; 829 } 830 } 831 } 832 833 # Based on api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases 834 # Should be done on an as-needed basis! This is only correct for enwiki (and 835 # it is probably incomplete anyways, or will be eventually). 836 my $expand = { 837 'WP' => 'Wikipedia', 838 'WT' => 'Wikipedia talk', 839 'Image' => 'File', 840 'Image talk' => 'File talk', 841 }; 842 843 # Only for those article names that remained after the first part 844 # If we're here we are dealing most likely with a WP:CSD type of article name 845 for my $title (keys %$diff) { 846 if ($diff->{$title} == 1) { 847 my @pieces = split(/:/, $title); 848 if (@pieces > 1) { 849 $pieces[0] = ($expand->{ $pieces[0] } || $pieces[0]); 850 my $v = $self->get_text(join ':', @pieces); 851 warn "Detected article name that needed expanding $title\n" if $self->{'debug'} > 1; 852 853 $return{$title} = $v; 854 if ($v =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/) { 855 $v = $self->get_text($1); 856 $return{$title} = $v; 857 } 858 } 859 } 860 } 861 return \%return; 862} 863 864=head2 revert($pagename, $revid[,$summary]) 865 866Reverts the specified page to $revid, with an edit summary of $summary. A default edit summary will be used if $summary is omitted. 
867 868 my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard"); 869 print "Reverting to $revid\n" if defined($revid); 870 $bot->revert('User:Mike.lifeguard', $revid, 'rvv'); 871 872 873=cut 874 875sub revert { 876 my $self = shift; 877 my $pagename = shift; 878 my $revid = shift; 879 my $summary = shift || "Reverting to old revision $revid"; 880 881 my $text = $self->get_text($pagename, $revid); 882 my $res = $self->edit($pagename, $text, $summary); 883 return $res; 884} 885 886=head2 undo($pagename, $revid[,$summary[,$after]]) 887 888Reverts the specified $revid, with an edit summary of $summary, using the undo function. To undo all revisions from $revid up to but not including this one, set $after to another revid. If not set, just undo the one revision ($revid). 889 890=cut 891 892sub undo { 893 my $self = shift; 894 my $page = shift; 895 my $revid = shift; 896 my $summary = shift || "Reverting revision #$revid"; 897 my $after = shift; 898 $summary = "Reverting edits between #$revid & #$after" if defined($after); # Is that clear? Correct? 899 900 my ($edittoken, $basetimestamp, $starttimestamp) = $self->_get_edittoken($page); 901 my $hash = { 902 action => 'edit', 903 title => $page, 904 undo => $revid, 905 undoafter => $after, 906 summary => $summary, 907 token => $edittoken, 908 starttimestamp => $starttimestamp, 909 basetimestamp => $basetimestamp, 910 }; 911 912 my $res = $self->{api}->api($hash); 913 return $self->_handle_api_error() unless $res; 914 return $res; 915} 916 917=head2 get_last($page, $user) 918 919Returns the revid of the last revision to $page not made by $user. undef is returned if no result was found, as would be the case if the page is deleted. 
920 921 my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard"); 922 if defined($revid) { 923 print "Reverting to $revid\n"; 924 $bot->revert('User:Mike.lifeguard', $revid, 'rvv'); 925 } 926 927=cut 928 929sub get_last { 930 my $self = shift; 931 my $page = shift; 932 my $user = shift; 933 934 my $revertto = 0; 935 936 my $res = $self->{api}->api({ 937 action => 'query', 938 titles => $page, 939 prop => 'revisions', 940 rvlimit => 1, 941 rvprop => 'ids|user', 942 rvexcludeuser => $user, 943 }); 944 return $self->_handle_api_error() unless $res; 945 my ($id, $data) = %{ $res->{query}->{pages} }; 946 my $revid = $data->{'revisions'}[0]->{'revid'}; 947 return $revid; 948} 949 950=head2 update_rc($limit[,$options_hashref]) 951 952Returns an array containing the Recent Changes to the wiki Main namespace. The array structure contains 'title', 'revid', 'old_revid', and 'timestamp'. The $options_hashref is the same as described in the section on linksearch(). 953 954 my @rc = $bot->update_rc(5); 955 foreach my $hashref (@rc) { 956 my $title = $hash->{'title'}; 957 print "$title\n"; 958 } 959 960 # Or, use a callback for incremental processing: 961 my $options = { hook => \&mysub, }; 962 $bot->update_rc($options); 963 sub mysub { 964 my ($res) = @_; 965 foreach my $hashref (@$res) { 966 my $page = $hashref->{'title'}; 967 print "$page\n"; 968 } 969 } 970 971=cut 972 973sub update_rc { 974 my $self = shift; 975 my $limit = shift; 976 my $options = shift; 977 978 my $hash = { 979 action => 'query', 980 list => 'recentchanges', 981 rcnamespace => 0, 982 rclimit => $limit, 983 }; 984 $options->{'max'} = 1 unless $options->{'max'}; 985 986 my $res = $self->{api}->list($hash, $options); 987 return $self->_handle_api_error() unless $res; 988 return 1 if (!ref $res); # Not a ref when using callback 989 my @rc_table; 990 foreach my $hash (@{$res}) { 991 push( 992 @rc_table, 993 { 994 title => $hash->{'title'}, 995 revid => $hash->{'revid'}, 996 old_revid => 
$hash->{'old_revid'}, 997 timestamp => $hash->{'timestamp'}, 998 }); 999 } 1000 return @rc_table; 1001} 1002 1003=head2 what_links_here($page[,$filter[,$ns[,$options]]]) 1004 1005Returns an array containing a list of all pages linking to $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() for examples. 1006 1007A typical query: 1008 1009 my @links = $bot->what_links_here("Meta:Sandbox", undef, 1, {hook=>\&mysub}); 1010 sub mysub{ 1011 my ($res) = @_; 1012 foreach my $hash (@$res) { 1013 my $title = $hash->{'title'}; 1014 my $is_redir = $hash->{'redirect'}; 1015 print "Redirect: $title\n" if $is_redir; 1016 print "Page: $title\n" unless $is_redir; 1017 } 1018 } 1019 1020Transclusions are no longer handled by what_links_here() - use list_transcludes() instead. 
1021 1022=cut 1023 1024sub what_links_here { 1025 my $self = shift; 1026 my $page = shift; 1027 my $filter = shift; 1028 my $ns = shift; 1029 my $options = shift; 1030 1031 $ns = join('|', @$ns) if (ref $ns eq 'ARRAY'); # Allow array of namespaces 1032 if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) { # Verify $filter 1033 $filter = $1; 1034 } 1035 1036 # http://en.wikipedia.org/w/api.php?action=query&list=backlinks&bltitle=template:tlx 1037 my $hash = { 1038 action => 'query', 1039 list => 'backlinks', 1040 bltitle => $page, 1041 blnamespace => $ns, 1042 }; 1043 $hash->{'blfilterredir'} = $filter if $filter; 1044 $options->{'max'} = 1 unless $options->{'max'}; 1045 1046 my $res = $self->{api}->list($hash, $options); 1047 return $self->_handle_api_error() unless $res; 1048 return 1 if (!ref $res); # When using a callback hook, this won't be a reference 1049 my @links; 1050 foreach my $hashref (@$res) { 1051 my $title = $hashref->{'title'}; 1052 my $redirect = defined($hashref->{'redirect'}); 1053 push @links, { title => $title, redirect => $redirect }; 1054 } 1055 1056 return @links; 1057} 1058 1059=head2 list_transclusions($page[,$filter[,$ns[,$options]]]) 1060 1061Returns an array containing a list of all pages transcluding $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() or what_links_here() for examples. 

A typical query:

    $bot->list_transclusions("Template:Tlx", undef, 4, {hook => \&mysub});
    sub mysub{
        my ($res) = @_;
        foreach my $hash (@$res) {
            my $title = $hash->{'title'};
            my $is_redir = $hash->{'redirect'};
            print "Redirect: $title\n" if $is_redir;
            print "Page: $title\n" unless $is_redir;
        }
    }

=cut

sub list_transclusions {
    my ($self, $page, $filter, $ns, $options) = @_;

    # Accept either a single namespace number or an arrayref of them.
    $ns = join('|', @$ns) if ref $ns eq 'ARRAY';

    # Pass through only a recognized filter value.
    if (defined $filter and $filter =~ m/(all|redirects|nonredirects)/) {
        $filter = $1;
    }

    # http://en.wikipedia.org/w/api.php?action=query&list=embeddedin&eititle=Template:Stub
    my $query = {
        action      => 'query',
        list        => 'embeddedin',
        eititle     => $page,
        einamespace => $ns,
    };
    $query->{'eifilterredir'} = $filter if $filter;
    $options->{'max'} = 1 unless $options->{'max'};

    my $res = $self->{api}->list($query, $options);
    return $self->_handle_api_error() unless $res;
    return 1 unless ref $res;    # When using a callback hook, this won't be a reference

    return map {
        { title => $_->{'title'}, redirect => defined $_->{'redirect'} }
    } @$res;
}

=head2 get_pages_in_category($category_name[,$options_hashref])

Returns an array containing the names of all pages in the specified category (include Category: prefix). Does not recurse into sub-categories.

    my @pages = $bot->get_pages_in_category("Category:People on stamps of Gabon");
    print "The pages in Category:People on stamps of Gabon are:\n@pages\n";

The options hashref is as described in the section on linksearch(). Use { max => 0 } to get all results.

=cut

sub get_pages_in_category {
    my $self     = shift;
    my $category = shift;
    my $options  = shift;

    if ($category =~ m/:/) { # It might have a namespace name
        my ($cat, $title) = split(/:/, $category, 2);
        if ($cat ne 'Category') { # 'Category' is a canonical name for ns14
            my $ns_data     = $self->_get_ns_data();
            my $cat_ns_name = $ns_data->{'14'}; # ns14 gives us the localized name for 'Category'
            if ($cat ne $cat_ns_name) {
                # The colon belonged to the title itself, not a namespace
                $category = "$cat_ns_name:$category";
            }
        }
    }
    else { # Definitely no namespace name, since there's no colon
        $category = "Category:$category";
    }
    # Guard against an uninitialized-value warning when debug is unset
    warn "Category to fetch is [[$category]]" if $self->{'debug'} and $self->{'debug'} > 1;

    my $hash = {
        action  => 'query',
        list    => 'categorymembers',
        cmtitle => $category,
    };
    $options->{'max'} = 1 unless defined($options->{'max'});
    delete($options->{'max'}) if $options->{'max'} == 0;

    my $res = $self->{api}->list($hash, $options);
    # Check for API errors *before* the callback-hook check: on failure $res is
    # undef, which is also not a ref, so the old order reported success (1).
    # This also matches the order used by every other list-based method here.
    return $self->_handle_api_error() unless $res;
    return 1 if (!ref $res); # Not a hashref when using callback
    return map { $_->{'title'} } @$res;
}

=head2 get_all_pages_in_category($category_name[,$options_hashref])

Returns an array containing the names of ALL pages in the specified category (include the Category: prefix), including sub-categories. The $options_hashref is the same as described for get_pages_in_category().

=cut

{ # Instead of using the state pragma, use a bare block to scope %data
    my %data;

    sub get_all_pages_in_category {
        my $self          = shift;
        my $base_category = shift;
        my $options       = shift;
        $options->{'max'} = 0 unless defined($options->{'max'});

        my @first = $self->get_pages_in_category($base_category, $options);
        %data = () unless $_[0]; # This is a special flag for internal use.
                                 # It marks a call to this method as being
                                 # internal. Since %data is a fake state variable,
                                 # it needs to be cleared for every *external*
                                 # call, but not cleared when the call is recursive.

        my $ns_data     = $self->_get_ns_data();
        my $cat_ns_name = $ns_data->{'14'}; # localized name of the Category namespace

        foreach my $page (@first) {
            # \Q...\E: the namespace name is literal text, not a pattern; without
            # it, regex metacharacters in a localized name would corrupt the match
            if ($page =~ m/^\Q$cat_ns_name\E:/) {
                if (!exists($data{$page})) {
                    # A subcategory we haven't visited yet: recurse into it
                    $data{$page} = '';
                    my @pages = $self->get_all_pages_in_category($page, $options, 1);
                    foreach (@pages) {
                        $data{$_} = '';
                    }
                }
                else {
                    $data{$page} = '';
                }
            }
            else {
                $data{$page} = '';
            }
        }
        return keys %data;
    }
} # This ends the bare block around get_all_pages_in_category()

=head2 linksearch($link[,$ns[,$protocol[,$options]]])

Runs a linksearch on the specified link and returns an array containing anonymous hashes with keys 'url' for the outbound URL, and 'title' for the page the link is on. $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). You can search by $protocol (http is default). The optional $options hashref is fully documented in MediaWiki::API: Set `max` to limit the number of queries performed. Set `hook` to a subroutine reference to use a callback hook for incremental processing.

Set max in $options to get more than one query's worth of results:

    my $options = { max => 10, }; # I only want some results
    my @links = $bot->linksearch("slashdot.org", 1, undef, $options);
    foreach my $hash (@links) {
        my $url = $hash->{'url'};
        my $page = $hash->{'title'};
        print "$page: $url\n";
    }

You can also specify a callback function in $options:

    my $options = { hook => \&mysub, }; # I want to do incremental processing
    $bot->linksearch("slashdot.org", 1, undef, $options);
    sub mysub {
        my ($res) = @_;
        foreach my $hashref (@$res) {
            my $url  = $hashref->{'url'};
            my $page = $hashref->{'title'};
            print "$page: $url\n";
        }
    }

=cut

sub linksearch {
    my ($self, $link, $ns, $prot, $options) = @_;

    # Accept either a single namespace number or an arrayref of them.
    $ns = join('|', @$ns) if ref $ns eq 'ARRAY';

    my $query = {
        action      => 'query',
        list        => 'exturlusage',
        euprop      => 'url|title',
        euquery     => $link,
        eunamespace => $ns,
        euprotocol  => $prot,
    };
    $options->{'max'} = 1 unless $options->{'max'};

    my $res = $self->{api}->list($query, $options);
    return $self->_handle_api_error() unless $res;
    return 1 unless ref $res;    # When using a callback hook, this won't be a reference

    return map { { 'url' => $_->{'url'}, 'title' => $_->{'title'} } } @$res;
}

=head2 purge_page($pagename)

Purges the server cache of the specified page. Pass an array reference to purge multiple pages. Returns true on success; false on failure. If you really care, a true return value is the number of pages successfully purged.
You could check that it is the same as the number you wanted to purge - maybe some pages don't exist, or you passed invalid titles, or you aren't allowed to purge the cache:

    my @to_purge = ('Main Page', 'A', 'B', 'C', 'Very unlikely to exist');
    my $size = scalar @to_purge;

    print "all-at-once:\n";
    my $success = $bot->purge_page(\@to_purge);

    if ($success == $size) {
        print "@to_purge: OK ($success/$size)\n";
    }
    else {
        my $missed = @to_purge - $success;
        print "We couldn't purge $missed pages (list was: "
            . join(', ', @to_purge)
            . ")\n";
    }

    # OR
    print "\n\none-at-a-time:\n";
    foreach my $page (@to_purge) {
        my $ok = $bot->purge_page($page);
        print "$page: $ok\n";
    }

=cut

sub purge_page {
    my ($self, $page) = @_;

    # A single title is passed through as-is; an arrayref is flattened into
    # the pipe-separated list the API expects.
    my $titles = ref $page eq 'ARRAY' ? join('|', @$page) : $page;

    my $res = $self->{api}->api({
        action => 'purge',
        titles => $titles,
    });
    return $self->_handle_api_error() unless $res;

    # Count how many titles the server confirms as purged; pages that don't
    # exist (or titles we may not purge) come back without the 'purged' key.
    my $success = 0;
    for my $result (@{ $res->{'purge'} }) {
        $success++ if exists $result->{'purged'};
    }
    return $success;
}

=head2 get_namespace_names()

get_namespace_names returns a hash linking the namespace id, such as 1, to its named equivalent, such as "Talk".

=cut

sub get_namespace_names {
    my $self  = shift;
    # Retry counter; defaulting it avoids an uninitialized-value warning on
    # every external call (the old code compared $_[0] > 1 with $_[0] undef).
    my $tries = shift || 0;
    my %return;
    my $res = $self->{api}->api({
        action => 'query',
        meta   => 'siteinfo',
        siprop => 'namespaces'
    });
    return $self->_handle_api_error() unless $res;
    foreach my $id (keys %{ $res->{query}->{namespaces} }) {
        $return{$id} = $res->{query}->{namespaces}->{$id}->{'*'};
    }
    # ns1 ("Talk") should always exist; if it is missing the reply was
    # incomplete, so retry up to two more times before giving up.
    if ($return{1} or $tries > 1) {
        return %return;
    }
    else {
        return $self->get_namespace_names($tries + 1);
    }
}

=head2 image_usage($image[,$ns[,$filter,[$options]]])

Gets a list of pages which include a certain image. Additional parameters are the namespace number to fetch results from (or an arrayref of multiple namespace numbers); $filter is 'all', 'redirect' (to return only redirects), or 'nonredirects' (to return no redirects). $options is a hashref as described in the section for linksearch().

    my @pages = $bot->image_usage("File:Albert Einstein Head.jpg");

or, make use of the options hashref to do incremental processing:

    $bot->image_usage("File:Albert Einstein Head.jpg", undef, undef, {hook=>\&mysub, max=>5});
    sub mysub {
        my $res = shift;
        foreach my $page (@$res) {
            my $title = $page->{'title'};
            print "$title\n";
        }
    }

=cut

sub image_usage {
    my $self    = shift;
    my $image   = shift;
    my $ns      = shift;
    my $filter  = shift;
    my $options = shift;

    # Ensure the title carries an image-namespace prefix (localized if needed).
    # Both alternatives are anchored: the old /^File:|Image:/ bound ^ only to
    # File:, so any title merely *containing* "Image:" skipped the prefixing.
    if ($image !~ m/^(?:File:|Image:)/) {
        my $ns_data       = $self->_get_ns_data();
        my $image_ns_name = $ns_data->{'6'};    # localized name of ns6 ("File")
        if ($image !~ m/^\Q$image_ns_name\E:/) {
            $image = "$image_ns_name:$image";
        }
    }

    $options->{'max'} = 1 unless defined($options->{'max'});
    delete($options->{'max'}) if $options->{'max'} == 0;

    $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');

    my $hash = {
        action      => 'query',
        list        => 'imageusage',
        iutitle     => $image,
        iunamespace => $ns,
    };
    if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) {
        $hash->{'iufilterredir'} = $1;
    }
    my $res = $self->{api}->list($hash, $options);
    return $self->_handle_api_error() unless $res;
    return 1 if (!ref $res);    # When using a callback hook, this won't be a reference

    return map { $_->{'title'} } @$res;
}

=head2 links_to_image($image)

A backward-compatible call to image_usage(). You can provide only the image name.

=cut

sub links_to_image {
    my $self = shift;
    return $self->image_usage($_[0]);
}

=head2 is_blocked($user)

Checks if a user is currently blocked.

=cut

sub is_blocked {
    my $self = shift;
    my $user = shift;

    # http://en.wikipedia.org/w/api.php?action=query&meta=blocks&bkusers=$user&bklimit=1&bkprop=id
    my $hash = {
        action  => 'query',
        list    => 'blocks',
        bkusers => $user,
        bklimit => 1,
        bkprop  => 'id',
    };
    my $res = $self->{api}->api($hash);
    return $self->_handle_api_error() unless $res;

    # bklimit=1 means the server returns at most one block record
    my $number = scalar @{ $res->{query}->{'blocks'} };
    return 1 if $number == 1;
    return 0 if $number == 0;
    return;    # should be unreachable with bklimit => 1
}

=head2 test_blocked($user)

Retained for backwards compatibility. Use is_blocked($user) for clarity.

=cut

sub test_blocked { # For backwards-compatibility
    return (is_blocked(@_));
}

=head2 test_image_exists($page)

Checks if an image exists at $page. 0 means no, 1 means yes, local, 2
means on commons, 3 means doesn't exist but there is text on the page.
If you pass in an arrayref of images, you'll get out an arrayref of
results.
1471 1472 my $exists = $bot->test_image_exists('File:Albert Einstein Head.jpg'); 1473 if ($exists == 0) { 1474 print "Doesn't exist\n"; 1475 } 1476 elsif ($exists == 1) { 1477 print "Exists locally\n"; 1478 } 1479 elsif ($exists == 2) { 1480 print "Exists on Commons\n"; 1481 } 1482 1483=cut 1484 1485sub test_image_exists { 1486 my $self = shift; 1487 my $image = shift; 1488 1489 my $multi = 0; 1490 if (ref $image eq 'ARRAY') { 1491 $image = join('|', @$image); 1492 $multi = 1; # so we know whether to return a hash or a single scalar 1493 } 1494 1495 my $res = $self->{api}->api({ 1496 action => 'query', 1497 titles => $image, 1498 iilimit => 1, 1499 prop => 'imageinfo' 1500 }); 1501 return $self->_handle_api_error() unless $res; 1502 1503 my @return; 1504 # use Data::Dumper; print STDERR Dumper($res) and die; 1505 foreach my $id (keys %{ $res->{query}->{pages} }) { 1506 my $title = $res->{query}->{pages}->{$id}->{title}; 1507 if ($res->{query}->{pages}->{$id}->{imagerepository} eq 'shared') { 1508 if ($multi) { 1509 unshift @return, 2; 1510 } 1511 else { 1512 return 2; 1513 } 1514 } 1515 elsif (exists($res->{query}->{pages}->{$id}->{missing})) { 1516 if ($multi) { 1517 unshift @return, 0; 1518 } 1519 else { 1520 return 0; 1521 } 1522 } 1523 elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq '') { 1524 if ($multi) { 1525 unshift @return, 3; 1526 } 1527 else { 1528 return 3; 1529 } 1530 } 1531 elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq 'local') { 1532 if ($multi) { 1533 unshift @return, 1; 1534 } 1535 else { 1536 return 1; 1537 } 1538 } 1539 } 1540 1541 # use Data::Dumper; print STDERR Dumper(\@return) and die; 1542 return \@return; 1543} 1544 1545=head2 get_pages_in_namespace($namespace_id, $page_limit) 1546 1547Returns an array containing the names of all pages in the specified namespace. The $namespace_id must be a number, not a namespace name. Setting $page_limit is optional. 
If $page_limit is over 500, it will be rounded up to the next multiple of 500. 1548 1549=cut 1550 1551sub get_pages_in_namespace { 1552 my $self = shift; 1553 my $namespace = shift; 1554 my $limit = shift || 500; 1555 my $options = shift; 1556 $limit = 5000 if $self->{'highlimits'}; 1557 1558 my $hash = { 1559 action => 'query', 1560 list => 'allpages', 1561 apnamespace => $namespace, 1562 aplimit => $limit, 1563 }; 1564 $options->{'max'} = 1 unless $options->{'max'}; 1565 1566 my $res = $self->{api}->list($hash, $options); 1567 return $self->_handle_api_error() unless $res; 1568 return 1 if (!ref $res); # Not a ref when using callback 1569 my @return; 1570 foreach (@{$res}) { 1571 push @return, $_->{title}; 1572 } 1573…
Large files files are truncated, but you can click here to view the full file