PageRenderTime 67ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/tags/3.2/lib/MediaWiki/Bot.pm

http://perlwikipedia.googlecode.com/
Perl | 2157 lines | 1832 code | 304 blank | 21 comment | 159 complexity | 3130e4109f8f2d2523d26e9963bc793e MD5 | raw file
Possible License(s): GPL-3.0
  1. package MediaWiki::Bot;
  2. # ABSTRACT: a MediaWiki bot framework written in Perl
  3. use strict;
  4. use warnings;
  5. use WWW::Mechanize;
  6. use HTML::Entities;
  7. use URI::Escape;
  8. use XML::Simple;
  9. use Carp;
  10. use URI::Escape qw(uri_escape_utf8);
  11. use Digest::MD5 qw(md5_hex);
  12. use Encode qw(encode_utf8);
  13. use MediaWiki::API;
  14. use Module::Pluggable search_path => [qw(MediaWiki::Bot::Plugin)], 'require' => 1;
  # Call import() on every class reported by plugins(); Module::Pluggable is
  # configured above to search the MediaWiki::Bot::Plugin namespace.
  $_->import() for __PACKAGE__->plugins;

  our $VERSION = '3.2.0';
  20. =head1 SYNOPSIS
  21. use MediaWiki::Bot;
  22. my $bot = MediaWiki::Bot->new({
  23. agent => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)',
  24. assert => 'bot',
  25. protocol => 'https',
  26. host => 'secure.wikimedia.org',
  27. path => 'wikipedia/meta/w',
  28. login_data => { username => "Mike's bot account", password => "password" },
  29. });
  30. my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
  31. print "Reverting to $revid\n" if defined($revid);
  32. $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
  33. =head1 DESCRIPTION
  34. MediaWiki::Bot is a framework that can be used to write bots which interface
  35. with the MediaWiki API (L<http://en.wikipedia.org/w/api.php>).
  36. =head1 METHODS
  37. =head2 new($options_hashref)
  38. Calling MediaWiki::Bot->new() will create a new MediaWiki::Bot object.
  39. =over 4
  40. =item *
  41. agent sets a custom useragent
  42. =item *
  43. assert sets a parameter for the AssertEdit extension (commonly 'bot'). Refer to L<http://mediawiki.org/wiki/Extension:AssertEdit>.
  44. =item *
  45. operator allows the bot to send you a message when it fails an assert, and will be integrated into the default useragent (which may not be used if you set agent yourself). The message will tell you that $useragent is logged out, so use a descriptive one if you set it.
  46. =item *
  47. maxlag allows you to set the maxlag parameter (default is the recommended 5s). Please refer to the MediaWiki documentation prior to changing this from the default.
  48. =item *
  49. protocol allows you to specify 'http' or 'https' (default is 'http'). This is commonly used with the domain and path settings below.
  50. =item *
  51. host sets the domain name of the wiki to connect to.
  52. =item *
  53. path sets the path to api.php (with no leading or trailing slash).
  54. =item *
  55. login_data is a hashref of credentials to pass to login(). See that section for a description.
  56. =item *
  57. debug is whether to provide debug output. 1 provides only error messages; 2 provides further detail on internal operations.
  58. =back
  59. For example:
  60. my $bot = MediaWiki::Bot->new({
  61. agent => 'MediaWiki::Bot/3.1.6 (User:Mike.lifeguard)',
  62. assert => 'bot',
  63. protocol => 'https',
  64. host => 'secure.wikimedia.org',
  65. path => 'wikipedia/meta/w',
  66. login_data => { username => "Mike's bot account", password => "password" },
  67. });
  68. For backward compatibility, you can specify up to three parameters:
  69. my $bot = MediaWiki::Bot->new('MediaWiki::Bot 2.3.1 (User:Mike.lifeguard)', $assert, $operator);
  70. This deprecated form will never do auto-login or autoconfiguration.
  71. =cut
  # Constructor: build a bot object, point it at a wiki and optionally log in.
  #
  # Arguments: either an options hashref with the keys agent (also accepted as
  # useragent, the spelling used in the documentation examples), assert,
  # operator, maxlag, protocol, host, path, login_data and debug; or the
  # deprecated positional list
  #   ($agent, $assert, $operator, $maxlag, $protocol, $host, $path, $debug).
  # The positional form never logs in.
  #
  # Returns the new object. If login_data was supplied and login() reports
  # failure, returns nothing instead.
  sub new {
      my $package = shift;
      my ($agent, $assert, $operator, $maxlag, $protocol,
          $host, $path, $login_data, $debug);

      if (ref $_[0] eq 'HASH') {
          my $opts = $_[0];

          # The examples in the documentation pass 'useragent'; honour it
          # rather than silently falling back to the default agent string.
          $agent      = $opts->{'agent'} || $opts->{'useragent'};
          $assert     = $opts->{'assert'};
          $operator   = $opts->{'operator'};
          $maxlag     = $opts->{'maxlag'};
          $protocol   = $opts->{'protocol'};
          $host       = $opts->{'host'};
          $path       = $opts->{'path'};
          $login_data = $opts->{'login_data'};
          $debug      = $opts->{'debug'};
      }
      else {
          ($agent, $assert, $operator, $maxlag, $protocol, $host, $path, $debug) = @_;
      }

      $assert   =~ s/[&?]assert=// if $assert;    # Strip out param part, leaving just the value
      $operator =~ s/^User://i     if $operator;

      # Default useragent, naming the operator when one is known
      unless ($agent) {
          $agent = "MediaWiki::Bot/$VERSION";
          $agent .= " (User:$operator)" if $operator;
      }

      my $self = bless({}, $package);
      $self->{mech} = WWW::Mechanize->new(
          cookie_jar  => {},
          onerror     => \&Carp::carp,
          stack_depth => 1
      );
      $self->{mech}->agent($agent);
      $self->{errstr}   = '';
      $self->{assert}   = $assert;
      $self->{operator} = $operator;
      $self->{'debug'}  = $debug || 0;    # must be set before set_wiki(), which reads it
      $self->{api}      = MediaWiki::API->new();
      $self->{api}->{ua}->agent($agent);

      # Set wiki (handles setting $self->{host} etc)
      $self->set_wiki({
          protocol => $protocol,
          host     => $host,
          path     => $path,
      });

      my $config = $self->{api}->{config};
      $config->{max_lag}         = $maxlag || 5;
      $config->{max_lag_delay}   = 1;
      $config->{retries}         = 5;
      $config->{max_lag_retries} = -1;
      $config->{retry_delay}     = 30;

      # Log-in, and maybe autoconfigure
      if ($login_data) {
          return $self if $self->login($login_data);

          carp "Couldn't log in with supplied settings" if $self->{'debug'};
          return;
      }

      return $self;
  }
  148. =head2 set_wiki($options)
  149. Set what wiki to use. Host is the domain name; path is the path before api.php (usually 'w'); protocol is either 'http' or 'https'. For example:
  150. $bot->set_wiki({
  151. protocol => 'https',
  152. host => 'secure.wikimedia.org',
  153. path => 'wikipedia/meta/w',
  154. });
  155. For backward compatibility, you can specify up to two parameters in this deprecated form:
  156. $bot->set_wiki($host, $path);
  157. If you don't set any parameter, its previous value is used. If it has never been set, the default settings are 'http', 'en.wikipedia.org' and 'w'.
  158. =cut
  # Select the wiki that subsequent API calls go to.
  #
  # Arguments: an options hashref with host, path and protocol, or the
  # deprecated positional form ($host, $path). Anything left unset keeps its
  # previous value, or defaults to 'http', 'en.wikipedia.org' and 'w'.
  # A scheme embedded in the host ("https://example.org") is used as the
  # protocol unless a protocol was passed explicitly.
  #
  # Side effects: stores protocol/host/path on the object, rebuilds the
  # api_url in the MediaWiki::API config, and drops the cached ns_data when
  # the wiki actually changes. Always returns 1.
  sub set_wiki {
      my $self = shift;
      my ($host, $path, $protocol);

      if (ref $_[0] eq 'HASH') {
          ($host, $path, $protocol) = @{ $_[0] }{qw(host path protocol)};
      }
      else {
          ($host, $path) = @_;
      }

      # Set defaults for host and path
      $host = $self->{'host'} || 'en.wikipedia.org' unless defined($host);
      $path = $self->{'path'} || 'w'                unless defined($path);

      # Peel a scheme off the host *before* defaulting the protocol; once the
      # default has been applied there is no way to tell "not given" from
      # "given as http", and the scheme in the host would be discarded.
      if ($host =~ s{^(https?)://}{}i) {
          $protocol = lc($1) unless $protocol;
      }
      $protocol = $self->{'protocol'} || 'http' unless $protocol;

      # Clean up the parts we will build a URL with
      $protocol =~ s,://$,,;
      $host     =~ s,/$,,;
      $path     =~ s,/$,,;

      # Invalidate wiki-specific cached data
      my %new_value = (host => $host, path => $path, protocol => $protocol);
      my $wiki_changed = grep {
          defined($self->{$_}) and $self->{$_} ne $new_value{$_}
      } keys %new_value;
      delete $self->{'ns_data'} if $wiki_changed and $self->{'ns_data'};

      $self->{protocol} = $protocol;
      $self->{host}     = $host;
      $self->{path}     = $path;

      # $path may be '', so don't produce http://domain.com//api.php
      $self->{api}->{config}->{api_url} = $path
          ? "$protocol://$host/$path/api.php"
          : "$protocol://$host/api.php";

      warn "Wiki set to " . $self->{api}->{config}{api_url} . "\n"
          if ($self->{'debug'} || 0) > 1;

      return 1;
  }
  201. =head2 login($login_hashref)
  202. Logs the user $username in, optionally using $password. First, an attempt will be made to use cookies to log in. If this fails, an attempt will be made to use the password provided to log in, if any. If the login was successful, returns true; false otherwise.
  203. $bot->login({
  204. username => $username,
  205. password => $password,
  206. }) or die "Login failed";
  207. Once logged in, attempt to do some simple auto-configuration. At present, this consists of:
  208. =over 4
  209. =item *
  210. Warning if the account doesn't have the bot flag, and isn't a sysop account.
  211. =item *
  212. Setting the use of apihighlimits if the account has that userright.
  213. =item *
  214. Setting an appropriate default assert.
  215. =back
  216. You can skip this autoconfiguration by passing C<autoconfig =E<gt> 0>
  217. =head3 Single User Login
  218. On WMF wikis, C<do_sul> specifies whether to log in on all projects. The default is false. But even when false, you still get a CentralAuth cookie for, and are thus logged in on, all languages of a given domain (*.wikipedia.org, for example). When set, a login is done on each WMF domain so you are logged in on all ~800 content wikis. Since C<*.wikimedia.org> is not possible, we explicitly include meta, commons, incubator, and wikispecies. When C<do_sul> is set, the return is the number of domains that login was successful for. This allows callers to do the following:
  219. $bot->login({
  220. username => $username,
  221. password => $password,
  222. do_sul => 1,
  223. }) or die "SUL failed";
  224. For backward compatibility, you can call this as
  225. $bot->login($username, $password);
  226. This deprecated form will never do autoconfiguration or SUL login.
  227. If you need to supply basic auth credentials, pass a hashref of data as described by L<LWP::UserAgent>:
  228. $bot->login({
  229. username => $username,
  230. password => $password,
  231. basic_auth => { netloc => "private.wiki.com:80",
  232. realm => "Authentication Realm",
  233. uname => "Basic auth username",
  234. pass => "password",
  235. }
  236. }) or die "Couldn't log in";
  237. =cut
  # Log in, first with saved cookies and then, if needed, with the password.
  #
  # Arguments: an options hashref with username, password, lgdomain,
  # autoconfig (default 1), basic_auth (hashref with netloc, realm, uname,
  # pass) and do_sul; or the deprecated positional form ($username, $password),
  # which never autoconfigures and never does SUL.
  #
  # Returns true on success and false otherwise. With do_sul set, returns the
  # number of projects in the built-in list on which login succeeded.
  sub login {
      my $self = shift;
      my ($username, $password, $lgdomain, $autoconfig, $basic_auth, $do_sul);

      if (ref $_[0] eq 'HASH') {
          my $opts = $_[0];
          $username   = $opts->{'username'};
          $password   = $opts->{'password'};
          $autoconfig = defined($opts->{'autoconfig'}) ? $opts->{'autoconfig'} : 1;
          $basic_auth = $opts->{'basic_auth'};
          $do_sul     = $opts->{'do_sul'} || 0;
          $lgdomain   = $opts->{'lgdomain'};
      }
      else {
          ($username, $password) = @_;
          $autoconfig = 0;
          $do_sul     = 0;
      }
      $self->{'username'} = $username;    # Remember who we are

      # Handle basic auth first, if needed
      if ($basic_auth) {
          warn "Applying basic auth credentials" if $self->{'debug'} > 1;
          $self->{api}->{ua}->credentials(
              @{$basic_auth}{qw(netloc realm uname pass)}
          );
      }

      # SUL is skipped when talking to the secure.wikimedia.org gateway
      $do_sul = 0
          if  $self->{'protocol'} eq 'https'
          and $self->{'host'} eq 'secure.wikimedia.org';

      if ($do_sul) {
          my $debug = $self->{'debug'};    # Remember this for the messages below
          my ($host, $path, $protocol) = @{$self}{qw(host path protocol)};

          # Silence the nested calls. local() puts the caller's debug level
          # back when this block is left, even if a nested call dies.
          local $self->{'debug'} = 0;

          my @logins;                      # Keep track of our successes
          my @WMF_projects = qw(
              en.wikipedia.org
              en.wiktionary.org
              en.wikibooks.org
              en.wikinews.org
              en.wikiquote.org
              en.wikisource.org
              en.wikiversity.org
              meta.wikimedia.org
              commons.wikimedia.org
              species.wikimedia.org
              incubator.wikimedia.org
          );

          foreach my $project (@WMF_projects) {
              print STDERR "Logging in on $project..." if $debug > 1;
              $self->set_wiki({
                  host => $project,
              });
              my $success = $self->login({
                  username   => $username,
                  password   => $password,
                  lgdomain   => $lgdomain,
                  do_sul     => 0,
                  autoconfig => 0,
              });
              warn ($success ? " OK\n" : " FAILED\n") if $debug > 1;
              push(@logins, $success);
          }

          $self->set_wiki({    # Switch back to original wiki
              protocol => $protocol,
              host     => $host,
              path     => $path,
          });

          # Count successes instead of adding the raw return values, which may
          # be undef for a failed login.
          my $sum   = grep { $_ } @logins;
          my $total = scalar @WMF_projects;
          warn "$sum/$total logins succeeded\n" if $debug > 1;

          return $sum;
      }

      my $cookies = ".mediawiki-bot-$username-cookies";
      if (-r $cookies) {
          for my $jar ($self->{mech}->{cookie_jar}, $self->{api}->{ua}->{cookie_jar}) {
              $jar->load($cookies);
              $jar->{ignore_discard} = 1;
          }

          if ($self->_is_loggedin()) {
              $self->_do_autoconfig() if $autoconfig;
              warn "Logged in successfully with cookies" if $self->{'debug'} > 1;
              return 1;    # If we're already logged in, nothing more is needed
          }
      }

      unless ($password) {
          carp "No login cookies available, and no password to continue with authentication" if $self->{'debug'};
          return 0;
      }

      my $res = $self->{api}->api({
          action     => 'login',
          lgname     => $username,
          lgpassword => $password,
          lgdomain   => $lgdomain
      }) or return $self->_handle_api_error();
      $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
      $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));

      # The API may ask for a second request that echoes back a login token
      if (($res->{'login'}->{'result'} || '') eq 'NeedToken') {
          my $token = $res->{'login'}->{'token'};
          $res = $self->{api}->api({
              action     => 'login',
              lgname     => $username,
              lgpassword => $password,
              lgdomain   => $lgdomain,
              lgtoken    => $token,
          }) or return $self->_handle_api_error();

          $self->{api}->{ua}->{cookie_jar}->extract_cookies($self->{api}->{response});
          $self->{api}->{ua}->{cookie_jar}->save($cookies) if (-w($cookies) or -w('.'));
      }

      # Success means the API said so *and* confirmed the name we asked for
      my $login = $res->{'login'};
      my $logged_in = (
          defined($login->{'lgusername'})
          and defined($login->{'result'})
          and $login->{'lgusername'} eq $self->{'username'}
          and $login->{'result'} eq 'Success'
      );

      if ($logged_in) {
          $self->_do_autoconfig() if $autoconfig;
          warn "Logged in successfully with password" if $self->{'debug'} > 1;
      }

      return $logged_in;
  }
  371. =head2 set_highlimits($flag)
  372. Tells MediaWiki::Bot to start/stop using APIHighLimits for certain queries.
  373. $bot->set_highlimits(1);
  374. =cut
  # Store the highlimits flag on the object; an omitted or undefined flag
  # means "on". Always returns 1.
  sub set_highlimits {
      my ($self, $flag) = @_;

      $self->{highlimits} = defined($flag) ? $flag : 1;

      return 1;
  }
  381. =head2 logout()
  382. The logout procedure deletes the login tokens and other browser cookies.
  383. $bot->logout();
  384. =cut
  # Send action=logout through the API object. The API result is not
  # inspected; always returns 1.
  sub logout {
      my ($self) = @_;

      $self->{api}->api({ action => 'logout' });

      return 1;
  }
  393. =head2 edit($options_hashref)
  394. Puts text on a page. If provided, use a specified edit summary, mark the edit as minor, as a non-bot edit, or add an assertion. Set section to edit a single section instead of the whole page. An MD5 hash is sent to guard against data corruption while in transit.
  395. my $text = $bot->get_text('My page');
  396. $text .= "\n\n* More text\n";
  397. $bot->edit({
  398. page => 'My page',
  399. text => $text,
  400. summary => 'Adding new content',
  401. section => 'new',
  402. });
  403. You can also call this using the deprecated form:
  404. $bot->edit($page, $text, $summary, $is_minor, $assert, $markasbot);
  405. =cut
  # Save $text to $page.
  #
  # Arguments: an options hashref with page, text, summary, is_minor, assert,
  # markasbot and section; or the deprecated positional form
  #   ($page, $text, $summary, $is_minor, $assert, $markasbot, $section).
  # is_minor and markasbot default to 1. A missing assert falls back to the
  # one configured on the object; in the hashref form an explicitly empty
  # assert ('') switches the assertion off for this one edit.
  #
  # Returns the API result on success. On an API error returns whatever
  # _handle_api_error() returns. When the API reports result 'Failure' and an
  # agent string is known, the operator (if any) is warned on their talk page
  # and nothing is returned.
  sub edit {
      my $self = shift;
      my ($page, $text, $summary, $is_minor, $assert, $markasbot, $section);
      my $assert_disabled = 0;

      if (ref $_[0] eq 'HASH') {
          ($page, $text, $summary, $is_minor, $assert, $markasbot, $section)
              = @{ $_[0] }{qw(page text summary is_minor assert markasbot section)};

          # Only the hashref form can ask for "no assertion"; the positional
          # form keeps treating any false value as "use the default".
          $assert_disabled = 1 if defined($assert) and $assert eq '';
      }
      else {
          ($page, $text, $summary, $is_minor, $assert, $markasbot, $section) = @_;
      }

      # Set defaults
      $summary = 'BOT: Changing page text' unless $summary;
      if ($assert) {
          $assert =~ s/^[&?]assert=//;
      }
      elsif ($assert_disabled) {
          $assert = undef;
      }
      else {
          $assert = $self->{'assert'};
      }
      $is_minor  = 1 unless defined($is_minor);
      $markasbot = 1 unless defined($markasbot);

      my ($edittoken, $lastedit, $tokentime) = $self->_get_edittoken($page);
      return $self->_handle_api_error() unless $edittoken;

      my $hash = {
          action         => 'edit',
          title          => $page,
          token          => $edittoken,
          text           => $text,
          md5            => md5_hex(encode_utf8($text)),    # Guard against data corruption
                                                            # Pass only bytes to md5_hex()
          summary        => $summary,
          basetimestamp  => $lastedit,     # Guard against edit conflicts
          starttimestamp => $tokentime,    # Guard against the page being deleted/moved
          bot            => $markasbot,
          assert         => $assert,
          minor          => $is_minor,
      };
      $hash->{'section'} = $section if defined($section);

      my $res = $self->{api}->api($hash);    # Check if MediaWiki::API::edit() is good enough
      return $self->_handle_api_error() unless $res;

      if ($res->{edit}->{result} && $res->{edit}->{result} eq 'Failure') {
          # NOTE(review): this reads the agent straight out of the
          # WWW::Mechanize object's hash; confirm that the installed LWP
          # stores it there, otherwise use $self->{mech}->agent.
          my $agent = $self->{mech}->{agent};
          if ($agent) {
              carp 'Assertion failed as ' . $agent if $self->{'debug'};

              # Warn the operator, but only for an edit that actually carried
              # an assertion: the warning is sent without one, so if it fails
              # too it must not trigger yet another warning.
              if ($self->{operator} and defined($assert) and length($assert)) {
                  my $optalk = $self->get_text('User talk:' . $self->{operator});
                  if (defined($optalk)) {
                      print "Sending warning!\n";
                      $self->edit({
                          page => "User talk:$self->{operator}",
                          text => $optalk
                              . "\n\n==Error with "
                              . $agent . "==\n"
                              . $agent
                              . ' needs to be logged in! ~~~~',
                          summary  => 'bot issue',
                          is_minor => 0,
                          assert   => '',
                      });
                  }
              }
              return;
          }
          else {
              carp 'Assertion failed' if $self->{'debug'};
          }
      }

      return $res;
  }
  490. =head2 move($from, $to, $reason, $options_hashref)
  491. This moves a page from $from to $to. If you wish to specify more options (like whether to suppress creation of a redirect), use $options_hashref.
  492. =over 4
  493. =item *
  494. movetalk specifies whether to attempt to move the talk page.
  495. =item *
  496. noredirect specifies whether to suppress creation of a redirect.
  497. =item *
  498. movesubpages specifies whether to move subpages, if applicable.
  499. =item *
  500. watch and unwatch add or remove the page and the redirect from your watchlist.
  501. =item *
  502. ignorewarnings ignores warnings.
  503. =back
  504. my @pages = ("Humor", "Rumor");
  505. foreach my $page (@pages) {
  506. my $to = $page;
  507. $to =~ s/or$/our/;
  508. $bot->move($page, $to, "silly 'merricans");
  509. }
  510. =cut
  # Move page $from to $to, giving $reason.
  #
  # $opts is an optional hashref. Each of movetalk, noredirect, movesubpages,
  # watch, unwatch and ignorewarnings is forwarded to the API when defined.
  #
  # Returns the API result, or whatever _handle_api_error() returns on failure.
  sub move {
      my ($self, $from, $to, $reason, $opts) = @_;
      $opts ||= {};

      my $hash = {
          action => 'move',
          from   => $from,
          to     => $to,
          reason => $reason,
      };

      # Forward every documented option, not just movetalk/noredirect
      for my $option (qw(movetalk noredirect movesubpages watch unwatch ignorewarnings)) {
          $hash->{$option} = $opts->{$option} if defined($opts->{$option});
      }

      my $res = $self->{api}->edit($hash);
      return $self->_handle_api_error() unless $res;

      return $res;    # should we return something more useful?
  }
  529. =head2 get_history($pagename[,$limit])
  530. Returns an array containing the history of the specified page, with $limit number of revisions. The array structure contains 'revid', 'user', 'comment', 'timestamp_date', and 'timestamp_time'.
  531. =cut
  # Fetch the revision history of $pagename.
  #
  # Arguments: $pagename, then optionally $limit (default 5), $rvstartid (the
  # revision id to start listing from) and $direction (sent as the API's rvdir
  # parameter).
  #
  # Returns a list of hashrefs with revid, user, comment, timestamp_date and
  # timestamp_time; on an API error returns whatever _handle_api_error()
  # returns.
  sub get_history {
      my $self      = shift;
      my $pagename  = shift;
      my $limit     = shift || 5;
      my $rvstartid = shift || '';
      my $direction = shift;

      my $hash = {
          action  => 'query',
          prop    => 'revisions',
          titles  => $pagename,
          rvprop  => 'ids|timestamp|user|comment',
          rvlimit => $limit
      };
      $hash->{rvstartid} = $rvstartid if $rvstartid;

      # The revisions module calls this parameter rvdir
      $hash->{rvdir} = $direction if $direction;

      my $res = $self->{api}->api($hash);
      return $self->_handle_api_error() unless $res;

      my ($id) = keys %{ $res->{query}->{pages} };
      my $revisions = $res->{query}->{pages}->{$id}->{revisions} || [];

      my @history;
      foreach my $rev (@{$revisions}) {
          # Split the timestamp at the "T" and drop the trailing "Z"
          my ($timestamp_date, $timestamp_time) = split(/T/, $rev->{timestamp});
          $timestamp_time =~ s/Z$//;

          push @history, {
              revid          => $rev->{revid},
              user           => $rev->{user},
              timestamp_date => $timestamp_date,
              timestamp_time => $timestamp_time,
              comment        => $rev->{comment},
          };
      }

      return @history;
  }
  571. =head2 get_text($pagename,[$revid,$section_number])
  572. Returns the wikitext of the specified page. If $revid is defined, it will return the text of that revision; if $section_number is defined, it will return the text of that section. A blank page will return wikitext of "" (which evaluates to false in Perl, but is defined); a nonexistent page will return undef (which also evaluates to false in Perl, but is obviously undefined). You can distinguish between blank and nonexistent by using defined():
  573. my $wikitext = $bot->get_text('Page title');
  574. print "Wikitext: $wikitext\n" if defined $wikitext;
  575. =cut
  # Fetch the wikitext of $pagename, optionally of revision $revid and/or of
  # section number $section (0 is a valid section number).
  #
  # Returns the wikitext; returns nothing when the API reports page id -1
  # (no such page); on an API error returns whatever _handle_api_error()
  # returns.
  sub get_text {
      my ($self, $pagename, $revid, $section) = @_;

      my $hash = {
          action => 'query',
          titles => $pagename,
          prop   => 'revisions',
          rvprop => 'content',
      };
      $hash->{rvstartid} = $revid if $revid;

      # Test definedness, not truth: section 0 must not be dropped
      $hash->{rvsection} = $section if defined($section);

      my $res = $self->{api}->api($hash);
      return $self->_handle_api_error() unless $res;

      my ($id, $data) = %{ $res->{query}->{pages} };
      return if $id == -1;    # Page doesn't exist

      return $data->{revisions}[0]->{'*'};
  }
  600. =head2 get_id($pagename)
  601. Returns the id of the specified page. Returns undef if page does not exist.
  602. my $pageid = $bot->get_id("Main Page");
  603. croak "Page doesn't exist\n" if !defined($pageid);
  604. =cut
  # Look up the page id of $pagename.
  #
  # Returns the id; returns nothing when the API reports id -1; on an API
  # error returns whatever _handle_api_error() returns.
  sub get_id {
      my ($self, $pagename) = @_;

      my $res = $self->{api}->api({
          action => 'query',
          titles => $pagename,
      });
      return $self->_handle_api_error() unless $res;

      # The pages hash is keyed by page id; take the first key
      my ($page_id) = %{ $res->{query}->{pages} };

      return if $page_id == -1;
      return $page_id;
  }
  622. =head2 get_pages(\@pages)
  623. Returns the text of the specified pages in a hashref. Content of undef means page does not exist. Also handles redirects or article names that use namespace aliases.
  624. my @pages = ('Page 1', 'Page 2', 'Page 3');
  625. my $thing = $bot->get_pages(\@pages);
  626. foreach my $page (keys %$thing) {
  627. my $text = $thing->{$page};
  628. print "$text\n" if defined($text);
  629. }
  630. =cut
  # Fetch the wikitext of several pages with one API query.
  #
  # Arguments: an arrayref of titles, or the titles as a plain list.
  #
  # Returns a hashref mapping each requested title to its wikitext, with undef
  # for pages that do not exist. Short pages that look like a redirect are
  # replaced by the text of their target. Requested titles the API did not
  # echo back and that contain a colon are retried through get_text() after
  # expanding a small built-in table of enwiki namespace aliases.
  # On an API error returns whatever _handle_api_error() returns.
  sub get_pages {
      my $self  = shift;
      my @pages = (ref $_[0] eq 'ARRAY') ? @{ $_[0] } : @_;
      my %return;

      my $hash = {
          action => 'query',
          titles => join('|', @pages),
          prop   => 'revisions',
          rvprop => 'content',
      };

      # Used to track problematic article names: 1 means "requested", anything
      # higher means the API returned a page under exactly that title.
      my %seen = map { $_ => 1 } @pages;

      my $res = $self->{api}->api($hash);
      return $self->_handle_api_error() unless $res;

      foreach my $page (values %{ $res->{query}->{pages} }) {
          my $title = $page->{'title'};
          next unless $seen{$title};
          $seen{$title}++;

          if (defined($page->{'missing'})) {
              $return{$title} = undef;
              next;
          }
          next unless defined($page->{'revisions'});

          my $content = $page->{'revisions'}[0]->{'*'};
          if (defined($content)
              and length($content) < 150
              and $content =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/)    # FRAGILE!
          {
              $return{$title} = $self->get_text($1);
          }
          else {
              $return{$title} = $content;
          }
      }

      # Based on api.php?action=query&meta=siteinfo&siprop=namespaces|namespacealiases
      # Should be done on an as-needed basis! This is only correct for enwiki (and
      # it is probably incomplete anyways, or will be eventually).
      my %expand = (
          'WP'         => 'Wikipedia',
          'WT'         => 'Wikipedia talk',
          'Image'      => 'File',
          'Image talk' => 'File talk',
      );

      # Only for those article names that remained after the first part
      # If we're here we are dealing most likely with a WP:CSD type of article name
      for my $title (keys %seen) {
          next unless $seen{$title} == 1;

          my @pieces = split(/:/, $title);
          next unless @pieces > 1;

          $pieces[0] = ($expand{ $pieces[0] } || $pieces[0]);
          my $text = $self->get_text(join ':', @pieces);
          warn "Detected article name that needed expanding $title\n"
              if ($self->{'debug'} || 0) > 1;

          # get_text() gives undef for a nonexistent page; don't pattern-match
          # on that.
          if (defined($text) and $text =~ m/\#REDIRECT\s\[\[([^\[\]]+)\]\]/) {
              $text = $self->get_text($1);
          }
          $return{$title} = $text;
      }

      return \%return;
  }
  699. =head2 revert($pagename, $revid[,$summary])
  700. Reverts the specified page to $revid, with an edit summary of $summary. A default edit summary will be used if $summary is omitted.
  701. my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
  702. print "Reverting to $revid\n" if defined($revid);
  703. $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
  704. =cut
  # Put the text of revision $revid back on $pagename.
  #
  # $summary is optional; a default naming the revision is used otherwise.
  #
  # Returns what edit() returns. Returns nothing, without editing, when the
  # text of the revision could not be fetched: saving undef would blank the
  # page.
  sub revert {
      my ($self, $pagename, $revid, $summary) = @_;
      $summary ||= "Reverting to old revision $revid";

      my $text = $self->get_text($pagename, $revid);
      unless (defined($text)) {
          carp "Couldn't fetch the text of revision $revid of $pagename; not reverting"
              if $self->{'debug'};
          return;
      }

      my $res = $self->edit($pagename, $text, $summary);
      return $res;
  }
  714. =head2 undo($pagename, $revid[,$summary[,$after]])
  715. Reverts the specified $revid, with an edit summary of $summary, using the undo function. To undo all revisions from $revid up to but not including this one, set $after to another revid. If not set, just undo the one revision ($revid).
  716. =cut
  # Undo revision $revid of $page through the API's undo feature.
  #
  # $summary is optional; when omitted, a default is generated that mentions
  # $after if it was given. $after is optional and is sent as undoafter.
  #
  # Returns the API result; when no edit token could be obtained or the API
  # call fails, returns whatever _handle_api_error() returns.
  sub undo {
      my ($self, $page, $revid, $summary, $after) = @_;

      # Only generate a summary when the caller did not supply one
      unless ($summary) {
          $summary = defined($after)
              ? "Reverting edits between #$revid & #$after"
              : "Reverting revision #$revid";
      }

      my ($edittoken, $basetimestamp, $starttimestamp) = $self->_get_edittoken($page);
      return $self->_handle_api_error() unless $edittoken;

      my $hash = {
          action         => 'edit',
          title          => $page,
          undo           => $revid,
          undoafter      => $after,
          summary        => $summary,
          token          => $edittoken,
          starttimestamp => $starttimestamp,
          basetimestamp  => $basetimestamp,
      };

      my $res = $self->{api}->api($hash);
      return $self->_handle_api_error() unless $res;

      return $res;
  }
  739. =head2 get_last($page, $user)
  740. Returns the revid of the last revision to $page not made by $user. undef is returned if no result was found, as would be the case if the page is deleted.
  741. my $revid = $bot->get_last("User:Mike.lifeguard/sandbox", "Mike.lifeguard");
  742. if (defined($revid)) {
  743. print "Reverting to $revid\n";
  744. $bot->revert('User:Mike.lifeguard', $revid, 'rvv');
  745. }
  746. =cut
  # Find the newest revision of $page that was not made by $user.
  #
  # Returns its revid, or undef when the API result carries no revision; on
  # an API error returns whatever _handle_api_error() returns.
  sub get_last {
      my ($self, $page, $user) = @_;

      my $query = {
          action        => 'query',
          titles        => $page,
          prop          => 'revisions',
          rvlimit       => 1,
          rvprop        => 'ids|user',
          rvexcludeuser => $user,
      };
      my $res = $self->{api}->api($query);
      return $self->_handle_api_error() unless $res;

      # The pages hash is keyed by page id; only the page data is of interest
      my (undef, $page_data) = %{ $res->{query}->{pages} };

      return $page_data->{'revisions'}[0]->{'revid'};
  }
  765. =head2 update_rc($limit[,$options_hashref])
  766. Returns an array containing the Recent Changes to the wiki Main namespace. The array structure contains 'title', 'revid', 'old_revid', and 'timestamp'. The $options_hashref is the same as described in the section on linksearch().
  767. my @rc = $bot->update_rc(5);
  768. foreach my $hashref (@rc) {
  769. my $title = $hashref->{'title'};
  770. print "$title\n";
  771. }
  772. # Or, use a callback for incremental processing:
  773. my $options = { hook => \&mysub, };
  774. $bot->update_rc($options);
  775. sub mysub {
  776. my ($res) = @_;
  777. foreach my $hashref (@$res) {
  778. my $page = $hashref->{'title'};
  779. print "$page\n";
  780. }
  781. }
  782. =cut
  # List recent changes in namespace 0.
  #
  # Arguments: ($limit, $options) or, as in the documented callback example,
  # just ($options). $options is the hashref handed to MediaWiki::API's
  # list(); its 'max' entry is set to 1 unless the caller set it.
  #
  # Returns a list of hashrefs with title, revid, old_revid and timestamp.
  # Returns 1 when list() gives back a non-reference (the callback case).
  # On an API error returns whatever _handle_api_error() returns.
  sub update_rc {
      my $self = shift;

      # A leading hashref is the options hash, not a limit
      my $limit   = (ref $_[0] eq 'HASH') ? undef : shift;
      my $options = shift;

      my $hash = {
          action      => 'query',
          list        => 'recentchanges',
          rcnamespace => 0,
          rclimit     => $limit,
      };
      $options->{'max'} = 1 unless $options->{'max'};

      my $res = $self->{api}->list($hash, $options);
      return $self->_handle_api_error() unless $res;
      return 1 if (!ref $res);    # Not a ref when using callback

      # Keep only the fields that are part of this method's contract
      my @rc_table = map {
          {
              title     => $_->{'title'},
              revid     => $_->{'revid'},
              old_revid => $_->{'old_revid'},
              timestamp => $_->{'timestamp'},
          }
      } @{$res};

      return @rc_table;
  }
  810. =head2 what_links_here($page[,$filter[,$ns[,$options]]])
  811. Returns an array containing a list of all pages linking to $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() for examples.
  812. A typical query:
  813. my @links = $bot->what_links_here("Meta:Sandbox", undef, 1, {hook=>\&mysub});
  814. sub mysub{
  815. my ($res) = @_;
  816. foreach my $hash (@$res) {
  817. my $title = $hash->{'title'};
  818. my $is_redir = $hash->{'redirect'};
  819. print "Redirect: $title\n" if $is_redir;
  820. print "Page: $title\n" unless $is_redir;
  821. }
  822. }
  823. Transclusions are no longer handled by what_links_here() - use list_transclusions() instead.
  824. =cut
  825. sub what_links_here {
  826. my $self = shift;
  827. my $page = shift;
  828. my $filter = shift;
  829. my $ns = shift;
  830. my $options = shift;
  831. $ns = join('|', @$ns) if (ref $ns eq 'ARRAY'); # Allow array of namespaces
  832. if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) { # Verify $filter
  833. $filter = $1;
  834. }
  835. # http://en.wikipedia.org/w/api.php?action=query&list=backlinks&bltitle=template:tlx
  836. my $hash = {
  837. action => 'query',
  838. list => 'backlinks',
  839. bltitle => $page,
  840. blnamespace => $ns,
  841. };
  842. $hash->{'blfilterredir'} = $filter if $filter;
  843. $options->{'max'} = 1 unless $options->{'max'};
  844. my $res = $self->{api}->list($hash, $options);
  845. return $self->_handle_api_error() unless $res;
  846. return 1 if (!ref $res); # When using a callback hook, this won't be a reference
  847. my @links;
  848. foreach my $hashref (@$res) {
  849. my $title = $hashref->{'title'};
  850. my $redirect = defined($hashref->{'redirect'});
  851. push @links, { title => $title, redirect => $redirect };
  852. }
  853. return @links;
  854. }
  855. =head2 list_transclusions($page[,$filter[,$ns[,$options]]])
  856. Returns an array containing a list of all pages transcluding $page. The array structure contains 'title' and 'redirect' is defined if the title is a redirect. $filter can be one of: all (default), redirects (list only redirects), nonredirects (list only non-redirects). $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). $options is a hashref as described by MediaWiki::API: Set max to limit the number of queries performed. Set hook to a subroutine reference to use a callback hook for incremental processing. Refer to the section on linksearch() or what_links_here() for examples.
  857. A typical query:
  858. $bot->list_transclusions("Template:Tlx", undef, 4, {hook => \&mysub});
  859. sub mysub{
  860. my ($res) = @_;
  861. foreach my $hash (@$res) {
  862. my $title = $hash->{'title'};
  863. my $is_redir = $hash->{'redirect'};
  864. print "Redirect: $title\n" if $is_redir;
  865. print "Page: $title\n" unless $is_redir;
  866. }
  867. }
  868. =cut
  869. sub list_transclusions {
  870. my $self = shift;
  871. my $page = shift;
  872. my $filter = shift;
  873. my $ns = shift;
  874. my $options = shift;
  875. $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');
  876. if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) { # Verify $filter
  877. $filter = $1;
  878. }
  879. # http://en.wikipedia.org/w/api.php?action=query&list=embeddedin&eititle=Template:Stub
  880. my $hash = {
  881. action => 'query',
  882. list => 'embeddedin',
  883. eititle => $page,
  884. einamespace => $ns,
  885. };
  886. $hash->{'eifilterredir'} = $filter if $filter;
  887. $options->{'max'} = 1 unless $options->{'max'};
  888. my $res = $self->{api}->list($hash, $options);
  889. return $self->_handle_api_error() unless $res;
  890. return 1 if (!ref $res); # When using a callback hook, this won't be a reference
  891. my @links;
  892. foreach my $hashref (@$res) {
  893. my $title = $hashref->{'title'};
  894. my $redirect = defined($hashref->{'redirect'});
  895. push @links, { title => $title, redirect => $redirect };
  896. }
  897. return @links;
  898. }
  899. =head2 get_pages_in_category($category_name[,$options_hashref])
  900. Returns an array containing the names of all pages in the specified category (include Category: prefix). Does not recurse into sub-categories.
  901. my @pages = $bot->get_pages_in_category("Category:People on stamps of Gabon");
  902. print "The pages in Category:People on stamps of Gabon are:\n@pages\n";
  903. The options hashref is as described in the section on linksearch(). Use { max => 0 } to get all results.
  904. =cut
  905. sub get_pages_in_category {
  906. my $self = shift;
  907. my $category = shift;
  908. my $options = shift;
  909. if ($category =~ m/:/) { # It might have a namespace name
  910. my ($cat, $title) = split(/:/, $category, 2);
  911. if ($cat ne 'Category') { # 'Category' is a canonical name for ns14
  912. my $ns_data = $self->_get_ns_data();
  913. my $cat_ns_name = $ns_data->{'14'}; # ns14 gives us the localized name for 'Category'
  914. if ($cat ne $cat_ns_name) {
  915. $category = "$cat_ns_name:$category";
  916. }
  917. }
  918. }
  919. else { # Definitely no namespace name, since there's no colon
  920. $category = "Category:$category";
  921. }
  922. warn "Category to fetch is [[$category]]" if $self->{'debug'} > 1;
  923. my $hash = {
  924. action => 'query',
  925. list => 'categorymembers',
  926. cmtitle => $category,
  927. };
  928. $options->{'max'} = 1 unless defined($options->{'max'});
  929. delete($options->{'max'}) if $options->{'max'} == 0;
  930. my $res = $self->{api}->list($hash, $options);
  931. return 1 if (!ref $res); # Not a hashref when using callback
  932. return $self->_handle_api_error() unless $res;
  933. my @pages;
  934. foreach my $hash (@$res) {
  935. my $title = $hash->{'title'};
  936. push @pages, $title;
  937. }
  938. return @pages;
  939. }
  940. =head2 get_all_pages_in_category($category_name[,$options_hashref])
  941. Returns an array containing the names of ALL pages in the specified category (include the Category: prefix), including sub-categories. The $options_hashref is the same as described for get_pages_in_category().
  942. =cut
  943. { # Instead of using the state pragma, use a bare block
  944. my %data;
  945. sub get_all_pages_in_category {
  946. my $self = shift;
  947. my $base_category = shift;
  948. my $options = shift;
  949. $options->{'max'} = 0 unless defined($options->{'max'});
  950. my @first = $self->get_pages_in_category($base_category, $options);
  951. %data = () unless $_[0]; # This is a special flag for internal use.
  952. # It marks a call to this method as being
  953. # internal. Since %data is a fake state variable,
  954. # it needs to be cleared for every *external*
  955. # call, but not cleared when the call is recursive.
  956. my $ns_data = $self->_get_ns_data();
  957. my $cat_ns_name = $ns_data->{'14'};
  958. foreach my $page (@first) {
  959. if ($page =~ m/^$cat_ns_name:/) {
  960. if (!exists($data{$page})) {
  961. $data{$page} = '';
  962. my @pages = $self->get_all_pages_in_category($page, $options, 1);
  963. foreach (@pages) {
  964. $data{$_} = '';
  965. }
  966. }
  967. else {
  968. $data{$page} = '';
  969. }
  970. }
  971. else {
  972. $data{$page} = '';
  973. }
  974. }
  975. return keys %data;
  976. }
  977. } # This ends the bare block around get_all_pages_in_category()
  978. =head2 linksearch($link[,$ns[,$protocol[,$options]]])
  979. Runs a linksearch on the specified link and returns an array containing anonymous hashes with keys 'url' for the outbound URL, and 'title' for the page the link is on. $ns is a namespace number to search (pass an arrayref to search in multiple namespaces). You can search by $protocol (http is default). The optional $options hashref is fully documented in MediaWiki::API: Set `max` to limit the number of queries performed. Set `hook` to a subroutine reference to use a callback hook for incremental processing.
  980. Set max in $options to get more than one query's worth of results:
  981. my $options = { max => 10, }; # I only want some results
  982. my @links = $bot->linksearch("slashdot.org", 1, undef, $options);
  983. foreach my $hash (@links) {
  984. my $url = $hash->{'url'};
  985. my $page = $hash->{'title'};
  986. print "$page: $url\n";
  987. }
  988. You can also specify a callback function in $options:
  989. my $options = { hook => \&mysub, }; # I want to do incremental processing
  990. $bot->linksearch("slashdot.org", 1, undef, $options);
  991. sub mysub {
  992. my ($res) = @_;
  993. foreach my $hashref (@$res) {
  994. my $url = $hashref->{'url'};
  995. my $page = $hashref->{'title'};
  996. print "$page: $url\n";
  997. }
  998. }
  999. =cut
  1000. sub linksearch {
  1001. my $self = shift;
  1002. my $link = shift;
  1003. my $ns = shift;
  1004. my $prot = shift;
  1005. my $options = shift;
  1006. $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');
  1007. my $hash = {
  1008. action => 'query',
  1009. list => 'exturlusage',
  1010. euprop => 'url|title',
  1011. euquery => $link,
  1012. eunamespace => $ns,
  1013. euprotocol => $prot,
  1014. };
  1015. $options->{'max'} = 1 unless $options->{'max'};
  1016. my $res = $self->{api}->list($hash, $options);
  1017. return $self->_handle_api_error() unless $res;
  1018. return 1 if (!ref $res); # When using a callback hook, this won't be a reference
  1019. my @links;
  1020. foreach my $hashref (@$res) {
  1021. my $url = $hashref->{'url'};
  1022. my $page = $hashref->{'title'};
  1023. push(@links, { 'url' => $url, 'title' => $page });
  1024. }
  1025. return @links;
  1026. }
  1027. =head2 purge_page($pagename)
  1028. Purges the server cache of the specified page. Pass an array reference to purge multiple pages. Returns true on success; false on failure. If you really care, a true return value is the number of pages successfully purged. You could check that it is the same as the number you wanted to purge - maybe some pages don't exist, or you passed invalid titles, or you aren't allowed to purge the cache:
  1029. my @to_purge = ('Main Page', 'A', 'B', 'C', 'Very unlikely to exist');
  1030. my $size = scalar @to_purge;
  1031. print "all-at-once:\n";
  1032. my $success = $bot->purge_page(\@to_purge);
  1033. if ($success == $size) {
  1034. print "@to_purge: OK ($success/$size)\n";
  1035. }
  1036. else {
  1037. my $missed = @to_purge - $success;
  1038. print "We couldn't purge $missed pages (list was: "
  1039. . join(', ', @to_purge)
  1040. . ")\n";
  1041. }
  1042. # OR
  1043. print "\n\none-at-a-time:\n";
  1044. foreach my $page (@to_purge) {
  1045. my $ok = $bot->purge_page($page);
  1046. print "$page: $ok\n";
  1047. }
  1048. =cut
  1049. sub purge_page {
  1050. my $self = shift;
  1051. my $page = shift;
  1052. my $hash;
  1053. if (ref $page eq 'ARRAY') { # If it is an array reference...
  1054. $hash = {
  1055. action => 'purge',
  1056. titles => join('|', @$page), # dereference it and purge all those titles
  1057. };
  1058. }
  1059. else { # Just one page
  1060. $hash = {
  1061. action => 'purge',
  1062. titles => $page,
  1063. };
  1064. }
  1065. my $res = $self->{api}->api($hash);
  1066. return $self->_handle_api_error() unless $res;
  1067. my $success = 0;
  1068. foreach my $hashref (@{ $res->{'purge'} }) {
  1069. $success++ if exists $hashref->{'purged'};
  1070. }
  1071. return $success;
  1072. }
  1073. =head2 get_namespace_names()
  1074. get_namespace_names returns a hash linking the namespace id, such as 1, to its named equivalent, such as "Talk".
  1075. =cut
  1076. sub get_namespace_names {
  1077. my $self = shift;
  1078. my %return;
  1079. my $res = $self->{api}->api({
  1080. action => 'query',
  1081. meta => 'siteinfo',
  1082. siprop => 'namespaces'
  1083. });
  1084. return $self->_handle_api_error() unless $res;
  1085. foreach my $id (keys %{ $res->{query}->{namespaces} }) {
  1086. $return{$id} = $res->{query}->{namespaces}->{$id}->{'*'};
  1087. }
  1088. if ($return{1} or $_[0] > 1) {
  1089. return %return;
  1090. }
  1091. else {
  1092. return $self->get_namespace_names($_[0] + 1);
  1093. }
  1094. }
  1095. =head2 image_usage($image[,$ns[,$filter,[$options]]])
  1096. Gets a list of pages which include a certain image. Additional parameters are the namespace number to fetch results from (or an arrayref of multiple namespace numbers); $filter is 'all', 'redirects' (to return only redirects), or 'nonredirects' (to return no redirects). $options is a hashref as described in the section for linksearch().
  1097. my @pages = $bot->image_usage("File:Albert Einstein Head.jpg");
  1098. or, make use of the options hashref to do incremental processing:
  1099. $bot->image_usage("File:Albert Einstein Head.jpg", undef, undef, {hook=>\&mysub, max=>5});
  1100. sub mysub {
  1101. my $res = shift;
  1102. foreach my $page (@$res) {
  1103. my $title = $page->{'title'};
  1104. print "$title\n";
  1105. }
  1106. }
  1107. =cut
  1108. sub image_usage {
  1109. my $self = shift;
  1110. my $image = shift;
  1111. my $ns = shift;
  1112. my $filter = shift;
  1113. my $options = shift;
  1114. if ($image !~ m/^File:|Image:/) {
  1115. my $ns_data = $self->_get_ns_data();
  1116. my $image_ns_name = $ns_data->{'6'};
  1117. if ($image !~ m/^\Q$image_ns_name\E:/) {
  1118. $image = "$image_ns_name:$image";
  1119. }
  1120. }
  1121. $options->{'max'} = 1 unless defined($options->{'max'});
  1122. delete($options->{'max'}) if $options->{'max'} == 0;
  1123. $ns = join('|', @$ns) if (ref $ns eq 'ARRAY');
  1124. my $hash = {
  1125. action => 'query',
  1126. list => 'imageusage',
  1127. iutitle => $image,
  1128. iunamespace => $ns,
  1129. };
  1130. if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) {
  1131. $hash->{'iufilterredir'} = $1;
  1132. }
  1133. my $res = $self->{api}->list($hash, $options);
  1134. return $self->_handle_api_error() unless $res;
  1135. return 1 if (!ref $res); # When using a callback hook, this won't be a reference
  1136. my @pages;
  1137. foreach my $hashref (@$res) {
  1138. my $title = $hashref->{'title'};
  1139. push(@pages, $title);
  1140. }
  1141. return @pages;
  1142. }
  1143. =head2 links_to_image($image)
  1144. A backward-compatible call to image_usage(). You can provide only the image name.
  1145. =cut
  1146. sub links_to_image {
  1147. my $self = shift;
  1148. return $self->image_usage($_[0]);
  1149. }
  1150. =head2 is_blocked($user)
  1151. Checks if a user is currently blocked.
  1152. =cut
  1153. sub is_blocked {
  1154. my $self = shift;
  1155. my $user = shift;
  1156. # http://en.wikipedia.org/w/api.php?action=query&meta=blocks&bkusers=$user&bklimit=1&bkprop=id
  1157. my $hash = {
  1158. action => 'query',
  1159. list => 'blocks',
  1160. bkusers => $user,
  1161. bklimit => 1,
  1162. bkprop => 'id',
  1163. };
  1164. my $res = $self->{api}->api($hash);
  1165. return $self->_handle_api_error() unless $res;
  1166. my $number = scalar @{ $res->{query}->{"blocks"} }; # The number of blocks returned
  1167. if ($number == 1) {
  1168. return 1;
  1169. }
  1170. elsif ($number == 0) {
  1171. return 0;
  1172. }
  1173. else {
  1174. return; # UNPOSSIBLE!
  1175. }
  1176. }
  1177. =head2 test_blocked($user)
  1178. Retained for backwards compatibility. Use is_blocked($user) for clarity.
  1179. =cut
  1180. sub test_blocked { # For backwards-compatibility
  1181. return (is_blocked(@_));
  1182. }
  1183. =head2 test_image_exists($page)
  1184. Checks if an image exists at $page. 0 means no, 1 means yes, local, 2
  1185. means on commons, 3 means doesn't exist but there is text on the page.
  1186. If you pass in an arrayref of images, you'll get out an arrayref of
  1187. results.
  1188. my $exists = $bot->test_image_exists('File:Albert Einstein Head.jpg');
  1189. if ($exists == 0) {
  1190. print "Doesn't exist\n";
  1191. }
  1192. elsif ($exists == 1) {
  1193. print "Exists locally\n";
  1194. }
  1195. elsif ($exists == 2) {
  1196. print "Exists on Commons\n";
  1197. }
  1198. =cut
  1199. sub test_image_exists {
  1200. my $self = shift;
  1201. my $image = shift;
  1202. my $multi = 0;
  1203. if (ref $image eq 'ARRAY') {
  1204. $image = join('|', @$image);
  1205. $multi = 1; # so we know whether to return a hash or a single scalar
  1206. }
  1207. my $res = $self->{api}->api({
  1208. action => 'query',
  1209. titles => $image,
  1210. iilimit => 1,
  1211. prop => 'imageinfo'
  1212. });
  1213. return $self->_handle_api_error() unless $res;
  1214. my @return;
  1215. # use Data::Dumper; print STDERR Dumper($res) and die;
  1216. foreach my $id (keys %{ $res->{query}->{pages} }) {
  1217. my $title = $res->{query}->{pages}->{$id}->{title};
  1218. if ($res->{query}->{pages}->{$id}->{imagerepository} eq 'shared') {
  1219. if ($multi) {
  1220. unshift @return, 2;
  1221. }
  1222. else {
  1223. return 2;
  1224. }
  1225. }
  1226. elsif (exists($res->{query}->{pages}->{$id}->{missing})) {
  1227. if ($multi) {
  1228. unshift @return, 0;
  1229. }
  1230. else {
  1231. return 0;
  1232. }
  1233. }
  1234. elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq '') {
  1235. if ($multi) {
  1236. unshift @return, 3;
  1237. }
  1238. else {
  1239. return 3;
  1240. }
  1241. }
  1242. elsif ($res->{query}->{pages}->{$id}->{imagerepository} eq 'local') {
  1243. if ($multi) {
  1244. unshift @return, 1;
  1245. }
  1246. else {
  1247. return 1;
  1248. }
  1249. }
  1250. }
  1251. # use Data::Dumper; print STDERR Dumper(\@return) and die;
  1252. return \@return;
  1253. }
  1254. =head2 get_pages_in_namespace($namespace_id, $page_limit)
  1255. Returns an array containing the names of all pages in the specified namespace. The $namespace_id must be a number, not a namespace name. Setting $page_limit is optional. If $page_limit is over 500, it will be rounded up to the next multiple of 500.
  1256. =cut
  1257. sub get_pages_in_namespace {
  1258. my $self = shift;
  1259. my $namespace = shift;
  1260. my $limit = shift || 500;
  1261. my $options = shift;
  1262. $limit = 5000 if $self->{'highlimits'};
  1263. my $hash = {
  1264. action => 'query',
  1265. list => 'allpages',
  1266. apnamespace => $namespace,
  1267. aplimit => $limit,
  1268. };
  1269. $options->{'max'} = 1 unless $options->{'max'};
  1270. my $res = $self->{api}->list($hash, $options);
  1271. return $self->_handle_api_error() unless $res;
  1272. return 1 if (!ref $res); # Not a ref when using callback
  1273. my @return;
  1274. foreach (@{$res}) {
  1275. push @return, $_->{title};
  1276. }
  1277. return @return;
  1278. }
  1279. =head2 count_contributions($user)
  1280. Uses the API to count $user's contributions.
  1281. =cut
  1282. sub count_contributions {
  1283. my $self = shift;
  1284. my $username = shift;
  1285. $username =~ s/User://i; # Strip namespace
  1286. my $res = $self->{api}->list({
  1287. action => 'query',
  1288. list => 'users',
  1289. ususers => $username,
  1290. usprop => 'editcount'
  1291. },
  1292. { max => 1 });
  1293. return $self->_handle_api_error() unless $res;
  1294. my $return = ${$res}[0]->{'editcount'};
  1295. if ($return or $_[0] > 1) {
  1296. return $return;
  1297. }
  1298. else {
  1299. return $self->count_contributions($username, $_[0] + 1);
  1300. }
  1301. }
  1302. =head2 last_active($user)
  1303. Returns the last active time of $user in YYYY-MM-DDTHH:MM:SSZ
  1304. =cut
  1305. sub last_active {
  1306. my $self = shift;
  1307. my $username = shift;
  1308. unless ($username =~ /User:/i) { $username = "User:" . $username; }
  1309. my $res = $self->{api}->list({
  1310. action => 'query',
  1311. list => 'usercontribs',
  1312. ucuser => $username,
  1313. uclimit => 1
  1314. },
  1315. { max => 1 });
  1316. return $self->_handle_api_error() unless $res;
  1317. return ${$res}[0]->{'timestamp'};
  1318. }
  1319. =head2 recent_edit_to_page($page)
  1320. Returns the timestamp of the most recent (top) edit to $page.
  1321. =cut
  1322. sub recent_edit_to_page {
  1323. my $self = shift;
  1324. my $page = shift;
  1325. my $res = $self->{api}->api({
  1326. action => 'query',
  1327. prop => 'revisions',
  1328. titles => $page,
  1329. rvlimit => 1
  1330. },
  1331. { max => 1 });
  1332. return $self->_handle_api_error() unless $res;
  1333. my ($id, $data) = %{ $res->{query}->{pages} };
  1334. return $data->{revisions}[0]->{timestamp};
  1335. }
  1336. =head2 get_users($page, $limit, $revision, $direction)
  1337. Gets the most recent editors to $page, up to $limit, starting from $revision and going in $direction.
  1338. =cut
  1339. sub get_users {
  1340. my $self = shift;
  1341. my $pagename = shift;
  1342. my $limit = shift || 5;
  1343. my $rvstartid = shift;
  1344. my $direction = shift;
  1345. my @return;
  1346. my @revisions;
  1347. if ($limit > 50) {
  1348. $self->{errstr} = "Error requesting history for $pagename: Limit may not be set to values above 50";
  1349. carp $self->{errstr};
  1350. return;
  1351. }
  1352. my $hash = {
  1353. action => 'query',
  1354. prop => 'revisions',
  1355. titles => $pagename,
  1356. rvprop => 'ids|timestamp|user|comment',
  1357. rvlimit => $limit
  1358. };
  1359. $hash->{rvstartid} = $rvstartid if ($rvstartid);
  1360. $hash->{rvdir} = $direction if ($direction);
  1361. my $res = $self->{api}->api($hash);
  1362. return $self->_handle_api_error() unless $res;
  1363. my ($id) = keys %{ $res->{query}->{pages} };
  1364. my $array = $res->{query}->{pages}->{$id}->{revisions};
  1365. foreach (@{$array}) {
  1366. push @return, $_->{user};
  1367. }
  1368. return @return;
  1369. }
  1370. =head2 was_blocked($user)
  1371. Returns 1 if $user has ever been blocked.
  1372. =cut
  1373. sub was_blocked {
  1374. my $self = shift;
  1375. my $user = shift;
  1376. $user =~ s/User://i; # Strip User: prefix, if present
  1377. # http://en.wikipedia.org/w/api.php?action=query&list=logevents&letype=block&letitle=User:127.0.0.1&lelimit=1&leprop=ids
  1378. my $hash = {
  1379. action => 'query',
  1380. list => 'logevents',
  1381. letype => 'block',
  1382. letitle => "User:$user", # Ensure the User: prefix is there!
  1383. lelimit => 1,
  1384. leprop => 'ids',
  1385. };
  1386. my $res = $self->{api}->api($hash);
  1387. return $self->_handle_api_error() unless $res;
  1388. my $number = scalar @{ $res->{'query'}->{'logevents'} }; # The number of blocks returned
  1389. if ($number == 1) {
  1390. return 1;
  1391. }
  1392. elsif ($number == 0) {
  1393. return 0;
  1394. }
  1395. else {
  1396. return; # UNPOSSIBLE!
  1397. }
  1398. }
  1399. =head2 test_block_hist($user)
  1400. Retained for backwards compatibility. Use was_blocked($user) for clarity.
  1401. =cut
  1402. sub test_block_hist { # Backwards compatibility
  1403. return (was_blocked(@_));
  1404. }
  1405. =head2 expandtemplates($page[, $text])
  1406. Expands templates on $page, using $text if provided, otherwise loading the page text automatically.
  1407. =cut
  1408. sub expandtemplates {
  1409. my $self = shift;
  1410. my $page = shift;
  1411. my $text = shift;
  1412. unless ($text) {
  1413. $text = $self->get_text($page);
  1414. }
  1415. my $hash = {
  1416. action => 'expandtemplates',
  1417. title => $page,
  1418. text => $text,
  1419. };
  1420. my $res = $self->{api}->api($hash);
  1421. return $self->_handle_api_error() unless $res;
  1422. my $expanded = $res->{'expandtemplates'}->{'*'};
  1423. return $expanded;
  1424. }
  1425. =head2 get_allusers($limit)
  1426. Returns an array of all users. Default limit is 500.
  1427. =cut
  1428. sub get_allusers {
  1429. my $self = shift;
  1430. my $limit = shift;
  1431. my @return = ();
  1432. $limit = 500 unless $limit;
  1433. my $res = $self->{api}->api({
  1434. action => 'query',
  1435. list => 'allusers',
  1436. aulimit => $limit
  1437. });
  1438. for my $ref (@{ $res->{query}->{allusers} }) {
  1439. push @return, $ref->{name};
  1440. }
  1441. return @return;
  1442. }
  1443. =head2 db_to_domain($wiki)
  1444. Converts a wiki/database name (enwiki) to the domain name (en.wikipedia.org).
  1445. my @wikis = ("enwiki", "kowiki", "bat-smgwiki", "nonexistent");
  1446. foreach my $wiki (@wikis) {
  1447. my $domain = $bot->db_to_domain($wiki);
  1448. next if !defined($domain);
  1449. print "$wiki: $domain\n";
  1450. }
  1451. You can pass an arrayref to do bulk lookup:
  1452. my @wikis = ("enwiki", "kowiki", "bat-smgwiki", "nonexistent");
  1453. my $domains = $bot->db_to_domain(\@wikis);
  1454. foreach my $domain (@$domains) {
  1455. next if !defined($domain);
  1456. print "$domain\n";
  1457. }
  1458. =cut
  1459. sub db_to_domain {
  1460. my $self = shift;
  1461. my $wiki = shift;
  1462. if (!$self->{sitematrix}) {
  1463. $self->_get_sitematrix();
  1464. }
  1465. if (ref $wiki eq 'ARRAY') {
  1466. my @return;
  1467. foreach my $w (@$wiki) {
  1468. $wiki =~ s/_p$//; # Strip off a _p suffix, if present
  1469. my $domain = $self->{'sitematrix'}->{$w} || undef;
  1470. push(@return, $domain);
  1471. }
  1472. return \@return;
  1473. }
  1474. else {
  1475. $wiki =~ s/_p$//; # Strip off a _p suffix, if present
  1476. my $domain = $self->{'sitematrix'}->{$wiki} || undef;
  1477. return $domain;
  1478. }
  1479. }
  1480. =head2 domain_to_db($wiki)
  1481. As you might expect, does the opposite of db_to_domain(): Converts a domain
  1482. name into a database/wiki name.
  1483. =cut
  1484. sub domain_to_db {
  1485. my $self = shift;
  1486. my $wiki = shift;
  1487. if (!$self->{sitematrix}) {
  1488. $self->_get_sitematrix();
  1489. }
  1490. if (ref $wiki eq 'ARRAY') {
  1491. my @return;
  1492. foreach my $w (@$wiki) {
  1493. my $db = $self->{'sitematrix'}->{$w} || undef;
  1494. push(@return, $db);
  1495. }
  1496. return \@return;
  1497. }
  1498. else {
  1499. my $db = $self->{'sitematrix'}->{$wiki} || undef;
  1500. return $db;
  1501. }
  1502. }
  1503. =head2 diff($options_hashref)
  1504. This allows retrieval of a diff from the API. The return is a scalar containing the HTML table of the diff. Options are as follows:
  1505. =over 4
  1506. =item *
  1507. title is the title to use. Provide I<either> this or revid.
  1508. =item *
  1509. revid is any revid to diff from. If you also specified title, only title will be honoured.
  1510. =item *
  1511. oldid is an identifier to diff to. This can be a revid, or the special values 'cur', 'prev' or 'next'
  1512. =back
  1513. =cut
  1514. sub diff {
  1515. my $self = shift;
  1516. my $title;
  1517. my $revid;
  1518. my $oldid;
  1519. if (ref $_[0] eq 'HASH') {
  1520. $title = $_[0]->{'title'};
  1521. $revid = $_[0]->{'revid'};
  1522. $oldid = $_[0]->{'oldid'};
  1523. }
  1524. else {
  1525. $title = shift;
  1526. $revid = shift;
  1527. $oldid = shift;
  1528. }
  1529. my $hash = {
  1530. action => 'query',
  1531. prop => 'revisions',
  1532. rvdiffto => $oldid,
  1533. };
  1534. if ($title) {
  1535. $hash->{'titles'} = $title;
  1536. $hash->{'rvlimit'} = 1;
  1537. }
  1538. elsif ($revid) {
  1539. $hash->{'revids'} = $revid;
  1540. }
  1541. my $res = $self->{api}->api($hash);
  1542. return $self->_handle_api_error() unless $res;
  1543. my @revids = keys %{ $res->{'query'}->{'pages'} };
  1544. my $diff = $res->{'query'}->{'pages'}->{ $revids[0] }->{'revisions'}->[0]->{'diff'}->{'*'};
  1545. return $diff;
  1546. }
  1547. =head2 prefixindex($prefix[,$ns[,$filter[,$options]]])
  1548. This returns an array of hashrefs containing page titles that start with the given $prefix. $filter is one of 'all', 'redirects', or 'nonredirects'; $ns is a single namespace number (unlike linksearch etc, which can accept an arrayref of numbers). $options is a hashref as described in the section on linksearch() or in MediaWiki::API. The hashref has keys 'title' and 'redirect' (present if the page is a redirect, not present otherwise).
  1549. my @prefix_pages = $bot->prefixindex("User:Mike.lifeguard");
  1550. # Or, the more efficient equivalent
  1551. my @prefix_pages = $bot->prefixindex("Mike.lifeguard", 2);
  1552. foreach my $hashref (@prefix_pages) {
  1553. my $title = $hashref->{'title'};
  1554. if ($hashref->{'redirect'}) {
  1555. print "$title is a redirect\n";
  1556. }
  1557. else {
  1558. print "$title is not a redirect\n";
  1559. }
  1560. }
  1561. =cut
  1562. sub prefixindex {
  1563. my $self = shift;
  1564. my $prefix = shift;
  1565. my $ns = shift;
  1566. my $filter = shift;
  1567. my $options = shift;
  1568. if (defined($filter) and $filter =~ m/(all|redirects|nonredirects)/) { # Verify
  1569. $filter = $1;
  1570. }
  1571. if (!$ns && $prefix =~ m/:/) {
  1572. print STDERR "Converted '$prefix' to..." if $self->{'debug'} > 1;
  1573. my ($name) = split(/:/, $prefix, 2);
  1574. my $ns_data = $self->_get_ns_data();
  1575. $ns = $ns_data->{$name};
  1576. $prefix =~ s/^$name://;
  1577. warn "'$prefix' with a namespace filter $ns" if $self->{'debug'} > 1;
  1578. }
  1579. my $hash = {
  1580. action => 'query',
  1581. list => 'allpages',
  1582. apprefix => $prefix,
  1583. };
  1584. $hash->{'apnamespace'} = $ns if $ns;
  1585. $hash->{'apfilterredir'} = $filter if $filter;
  1586. $options->{'max'} = 1 unless $options->{'max'};
  1587. my $res = $self->{api}->list($hash, $options);
  1588. return $self->_handle_api_error() unless $res;
  1589. return 1 if (!ref $res); # Not a ref when using callback hook
  1590. my @pages;
  1591. foreach my $hashref (@$res) {
  1592. my $title = $hashref->{'title'};
  1593. my $redirect = defined($hashref->{'redirect'});
  1594. push @pages, { title => $title, redirect => $redirect };
  1595. }
  1596. return @pages;
  1597. }
  1598. =head2 search($search_term[,$ns[,$options_hashref]])
  1599. This is a simple search for your $search_term in page text. $ns is a namespace number to search in, or an arrayref of numbers (default is main namespace). $options_hashref is a hashref as described in MediaWiki::API or the section on linksearch(). It returns an array of page titles matching.
  1600. my @pages = $bot->search("Mike.lifeguard", 2);
  1601. print "@pages\n";
  1602. Or, use a callback for incremental processing:
  1603. my @pages = $bot->search("Mike.lifeguard", 2, { hook => \&mysub });
  1604. sub mysub {
  1605. my ($res) = @_;
  1606. foreach my $hashref (@$res) {
  1607. my $page = $hashref->{'title'};
  1608. print "$page\n";
  1609. }
  1610. }
  1611. =cut
  1612. sub search {
  1613. my $self = shift;
  1614. my $term = shift;
  1615. my $ns = shift || 0;
  1616. my $options = shift;
  1617. if (ref $ns eq 'ARRAY') { # Accept a hashref
  1618. $ns = join('|', @$ns);
  1619. }
  1620. my $hash = {
  1621. action => 'query',
  1622. list => 'search',
  1623. srsearch => $term,
  1624. srwhat => 'text',
  1625. #srinfo => 'totalhits',
  1626. srprop => 'size',
  1627. srredirects => 0,
  1628. };
  1629. $options->{'max'} = 1 unless $options->{'max'};
  1630. my $res = $self->{api}->list($hash, $options);
  1631. return $self->_handle_api_error() unless $res;
  1632. return 1 if (!ref $res); # Not a ref when used with callback
  1633. my @pages;
  1634. foreach my $result (@$res) {
  1635. my $title = $result->{'title'};
  1636. push @pages, $title;
  1637. }
  1638. return @pages;
  1639. }
  1640. =head2 get_log($data, $options)
  1641. This fetches log entries, and returns results as a reference to an array of hashes. The options are as follows:
  1642. =over 4
  1643. =item *
  1644. type is the log type (block, delete...)
  1645. =item *
  1646. user is the user who I<performed> the action. Do not include the User: prefix
  1647. =item *
  1648. target is the target of the action. Where an action was performed to a page, it is the page title. Where an action was performed to a user, it is User:$username.
  1649. =back
  1650. my $log = $bot->get_log({
  1651. type => 'block',
  1652. user => 'User:Mike.lifeguard',
  1653. });
  1654. foreach my $entry (@$log) {
  1655. my $user = $entry->{'title'};
  1656. print "$user\n";
  1657. }
  1658. $bot->get_log({
  1659. type => 'block',
  1660. user => 'User:Mike.lifeguard',
  1661. },
  1662. { hook => \&mysub, max => 10 }
  1663. );
  1664. sub mysub {
  1665. my ($res) = @_;
  1666. foreach my $hashref (@$res) {
  1667. my $title = $hashref->{'title'};
  1668. print "$title\n";
  1669. }
  1670. }
  1671. =cut
  # Fetch log entries through the API's list=logevents module.
  #
  # Parameters:
  #   $data    - hashref with the optional keys:
  #                type   - log type, sent as letype
  #                user   - performing user, sent as leuser; a leading
  #                         "<namespace 2 name>:" prefix is removed first
  #                target - sent as letitle
  #   $options - optional hashref handed to the API object's list() method
  #              (e.g. hook, max). 'max' is set to 1 when it is missing or
  #              false; note that this is written into the caller's hashref.
  #
  # Returns:
  #   - whatever _handle_api_error() returns when list() yields a false value;
  #   - 1 when list() yields a non-reference (the callback/hook case);
  #   - otherwise the value returned by list().
  sub get_log {
      my $self    = shift;
      my $data    = shift;
      my $options = shift;

      my $log_type = $data->{'type'};
      my $user     = $data->{'user'};
      my $target   = $data->{'target'};

      # Only look up the namespace name when there is a user to clean up;
      # running the substitution on an undefined $user raised an
      # "uninitialized value" warning.
      if (defined $user) {
          my $ns_data      = $self->_get_ns_data();
          my $user_ns_name = $ns_data->{'2'};

          # \Q...\E: the namespace name is data, not a pattern.
          $user =~ s/^\Q$user_ns_name\E:// if defined $user_ns_name;
      }

      my $hash = {
          action => 'query',
          list   => 'logevents',
      };
      $hash->{'letype'}  = $log_type if $log_type;
      $hash->{'leuser'}  = $user     if $user;
      $hash->{'letitle'} = $target   if $target;
      $options->{'max'} = 1 unless $options->{'max'};

      my $res = $self->{api}->list($hash, $options);
      return $self->_handle_api_error() unless $res;
      return 1 if (!ref $res);    # Not a ref when using callback

      return $res;
  }
  1695. =head2 is_g_blocked($ip)
  1696. Returns what IP/range block I<currently in place> affects the IP/range. The return is a scalar of an IP/range if found (evaluates to true in boolean context); 0 if no block is found (evaluates false in boolean context). Pass in a single IP or CIDR range.
  1697. =cut
  # Ask the API's list=globalblocks module for a block affecting $ip.
  #
  # Parameters:
  #   $ip - the value sent as bgip (a single IP or a range).
  #
  # Returns:
  #   - whatever _handle_api_error() returns when the API call yields a
  #     false value;
  #   - 0 when the response contains no first globalblocks entry;
  #   - otherwise the 'address' field of that first entry.
  sub is_g_blocked {
      my ($self, $ip) = @_;

      # http://en.wikipedia.org/w/api.php?action=query&list=globalblocks&bglimit=1&bgprop=address&bgip=127.0.0.1
      my %query = (
          action  => 'query',
          list    => 'globalblocks',
          bglimit => 1,
          bgprop  => 'address',
          # bgip finds blocks affecting this IP or range, rangeblocks included.
          bgip    => $ip,
      );

      my $res = $self->{api}->api(\%query);
      return $self->_handle_api_error() unless $res;

      my $first_block = $res->{'query'}->{'globalblocks'}->[0];
      return $first_block ? $first_block->{'address'} : 0;
  }
  1713. =head2 was_g_blocked($ip)
  1714. Returns whether an IP/range was ever globally blocked. You should probably call this method only when your bot is operating on Meta.
  1715. =cut
  1716. sub was_g_blocked {
  1717. my $self = shift;
  1718. my $ip = shift;
  1719. $ip =~ s/User://i; # Strip User: prefix, if present
  1720. # This query should always go to Meta
  1721. unless ($self->{api}->{config}->{api_url} =~
  1722. m,
  1723. http://meta.wikimedia.org/w/api.php
  1724. |
  1725. https://secure.wikimedia.org/wikipedia/meta/w/api.php
  1726. ,x # /x flag is pretty awesome :)
  1727. ) {
  1728. carp "GlobalBlocking queries should probably be sent to Meta; it doesn't look like you're doing so" if $self->{'debug'};
  1729. }
  1730. # http://meta.wikimedia.org/w/api.php?action=query&list=logevents&letype=gblblock&letitle=User:127.0.0.1&lelimit=1&leprop=ids
  1731. my $hash = {
  1732. action => 'query',
  1733. list => 'logevents',
  1734. letyp