PageRenderTime 64ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 0ms

/Wikimate.php

http://github.com/hamstar/Wikimate
PHP | 2470 lines | 1084 code | 288 blank | 1098 comment | 191 complexity | 5ada9f65ebcdcc20fc762db946c09f69 MD5 | raw file
Possible License(s): MIT

Large files files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Wikimate is a wrapper for the MediaWiki API that aims to be very easy to use.
  4. *
  5. * @package Wikimate
  6. * @version 1.0.0
  7. * @copyright SPDX-License-Identifier: MIT
  8. */
  9. /**
  10. * Provides an interface over wiki API objects such as pages and files.
  11. *
  12. * All requests to the API can throw WikimateException if the server is lagged
  13. * and a finite number of retries is exhausted. By default requests are
  14. * retried indefinitely. See {@see Wikimate::request()} for more information.
  15. *
  16. * @author Robert McLeod & Frans P. de Vries
  17. * @since 0.2 December 2010
  18. */
  19. class Wikimate
  20. {
  21. /**
  22. * The current version number (conforms to Semantic Versioning)
  23. *
  24. * @var string
  25. * @link https://semver.org/
  26. */
  27. const VERSION = '1.0.0';
  28. /**
  29. * Identifier for CSRF token
  30. *
  31. * @var string
  32. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Tokens
  33. */
  34. const TOKEN_DEFAULT = 'csrf';
  35. /**
  36. * Identifier for Login token
  37. *
  38. * @var string
  39. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Tokens
  40. */
  41. const TOKEN_LOGIN = 'login';
  42. /**
  43. * Default lag value in seconds
  44. *
  45. * @var integer
  46. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:Maxlag_parameter
  47. */
  48. const MAXLAG_DEFAULT = 5;
  49. /**
  50. * Base URL for API requests
  51. *
  52. * @var string
  53. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Main_page#Endpoint
  54. */
  55. protected $api;
  56. /**
  57. * Default headers for Requests_Session
  58. *
  59. * @var array
  60. */
  61. protected $headers;
  62. /**
  63. * Default data for Requests_Session
  64. *
  65. * @var array
  66. */
  67. protected $data;
  68. /**
  69. * Default options for Requests_Session
  70. *
  71. * @var array
  72. */
  73. protected $options;
  74. /**
  75. * Username for API requests
  76. *
  77. * @var string
  78. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Login#Method_1._login
  79. */
  80. protected $username;
  81. /**
  82. * Password for API requests
  83. *
  84. * @var string
  85. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Login#Method_1._login
  86. */
  87. protected $password;
  88. /**
  89. * Session object for HTTP requests
  90. *
  91. * @var Requests_Session
  92. * @link https://requests.ryanmccue.info/
  93. */
  94. protected $session;
  95. /**
  96. * User agent string for Requests_Session
  97. *
  98. * @var string
  99. * @link https://requests.ryanmccue.info/docs/usage-advanced.html#session-handling
  100. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Etiquette#The_User-Agent_header
  101. */
  102. protected $useragent;
  103. /**
  104. * Error array with API and Wikimate errors
  105. *
  106. * @var array|null
  107. */
  108. protected $error = null;
  109. /**
  110. * Whether to output debug logging
  111. *
  112. * @var boolean
  113. */
  114. protected $debugMode = false;
  115. /**
  116. * Maximum lag in seconds to accept in requests
  117. *
  118. * @var integer
  119. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:Maxlag_parameter
  120. */
  121. protected $maxlag = self::MAXLAG_DEFAULT;
  122. /**
  123. * Maximum number of retries for lagged requests (-1 = retry indefinitely)
  124. *
  125. * @var integer
  126. */
  127. protected $maxretries = -1;
  128. /**
  129. * Stored CSRF token for API requests
  130. *
  131. * @var string|null
  132. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Tokens
  133. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Edit#Additional_notes
  134. */
  135. private $csrfToken = null;
  136. /**
  137. * Creates a new Wikimate object.
  138. *
  139. * @param string $api Base URL for the API
  140. * @param array $headers Default headers for API requests
  141. * @param array $data Default data for API requests
  142. * @param array $options Default options for API requests
  143. * @return Wikimate
  144. */
  public function __construct($api, $headers = array(), $data = array(), $options = array())
  {
      // Keep the session defaults first; initRequests() reads all of them
      $this->options = $options;
      $this->data = $data;
      $this->headers = $headers;

      // The endpoint every API request is sent to
      $this->api = $api;

      // Build the HTTP session from the values stored above
      $this->initRequests();
  }
  153. /**
  154. * Sets up a Requests_Session with appropriate user agent.
  155. *
  156. * @return void
  157. * @link https://requests.ryanmccue.info/docs/usage-advanced.html#session-handling
  158. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Etiquette#The_User-Agent_header
  159. */
  protected function initRequests()
  {
      // Default user agent: library name, version and project URL
      $agent = 'Wikimate/' . self::VERSION . ' (https://github.com/hamstar/Wikimate)';
      $this->useragent = $agent;

      // Create the session with the stored defaults, then apply the agent
      $session = new Requests_Session($this->api, $this->headers, $this->data, $this->options);
      $this->session = $session;
      $this->session->useragent = $agent;
  }
  166. /**
  167. * Sends a GET or POST request in JSON format to the API.
  168. *
  169. * This method handles maxlag errors as advised at:
  170. * {@see https://www.mediawiki.org/wiki/Special:MyLanguage/Manual:Maxlag_parameter}
  171. * The request is sent with the current maxlag value
  172. * (default: 5 seconds, per MAXLAG_DEFAULT).
  173. * If a lag error is received, the method waits (sleeps) for the
  174. * recommended time (per the Retry-After header), then tries again.
  175. * It will do this indefinitely unless the number of retries is limited,
  176. * in which case WikimateException is thrown once the limit is reached.
  177. *
  178. * The string type for $data is used only for upload POST requests,
  179. * and must contain the complete multipart body, including maxlag.
  180. *
  181. * @param array|string $data Data for the request
  182. * @param array $headers Optional extra headers to send with the request
  183. * @param boolean $post True to send a POST request, otherwise GET
  184. * @return array The API response
  185. * @throws WikimateException If lagged and ran out of retries,
  186. * or got an unexpected API response
  187. */
  private function request($data, $headers = array(), $post = false)
  {
      // Number of lagged responses seen so far (stays -1 when retrying forever)
      $retries = 0;
      $maxretries = $this->getMaxretries();

      // Add format & maxlag parameter to request; a string body (upload)
      // must already contain them
      if (is_array($data)) {
          $data['format'] = 'json';
          $data['maxlag'] = $this->getMaxlag();
          $action = isset($data['action']) ? $data['action'] : 'unknown';
      } else {
          $action = 'upload';
      }

      // Define type of HTTP request for messages
      $httptype = $post ? 'POST' : 'GET';

      // Send appropriate type of request, once or multiple times
      while (true) {
          if ($post) {
              // Debug logging of POST requests, except for upload string
              if ($this->debugMode && is_array($data)) {
                  echo "$action $httptype parameters:\n";
                  print_r($data);
              }
              $response = $this->session->post($this->api, $headers, $data);
          } else {
              $querystring = http_build_query($data);
              // Debug logging of GET requests as a query string
              if ($this->debugMode) {
                  echo "$action $httptype parameters:\n";
                  echo $querystring . "\n";
              }
              $response = $this->session->get($this->api . '?' . $querystring, $headers);
          }

          // Check for replication lag error
          $serverLagged = ($response->headers->offsetGet('X-Database-Lag') !== null);
          if (!$serverLagged) {
              break;
          }

          // Determine recommended or default delay; never hand sleep() a negative value
          $retryAfter = $response->headers->offsetGet('Retry-After');
          $sleep = max(0, ($retryAfter !== null) ? (int)$retryAfter : $this->getMaxlag());

          // Check retries limit (a negative limit means: continue indefinitely)
          if ($maxretries >= 0) {
              $retries++;
          } else {
              $retries = -1;
          }
          $giveUp = ($retries > $maxretries);

          if ($this->debugMode) {
              // The lag duration is only reported if the body carries the expected message
              $found = preg_match('/Waiting for [^ ]*: ([0-9.-]+) seconds? lagged/', $response->body, $match);
              $lag = ($found === 1) ? $match[1] : 'unknown';
              if ($giveUp) {
                  echo "Server lagged for {$lag} seconds; retry limit reached\n";
              } else {
                  echo "Server lagged for {$lag} seconds; will retry in {$sleep} seconds\n";
              }
          }

          // Do not waste time sleeping when no further attempt will be made
          if ($giveUp) {
              break;
          }
          sleep($sleep);
      }

      // Throw exception if we ran out of retries
      if ($serverLagged) {
          throw new WikimateException("Server lagged ($retries consecutive maxlag responses)");
      }

      // Check if we got the API doc page (invalid request)
      if (strpos($response->body, "This is an auto-generated MediaWiki API documentation page") !== false) {
          throw new WikimateException("The API could not understand the $action $httptype request");
      }

      // Check if we got a JSON result
      $result = json_decode($response->body, true);
      if ($result === null) {
          throw new WikimateException("The API did not return the $action JSON response");
      }

      if ($this->debugMode) {
          echo "$action $httptype response:\n";
          print_r($result);
      }

      return $result;
  }
  259. /**
  260. * Obtains a wiki token for logging in or data-modifying actions.
  261. *
  262. * If a CSRF (default) token is requested, it is stored and returned
  263. * upon further such requests, instead of making another API call.
  264. * The stored token is discarded via {@see Wikimate::logout()}.
  265. *
  266. * For now this method, in Wikimate tradition, is kept simple and supports
  267. * only the two token types needed elsewhere in the library. It also
  268. * doesn't support the option to request multiple tokens at once.
  269. * See {@see https://www.mediawiki.org/wiki/Special:MyLanguage/API:Tokens}
  270. * for more information.
  271. *
  272. * @param string $type The token type
  273. * @return mixed The requested token (string), or null if error
  274. */
  protected function token($type = self::TOKEN_DEFAULT)
  {
      $wantsCsrf = ($type == self::TOKEN_DEFAULT);
      $wantsLogin = ($type == self::TOKEN_LOGIN);

      // Only the CSRF and login token types are supported
      if (!$wantsCsrf && !$wantsLogin) {
          $this->error = array();
          $this->error['token'] = 'The API does not support the token type';
          return null;
      }

      // Reuse the CSRF token already obtained during this login session
      if ($wantsCsrf && $this->csrfToken !== null) {
          return $this->csrfToken;
      }

      // Ask the API for a fresh token of the requested type
      $tokenResult = $this->request(
          array(
              'action' => 'query',
              'meta' => 'tokens',
              'type' => $type,
          ),
          array(),
          true
      );

      // Record an API error, or clear the previous one
      if (isset($tokenResult['error'])) {
          $this->error = $tokenResult['error'];
          return null;
      }
      $this->error = null;

      // Login tokens are handed back directly and never cached
      if ($wantsLogin) {
          return $tokenResult['query']['tokens']['logintoken'];
      }

      // Cache the CSRF token for the rest of this login session
      $this->csrfToken = $tokenResult['query']['tokens']['csrftoken'];
      return $this->csrfToken;
  }
  309. /**
  310. * Logs in to the wiki.
  311. *
  312. * @param string $username The user name
  313. * @param string $password The user password
  314. * @param string $domain The domain (optional)
  315. * @return boolean True if logged in
  316. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Login#Method_1._login
  317. */
  public function login($username, $password, $domain = null)
  {
      // Obtain login token first; token() has already recorded the error on failure
      $logintoken = $this->token(self::TOKEN_LOGIN);
      if ($logintoken === null) {
          return false;
      }

      $details = array(
          'action' => 'login',
          'lgname' => $username,
          'lgpassword' => $password,
          'lgtoken' => $logintoken,
      );

      // If $domain is provided, set the corresponding detail in the request information array
      if (is_string($domain)) {
          $details['lgdomain'] = $domain;
      }

      // Send the login request
      $loginResult = $this->request($details, array(), true);

      // Check for errors
      if (isset($loginResult['error'])) {
          $this->error = $loginResult['error']; // Set the error if there was one
          return false;
      }
      $this->error = null; // Reset the error status

      // Only an explicit 'Success' result counts as logged in; a response
      // without any login result must not be mistaken for a successful login
      $result = isset($loginResult['login']['result']) ? $loginResult['login']['result'] : null;
      if ($result === 'Success') {
          return true;
      }

      // Some more comprehensive error checking
      $this->error = array();
      if ($result === null) {
          $this->error['auth'] = 'The API did not return a login result';
      } elseif ($result === 'Failed') {
          $this->error['auth'] = 'Incorrect username or password';
      } else {
          $this->error['auth'] = 'The API result was: ' . $result;
      }

      return false;
  }
  358. /**
  359. * Logs out of the wiki and discard CSRF token.
  360. *
  361. * @return boolean True if logged out
  362. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Logout
  363. */
  public function logout()
  {
      // A CSRF token is required for logging out; give up if none is available
      $logouttoken = $this->token();
      if ($logouttoken === null) {
          return false;
      }

      // Token is needed in MediaWiki v1.34+, older versions produce an
      // 'Unrecognized parameter' warning which can be ignored
      $logoutResult = $this->request(
          array(
              'action' => 'logout',
              'token' => $logouttoken,
          ),
          array(),
          true
      );

      // Record an API error and report failure
      if (isset($logoutResult['error'])) {
          $this->error = $logoutResult['error'];
          return false;
      }

      // Success: clear the error status and drop the session's CSRF token
      $this->error = null;
      $this->csrfToken = null;

      return true;
  }
  389. /**
  390. * Gets the current value of the maxlag parameter.
  391. *
  392. * @return integer The maxlag value in seconds
  393. */
  public function getMaxlag()
  {
      // Maximum lag (in seconds) currently sent with each request
      $seconds = $this->maxlag;

      return $seconds;
  }
  398. /**
  399. * Sets the new value of the maxlag parameter.
  400. *
  401. * @param integer $ml The new maxlag value in seconds
  402. * @return Wikimate This object
  403. */
  public function setMaxlag($ml)
  {
      // Normalise to an integer number of seconds before storing
      $seconds = (int)$ml;
      $this->maxlag = $seconds;

      // Return this object to allow call chaining
      return $this;
  }
  409. /**
  410. * Gets the current value of the max retries limit.
  411. *
  412. * @return integer The max retries limit
  413. */
  public function getMaxretries()
  {
      // Retry limit for lagged requests
      $limit = $this->maxretries;

      return $limit;
  }
  418. /**
  419. * Sets the new value of the max retries limit.
  420. *
  421. * @param integer $mr The new max retries limit
  422. * @return Wikimate This object
  423. */
  public function setMaxretries($mr)
  {
      // Normalise to an integer before storing
      $limit = (int)$mr;
      $this->maxretries = $limit;

      // Return this object to allow call chaining
      return $this;
  }
  429. /**
  430. * Gets the user agent for API requests.
  431. *
  432. * @return string The default user agent, or the current one defined
  433. * by {@see Wikimate::setUserAgent()}
  434. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Etiquette#The_User-Agent_header
  435. */
  public function getUserAgent()
  {
      // The agent string last stored by initRequests() or setUserAgent()
      $agent = $this->useragent;

      return $agent;
  }
  440. /**
  441. * Sets the user agent for API requests.
  442. *
  443. * In order to use a custom user agent for all requests in the session,
  444. * call this method before invoking {@see Wikimate::login()}.
  445. *
  446. * @param string $ua The new user agent
  447. * @return Wikimate This object
  448. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Etiquette#The_User-Agent_header
  449. */
  public function setUserAgent($ua)
  {
      // Store the agent as a string
      $agent = (string)$ua;
      $this->useragent = $agent;

      // Keep the HTTP session in sync with the stored value
      $this->session->useragent = $agent;

      // Return this object to allow call chaining
      return $this;
  }
  457. /**
  458. * Sets the debug mode.
  459. *
  460. * @param boolean $b True to turn debugging on
  461. * @return Wikimate This object
  462. */
  public function setDebugMode($b)
  {
      // Normalise to a real boolean, in line with the casts applied by the
      // other setters of this class and the documented property type
      $this->debugMode = (bool)$b;

      // Return this object to allow call chaining
      return $this;
  }
  468. /**
  469. * Gets or prints the Requests configuration.
  470. *
  471. * @param boolean $echo Whether to echo the session options and headers
  472. * @return mixed Options array if $echo is false, or
  473. * True if options/headers have been echoed to STDOUT
  474. */
  public function debugRequestsConfig($echo = false)
  {
      // Quiet mode: just hand back the session options
      if (!$echo) {
          return $this->session->options;
      }

      // Verbose mode: dump options and headers inside a preformatted block
      echo "<pre>Requests options:\n";
      print_r($this->session->options);
      echo "Requests headers:\n";
      print_r($this->session->headers);
      echo "</pre>";

      return true;
  }
  487. /**
  488. * Returns a WikiPage object populated with the page data.
  489. *
  490. * @param string $title The name of the wiki article
  491. * @return WikiPage The page object
  492. */
  public function getPage($title)
  {
      // The page object is given this instance to perform its API requests
      $page = new WikiPage($title, $this);

      return $page;
  }
  497. /**
  498. * Returns a WikiFile object populated with the file data.
  499. *
  500. * @param string $filename The name of the wiki file
  501. * @return WikiFile The file object
  502. */
  public function getFile($filename)
  {
      // The file object is given this instance to perform its API requests
      $file = new WikiFile($filename, $this);

      return $file;
  }
  507. /**
  508. * Performs a query to the wiki API with the given details.
  509. *
  510. * @param array $array Array of details to be passed in the query
  511. * @return array Decoded JSON output from the wiki API
  512. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Query
  513. */
  public function query($array)
  {
      // Force the action, overriding any value the caller supplied
      $params = $array;
      $params['action'] = 'query';

      // Sent as a GET request with no extra headers
      return $this->request($params);
  }
  519. /**
  520. * Performs a parse query to the wiki API.
  521. *
  522. * @param array $array Array of details to be passed in the query
  523. * @return array Decoded JSON output from the wiki API
  524. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Parsing_wikitext
  525. */
  public function parse($array)
  {
      // Force the action, overriding any value the caller supplied
      $params = $array;
      $params['action'] = 'parse';

      // Sent as a GET request with no extra headers
      return $this->request($params);
  }
  531. /**
  532. * Performs an edit query to the wiki API.
  533. *
  534. * @param array $array Array of details to be passed in the query
  535. * @return array|boolean Decoded JSON output from the wiki API
  536. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Edit
  537. */
  public function edit($array)
  {
      // Editing requires a CSRF token; without one nothing is sent
      $edittoken = $this->token();
      if ($edittoken === null) {
          return false;
      }

      // Complete the request parameters
      $params = $array;
      $params['action'] = 'edit';
      $params['token'] = $edittoken;

      // Send as a form-encoded POST
      $headers = array('Content-Type' => "application/x-www-form-urlencoded");

      return $this->request($params, $headers, true);
  }
  551. /**
  552. * Performs a delete query to the wiki API.
  553. *
  554. * @param array $array Array of details to be passed in the query
  555. * @return array|boolean Decoded JSON output from the wiki API
  556. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Delete
  557. */
  public function delete($array)
  {
      // Deleting requires a CSRF token; without one nothing is sent
      $deletetoken = $this->token();
      if ($deletetoken === null) {
          return false;
      }

      // Complete the request parameters
      $params = $array;
      $params['action'] = 'delete';
      $params['token'] = $deletetoken;

      // Send as a form-encoded POST
      $headers = array('Content-Type' => "application/x-www-form-urlencoded");

      return $this->request($params, $headers, true);
  }
  571. /**
  572. * Downloads data from the given URL.
  573. *
  574. * @param string $url The URL to download from
  575. * @return mixed The downloaded data (string), or null if error
  576. */
  public function download($url)
  {
      // Fetch the URL through the shared HTTP session
      $getResult = $this->session->get($url);

      // Successful transfer: the body is the downloaded data
      if ($getResult->success) {
          return $getResult->body;
      }

      // Debug logging of Requests_Response only on failed download
      if ($this->debugMode) {
          echo "download GET response:\n";
          print_r($getResult);
      }

      // Record both a readable message and the raw HTTP status
      $status = $getResult->status_code;
      $this->error = array();
      $this->error['file'] = 'Download error (HTTP status: ' . $status . ')';
      $this->error['http'] = $status;

      return null;
  }
  593. /**
  594. * Uploads a file to the wiki API.
  595. *
  596. * @param array $array Array of details to be used in the upload
  597. * @return array|boolean Decoded JSON output from the wiki API
  598. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Upload
  599. */
  public function upload($array)
  {
      // Obtain default token first
      $uploadtoken = $this->token();
      if ($uploadtoken === null) {
          return false;
      }

      // request() cannot add these to a string body, so set them here
      $array['action'] = 'upload';
      $array['format'] = 'json';
      $array['maxlag'] = $this->getMaxlag();
      $array['token'] = $uploadtoken;

      // File name for the Content-Disposition header of the file part.
      // Line breaks are dropped and double quotes escaped so the value
      // cannot break out of the quoted header parameter.
      $filename = isset($array['filename']) ? (string)$array['filename'] : '';
      $filename = str_replace(array("\r", "\n", '"'), array('', '', '\\"'), $filename);

      // Construct multipart body:
      // https://www.mediawiki.org/w/index.php?title=API:Upload&oldid=2293685#Sample_Raw_Upload
      // https://www.mediawiki.org/w/index.php?title=API:Upload&oldid=2339771#Sample_Raw_POST_of_a_single_chunk
      $boundary = '---Wikimate-' . md5(microtime());
      $body = '';
      foreach ($array as $fieldName => $fieldData) {
          $body .= "--{$boundary}\r\n";
          $body .= 'Content-Disposition: form-data; name="' . $fieldName . '"';
          // Strict comparison: with a loose one, an integer key 0 would be
          // mistaken for the file part on PHP versions before 8
          if ($fieldName === 'file') {
              // Process the (binary) file
              $body .= '; filename="' . $filename . '"' . "\r\n";
              $body .= "Content-Type: application/octet-stream; charset=UTF-8\r\n";
              $body .= "Content-Transfer-Encoding: binary\r\n";
          } else {
              // Process text parameters
              $body .= "\r\n";
              $body .= "Content-Type: text/plain; charset=UTF-8\r\n";
              $body .= "Content-Transfer-Encoding: 8bit\r\n";
          }
          $body .= "\r\n{$fieldData}\r\n";
      }
      $body .= "--{$boundary}--\r\n";

      // Construct multipart headers
      $headers = array(
          'Content-Type' => "multipart/form-data; boundary={$boundary}",
          'Content-Length' => strlen($body),
      );

      return $this->request($body, $headers, true);
  }
  639. /**
  640. * Performs a file revert query to the wiki API.
  641. *
  642. * @param array $array Array of details to be passed in the query
  643. * @return array|boolean Decoded JSON output from the wiki API
  644. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Filerevert
  645. */
  public function filerevert($array)
  {
      // Reverting requires a CSRF token; without one nothing is sent
      $reverttoken = $this->token();
      if ($reverttoken === null) {
          return false;
      }

      // Send as a form-encoded POST
      $headers = array('Content-Type' => "application/x-www-form-urlencoded");

      // Complete the request parameters
      $params = $array;
      $params['action'] = 'filerevert';
      $params['token'] = $reverttoken;

      return $this->request($params, $headers, true);
  }
  659. /**
  660. * Returns the latest error if there is one.
  661. *
  662. * @return mixed The error array, or null if no error
  663. */
  public function getError()
  {
      // Error array from the latest failing call, or null if none is stored
      $latest = $this->error;

      return $latest;
  }
  668. }
  669. /**
  670. * Defines Wikimate's exception for unexpected run-time errors
  671. * while communicating with the API.
  672. * WikimateException can be thrown only from Wikimate::request(),
  673. * and is propagated to callers of this library.
  674. *
  675. * @author Frans P. de Vries
  676. * @since 1.0.0 August 2021
  677. * @link https://www.php.net/manual/en/class.runtimeexception.php
  678. */
  class WikimateException extends RuntimeException
  {
      // Intentionally empty: the class exists only to give callers a
      // distinct exception type; all behaviour comes from RuntimeException.
  }
  682. /**
  683. * Models a wiki article page that can have its text altered and retrieved.
  684. *
  685. * @author Robert McLeod & Frans P. de Vries
  686. * @since 0.2 December 2010
  687. */
  688. class WikiPage
  689. {
  690. /**
  691. * Use section indexes as keys in return array of {@see WikiPage::getAllSections()}
  692. *
  693. * @var integer
  694. */
  695. const SECTIONLIST_BY_INDEX = 1;
  696. /**
  697. * Use section names as keys in return array of {@see WikiPage::getAllSections()}
  698. *
  699. * @var integer
  700. */
  701. const SECTIONLIST_BY_NAME = 2;
  702. /**
  703. * The title of the page
  704. *
  705. * @var string|null
  706. */
  707. protected $title = null;
  708. /**
  709. * Wikimate object for API requests
  710. *
  711. * @var Wikimate|null
  712. */
  713. protected $wikimate = null;
  714. /**
  715. * Whether the page exists
  716. *
  717. * @var boolean
  718. */
  719. protected $exists = false;
  720. /**
  721. * Whether the page is invalid
  722. *
  723. * @var boolean
  724. */
  725. protected $invalid = false;
  726. /**
  727. * Error array with API and WikiPage errors
  728. *
  729. * @var array|null
  730. */
  731. protected $error = null;
  732. /**
  733. * Stores the timestamp for detection of edit conflicts
  734. *
  735. * @var integer|null
  736. */
  737. protected $starttimestamp = null;
  738. /**
  739. * The complete text of the page
  740. *
  741. * @var string|null
  742. */
  743. protected $text = null;
  744. /**
  745. * The sections object for the page
  746. *
  747. * @var stdClass|null
  748. */
  749. protected $sections = null;
  750. /*
  751. *
  752. * Magic methods
  753. *
  754. */
  755. /**
  756. * Constructs a WikiPage object from the title given
  757. * and associate with the passed Wikimate object.
  758. *
  759. * @param string $title Name of the wiki article
  760. * @param Wikimate $wikimate Wikimate object
  761. */
  public function __construct($title, $wikimate)
  {
      // Container for the section offsets, filled in by getText()
      $this->sections = new \stdClass();

      // Page identity and the API wrapper used for all requests
      $this->title = $title;
      $this->wikimate = $wikimate;

      // Load the page text (and section data) from the API right away
      $this->text = $this->getText(true);

      // getText() flags an invalid title; turn that into a page error
      if ($this->invalid) {
          $this->error['page'] = 'Invalid page title - cannot create WikiPage';
      }
  }
  772. /**
  773. * Forgets all object properties.
  774. */
  public function __destruct()
  {
      // Flags go back to their initial state
      $this->invalid = false;
      $this->exists = false;

      // Everything else is cleared to null
      $this->sections = null;
      $this->text = null;
      $this->starttimestamp = null;
      $this->error = null;
      $this->wikimate = null;
      $this->title = null;
  }
  786. /**
  787. * Returns the wikicode of the page.
  788. *
  789. * @return string String of wikicode
  790. */
  public function __toString()
  {
      // The text is null for a missing or invalid page (and after destroy());
      // __toString() must always return a string, so fall back to ''
      return (string)$this->text;
  }
  795. /**
  796. * Returns an array of sections with the section name as the key
  797. * and the text as the element, e.g.
  798. *
  799. * array(
  800. * 'intro' => 'this text is the introduction',
  801. * 'History' => 'this is text under the history section'
  802. *)
  803. *
  804. * @return array Array of sections
  805. */
  public function __invoke()
  {
      // Section texts without headings, keyed by section name
      $sections = $this->getAllSections(false, self::SECTIONLIST_BY_NAME);

      return $sections;
  }
  810. /**
  811. * Returns the page existence status.
  812. *
  813. * @return boolean True if page exists
  814. */
  public function exists()
  {
      // Existence flag as currently stored on this object
      $found = $this->exists;

      return $found;
  }
  819. /**
  820. * Alias of self::__destruct().
  821. *
  822. * @return void
  823. */
  public function destroy()
  {
      // Explicitly run the destructor logic to clear all properties
      self::__destruct();
  }
  828. /*
  829. *
  830. * Page meta methods
  831. *
  832. */
  833. /**
  834. * Returns the latest error if there is one.
  835. *
  836. * @return mixed The error array, or null if no error
  837. */
  public function getError()
  {
      // Error array from the latest failing call, or null if none is stored
      $latest = $this->error;

      return $latest;
  }
  842. /**
  843. * Returns the title of this page.
  844. *
  845. * @return string The title of this page
  846. */
  public function getTitle()
  {
      // Title this object was constructed with
      $name = $this->title;

      return $name;
  }
  851. /**
  852. * Returns the number of sections in this page.
  853. *
  854. * @return integer The number of sections in this page
  855. */
  public function getNumSections()
  {
      // The index map is only populated once getText() has parsed the page;
      // after a failed query or an invalid title there are no sections
      if (!isset($this->sections->byIndex) || !is_array($this->sections->byIndex)) {
          return 0;
      }

      return count($this->sections->byIndex);
  }
  860. /**
  861. * Returns the sections offsets and lengths.
  862. *
  863. * @return stdClass Section class
  864. */
  public function getSectionOffsets()
  {
      // The object holding the byIndex/byName offset and length maps
      $offsets = $this->sections;

      return $offsets;
  }
  869. /*
  870. *
  871. * Getter methods
  872. *
  873. */
  874. /**
  875. * Gets the text of the page. If refresh is true,
  876. * then this method will query the wiki API again for the page details.
  877. *
  878. * @param boolean $refresh True to query the wiki API again
  879. * @return mixed The text of the page (string), or null if error
  880. */
  public function getText($refresh = false)
  {
      // Without a refresh, serve the text cached by the last query
      if (!$refresh) {
          return $this->text;
      }

      // Specify relevant page properties to retrieve
      $data = array(
          'titles' => $this->title,
          'prop' => 'info|revisions',
          'rvprop' => 'content', // Need to get page text
          'curtimestamp' => 1,
      );

      $r = $this->wikimate->query($data); // Run the query

      // Check for errors
      if (isset($r['error'])) {
          $this->error = $r['error']; // Set the error if there was one
          return null;
      }
      $this->error = null; // Reset the error status

      // Get the page (there should only be one)
      $page = array_pop($r['query']['pages']);

      // Abort if invalid page title
      if (isset($page['invalid'])) {
          $this->invalid = true;
          return null;
      }

      // Timestamp used later for detection of edit conflicts
      $this->starttimestamp = $r['curtimestamp'];

      if (!isset($page['missing'])) {
          // Update the existence if the page is there
          $this->exists = true;
          // Put the content into text
          $this->text = $page['revisions'][0]['*'];
      }
      unset($r, $data, $page);

      // Section parsing works on a string even when the page has no text
      $text = (string)$this->text;
      $textLength = strlen($text);

      // Start from empty maps so a refresh neither keeps stale sections
      // nor renames existing ones to "name_2"
      $this->sections->byIndex = array();
      $this->sections->byName = array();

      // Find the section headers, if any, together with their byte offsets.
      // Using the captured offsets (instead of searching for the header
      // text again) keeps identical headings apart.
      preg_match_all('/(={1,6}).*?\1 *(?:\n|$)/', $text, $matches, PREG_OFFSET_CAPTURE);
      $headers = $matches[0];

      // Set the intro section (between title and first section)
      $this->sections->byIndex[0]['offset'] = 0;
      $this->sections->byName['intro']['offset'] = 0;

      if (empty($headers)) {
          // Define lengths for page consisting only of intro section
          $this->sections->byIndex[0]['length'] = $textLength;
          $this->sections->byName['intro']['length'] = $textLength;

          return $this->text;
      }

      // The intro runs up to the first section header
      $firstOffset = $headers[0][1];
      $this->sections->byIndex[0]['length'] = $firstOffset;
      $this->sections->byIndex[0]['depth'] = 0;
      $this->sections->byName['intro']['length'] = $firstOffset;
      $this->sections->byName['intro']['depth'] = 0;

      // Collect offsets, depths and lengths from section header matches
      foreach ($headers as $i => $header) {
          $heading = $header[0];
          $offset = $header[1];
          $index = $i + 1; // index 0 is the intro

          // Depth is the number of ='s that open the heading
          $depth = strlen($matches[1][$i][0]);

          // A section ends where the next header starts, or at the end of the text
          $end = isset($headers[$index]) ? $headers[$index][1] : $textLength;
          $length = $end - $offset;

          // Derive the name and make it unique by appending _2, _3, ...
          $baseName = trim(str_replace('=', '', $heading));
          $name = $baseName;
          for ($seq = 2; array_key_exists($name, $this->sections->byName); $seq++) {
              $name = $baseName . '_' . $seq;
          }

          $this->sections->byIndex[$index]['offset'] = $offset;
          $this->sections->byIndex[$index]['depth'] = $depth;
          $this->sections->byIndex[$index]['length'] = $length;
          $this->sections->byName[$name]['offset'] = $offset;
          $this->sections->byName[$name]['depth'] = $depth;
          $this->sections->byName[$name]['length'] = $length;
      }

      return $this->text; // Return the text in any case
  }
  977. /**
  978. * Returns the requested section, with its subsections, if any.
  979. *
  980. * Section can be the following:
  981. * - section name (string, e.g. "History")
  982. * - section index (int, e.g. 3)
  983. *
  984. * @param mixed $section The section to get
  985. * @param boolean $includeHeading False to get section text only,
  986. * true to include heading too
  987. * @param boolean $includeSubsections False to get section text only,
  988. * true to include subsections too
  989. * @return mixed Wikitext of the section on the page,
  990. * or null if section is undefined
  991. */
  public function getSection($section, $includeHeading = false, $includeSubsections = true)
  {
    // Look up the section's coordinates by index (int) or by name (string)
    if (is_int($section)) {
      if (!isset($this->sections->byIndex[$section])) {
        return null;
      }
      $coords = $this->sections->byIndex[$section];
    } elseif (is_string($section)) {
      if (!isset($this->sections->byName[$section])) {
        return null;
      }
      $coords = $this->sections->byName[$section];
    } else {
      // Neither a name nor an index, so the section is undefined
      return null;
    }

    // Read the coordinates explicitly rather than extract()ing them,
    // so a missing key can never leave a variable undefined
    $offset = $coords['offset'];
    $length = $coords['length'];
    // Only the intro has depth 0; on a page without any headings
    // the intro carries no depth entry at all
    $depth = isset($coords['depth']) ? $coords['depth'] : 0;
    // Identify the intro by depth, not by offset: a page that starts
    // with a heading has its first real section at offset 0 as well
    $isIntro = ($depth === 0);

    // Find subsections if requested, and not the intro
    if ($includeSubsections && !$isIntro) {
      $found = false;
      foreach ($this->sections->byName as $entry) {
        $entryDepth = isset($entry['depth']) ? $entry['depth'] : 0;
        if ($found) {
          if ($depth < $entryDepth) {
            // Deeper heading: a subsection, so include its length
            $length += $entry['length'];
          } else {
            // Same or shallower heading ends the requested section
            break;
          }
        } elseif ($offset === $entry['offset'] && $depth === $entryDepth) {
          // Offset alone is ambiguous (empty intro vs. first heading),
          // so the depth has to match too
          $found = true;
        }
      }
    }

    // Extract text of section, and its subsections if requested
    $text = substr($this->text, $offset, $length);

    // Strip the heading line if requested, and not the intro
    if (!$includeHeading && !$isIntro) {
      $eol = strpos($text, "\n");
      // A heading without a trailing newline is the whole section:
      // nothing but the heading exists, so the section text is empty
      $text = ($eol === false) ? '' : substr($text, $eol);
    }

    return $text;
  }
  1039. /**
  1040. * Returns all the sections of the page in an array - the key names can be
  1041. * set to name or index by using the following for the second param:
  1042. * - self::SECTIONLIST_BY_NAME
  1043. * - self::SECTIONLIST_BY_INDEX
  1044. *
  1045. * @param boolean $includeHeading False to get section text only
  1046. * @param integer $keyNames Modifier for the array key names
  1047. * @return array Array of sections
  1048. * @throws UnexpectedValueException If $keyNames is not a supported constant
  1049. */
  public function getAllSections($includeHeading = false, $keyNames = self::SECTIONLIST_BY_INDEX)
  {
    // Pick the lookup table whose keys the caller wants in the result
    switch ($keyNames) {
      case self::SECTIONLIST_BY_INDEX:
        $keys = array_keys($this->sections->byIndex);
        $byName = false;
        break;
      case self::SECTIONLIST_BY_NAME:
        $keys = array_keys($this->sections->byName);
        $byName = true;
        break;
      default:
        throw new \UnexpectedValueException("Unexpected keyNames parameter " .
          "($keyNames) passed to WikiPage::getAllSections()");
    }

    // Fetch every section; getSection() includes subsections by default
    $sections = array();
    foreach ($keys as $key) {
      // PHP stores numeric-looking names (e.g. "2020") as integer keys.
      // Cast them back to strings so getSection() performs a name lookup
      // instead of mistaking the name for a section index.
      $lookup = $byName ? (string) $key : $key;
      $sections[$key] = $this->getSection($lookup, $includeHeading);
    }

    return $sections;
  }
  1069. /*
  1070. *
  1071. * Setter methods
  1072. *
  1073. */
  1074. /**
  1075. * Sets the text in the page. Updates the starttimestamp to the timestamp
  1076. * after the page edit (if the edit is successful).
  1077. *
  1078. * Section can be the following:
  1079. * - section name (string, e.g. "History")
  1080. * - section index (int, e.g. 3)
  1081. * - a new section (the string "new")
  1082. * - the whole page (null)
  1083. *
  1084. * @param string $text The article text
  1085. * @param mixed $section The section to edit (whole page by default)
  1086. * @param boolean $minor True for minor edit
  1087. * @param string $summary Summary text, and section header in case
  1088. * of new section
  1089. * @return boolean True if page was edited successfully
  1090. */
  public function setText($text, $section = null, $minor = false, $summary = null)
  {
    // Base parameters of the edit request; md5 lets the API verify the text
    $data = array(
      'title' => $this->title,
      'text' => $text,
      'md5' => md5($text),
      'bot' => "true",
      'starttimestamp' => $this->starttimestamp,
    );

    // Set options from arguments
    if ($section !== null) {
      // Obtain section index in case it is a name
      $data['section'] = $this->findSection($section);
      // findSection() has already recorded the error for an unknown section
      if ($data['section'] === -1) {
        return false;
      }
    }
    if ($minor) {
      $data['minor'] = $minor;
    }
    if ($summary !== null) {
      $data['summary'] = $summary;
    }

    // Make sure we don't create a page by accident or overwrite another one
    if ($this->exists) {
      $data['nocreate'] = "true"; // Don't create, it should exist
    } else {
      $data['createonly'] = "true"; // createonly if not exists
    }

    $r = $this->wikimate->edit($data); // The edit query

    // Anything but an explicit success is reported as an error
    if (!isset($r['edit']['result']) || $r['edit']['result'] !== 'Success') {
      if (isset($r['error'])) {
        $this->error = $r['error'];
      } else {
        // The response may carry neither an error nor an edit result,
        // so never read the result without checking it is there
        $result = isset($r['edit']['result']) ? $r['edit']['result'] : '(no result)';
        $this->error = array('page' => 'Unexpected edit response: ' . $result);
      }
      return false;
    }

    $this->exists = true;
    // Only a whole-page edit leaves the cached text fully known
    if ($section === null) {
      $this->text = $text;
    }

    // Get the new starttimestamp
    $r = $this->wikimate->query(array(
      'titles' => $this->title,
      'prop' => 'info',
      'curtimestamp' => 1,
    ));

    // Check for errors
    if (isset($r['error'])) {
      $this->error = $r['error']; // Set the error if there was one
      return false;
    }
    $this->error = null; // Reset the error status

    // Keep the previous timestamp if the response lacks a new one
    if (isset($r['curtimestamp'])) {
      $this->starttimestamp = $r['curtimestamp'];
    }

    return true;
  }
  1153. /**
  1154. * Sets the text of the given section.
  1155. * Essentially an alias of WikiPage::setText()
  1156. * with the summary and minor parameters switched.
  1157. *
  1158. * Section can be the following:
  1159. * - section name (string, e.g. "History")
  1160. * - section index (int, e.g. 3)
  1161. * - a new section (the string "new")
  1162. * - the whole page (null)
  1163. *
  1164. * @param string $text The text of the section
  1165. * @param mixed $section The section to edit (intro by default)
  1166. * @param string $summary Summary text, and section header in case
  1167. * of new section
  1168. * @param boolean $minor True for minor edit
  1169. * @return boolean True if the section was saved
  1170. */
  public function setSection($text, $section = 0, $summary = null, $minor = false)
  {
    // Same edit as setText(); only the order of the trailing arguments differs
    $saved = $this->setText($text, $section, $minor, $summary);

    return $saved;
  }
  1175. /**
  1176. * Alias of WikiPage::setSection() specifically for creating new sections.
  1177. *
  1178. * @param string $name The heading name for the new section
  1179. * @param string $text The text of the new section
  1180. * @return boolean True if the section was saved
  1181. */
  public function newSection($name, $text)
  {
    // The section keyword 'new' appends a section; the edit summary
    // carries the heading and the edit is never flagged as minor
    $saved = $this->setSection($text, 'new', $name, false);

    return $saved;
  }
  1186. /**
  1187. * Deletes the page.
  1188. *
  1189. * @param string $reason Reason for the deletion
  1190. * @return boolean True if page was deleted successfully
  1191. */
  public function delete($reason = null)
  {
    $data = array(
      'title' => $this->title,
    );

    // Set options from arguments
    if ($reason !== null) {
      $data['reason'] = $reason;
    }

    $r = $this->wikimate->delete($data); // The delete query

    // A 'delete' entry in the response means the page is gone
    if (isset($r['delete'])) {
      $this->exists = false; // The page was deleted
      $this->error = null; // Reset the error status
      return true;
    }

    // Record the error response; a response with neither 'delete' nor
    // 'error' must not be read blindly, so fall back to a page error
    if (isset($r['error'])) {
      $this->error = $r['error'];
    } else {
      $this->error = array('page' => 'Unexpected delete response');
    }

    return false;
  }
  1211. /*
  1212. *
  1213. * Private methods
  1214. *
  1215. */
  1216. /**
  1217. * Finds a section's index by name.
  1218. * If a section index or 'new' is passed, it is returned directly.
  1219. *
  1220. * @param mixed $section The section name or index to find
  1221. * @return mixed The section index, or -1 if not found
  1222. */
  private function findSection($section)
  {
    // An index or the keyword 'new' needs no lookup
    if (is_int($section) || $section === 'new') {
      return $section;
    }

    if (is_string($section)) {
      // PHP turns numeric-looking names ("10") into integer keys, so
      // normalise every key back to a string and compare strictly;
      // a loose search would let e.g. "1e1" match a section named "10"
      $names = array_map('strval', array_keys($this->sections->byName));
      $index = array_search($section, $names, true);
      // The position of the name is the section index
      if ($index !== false) {
        return $index;
      }
    }

    // Arrays and plain objects cannot be interpolated into the message,
    // so describe those by their type instead
    $unprintable = is_array($section)
      || (is_object($section) && !method_exists($section, '__toString'));
    if ($unprintable) {
      $section = gettype($section);
    }

    // Return error message and value
    $this->error = array();
    $this->error['page'] = "Section '$section' was not found on this page";
    return -1;
  }
  1242. }
  1243. /**
  1244. * Models a wiki file that can have its properties retrieved and
  1245. * its contents downloaded and uploaded.
  1246. * All properties pertain to the current revision of the file.
  1247. *
  1248. * @author Robert McLeod & Frans P. de Vries
  1249. * @since 0.12.0 October 2016
  1250. */
  1251. class WikiFile
  1252. {
  1253. /**
  1254. * The name of the file
  1255. *
  1256. * @var string|null
  1257. */
  1258. protected $filename = null;
  1259. /**
  1260. * Wikimate object for API requests
  1261. *
  1262. * @var Wikimate|null
  1263. */
  1264. protected $wikimate = null;
  1265. /**
  1266. * Whether the file exists
  1267. *
  1268. * @var boolean
  1269. */
  1270. protected $exists = false;
  1271. /**
  1272. * Whether the file is invalid
  1273. *
  1274. * @var boolean
  1275. */
  1276. protected $invalid = false;
  1277. /**
  1278. * Error array with API and WikiFile errors
  1279. *
  1280. * @var array|null
  1281. */
  1282. protected $error = null;
  1283. /**
  1284. * Image info for the current file revision
  1285. *
  1286. * @var array|null
  1287. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Imageinfo
  1288. */
  1289. protected $info = null;
  1290. /**
  1291. * Image info for all file revisions
  1292. *
  1293. * @var array|null
  1294. * @link https://www.mediawiki.org/wiki/Special:MyLanguage/API:Imageinfo
  1295. */
  1296. protected $history = null;
  1297. /*
  1298. *
  1299. * Magic methods
  1300. *
  1301. */
  1302. /**
  1303. * Constructs a WikiFile object from the filename given
  1304. * and associate with the passed Wikimate object.
  1305. *
  1306. * @param string $filename Name of the wiki file
  1307. * @param Wikimate $wikimate Wikimate object
  1308. */
  public function __construct($filename, $wikimate)
  {
    // Remember the file name and the API wrapper before querying
    $this->filename = $filename;
    $this->wikimate = $wikimate;

    // Load the file details from the wiki straight away
    $details = $this->getInfo(true);
    $this->info = $details;

    // getInfo() raises the invalid flag for titles the API rejects
    if (!$this->invalid) {
      return;
    }
    $this->error['file'] = 'Invalid filename - cannot create WikiFile';
  }
  1318. /**
  1319. * Forgets all object properties.
  1320. */
  public function __destruct()
  {
    // Drop the cached API results
    $this->history = null;
    $this->info = null;
    $this->error = null;
    // Return the status flags to their initial state
    $this->invalid = false;
    $this->exists = false;
    // Release the identity and the API wrapper
    $this->wikimate = null;
    $this->filename = null;
  }
  1331. /**
  1332. * Returns the file existence status.
  1333. *
  1334. * @return boolean True if file exists
  1335. */
  public function exists()
  {
    // Flag maintained by getInfo() whenever it queries the wiki
    $present = $this->exists;

    return $present;
  }
  1340. /**
  1341. * Alias of self::__destruct().
  1342. *
  1343. * @return void
  1344. */
  public function destroy()
  {
    // Explicit way to reset the object: reuse the destructor's cleanup
    self::__destruct();
  }
  1349. /*
  1350. *
  1351. * File meta methods
  1352. *
  1353. */
  1354. /**
  1355. * Returns the latest error if there is one.
  1356. *
  1357. * @return mixed The error array, or null if no error
  1358. */
  public function getError()
  {
    // Null means no error is currently recorded
    $latest = $this->error;

    return $latest;
  }
  1363. /**
  1364. * Returns the name of this file.
  1365. *
  1366. * @return string The name of this file
  1367. */
  public function getFilename()
  {
    // The name exactly as it was passed to the constructor
    $name = $this->filename;

    return $name;
  }
  1372. /*
  1373. *
  1374. * Getter methods
  1375. *
  1376. */
  1377. /**
  1378. * Gets the information of the file. If refresh is true,
  1379. * then this method will query the wiki API again for the file details.
  1380. *
  1381. * @param boolean $refresh True to query the wiki API again
  1382. * @param array $history An optional array of revision history parameters
  1383. * @return mixed The info of the file (array), or null if error
  1384. */
  public function getInfo($refresh = false, $history = null)
  {
    // Without a refresh the cached info is returned untouched
    if (!$refresh) {
      return $this->info;
    }

    // Specify relevant file properties to retrieve
    $data = array(
      'titles' => 'File:' . $this->filename,
      'prop' => 'info|imageinfo',
      'iiprop' => 'bitdepth|canonicaltitle|comment|parsedcomment|'
        . 'commonmetadata|metadata|extmetadata|mediatype|'
        . 'mime|thumbmime|sha1|size|timestamp|url|user|userid',
    );

    // Add optional history parameters
    if (is_array($history)) {
      foreach ($history as $key => $val) {
        $data[$key] = $val;
      }
      // Retrieve archive name property as well
      $data['iiprop'] .= '|archivename';
    }

    $r = $this->wikimate->query($data); // Run the query

    // Check for errors
    if (isset($r['error'])) {
      $this->error = $r['error']; // Set the error if there was one
      return null;
    }

    // A response without a page list cannot be processed; report it
    // instead of handing a missing value to array_pop()
    if (!isset($r['query']['pages']) || !is_array($r['query']['pages'])) {
      $this->error = array('file' => 'Unexpected query response');
      return null;
    }
    $this->error = null; // Reset the error status

    // Get the page (there should only be one)
    $page = array_pop($r['query']['pages']);

    // Abort if invalid file title
    if (isset($page['invalid'])) {
      $this->invalid = true;
      return null;
    }

    // Check that file is present and has info
    if (!isset($page['missing']) && isset($page['imageinfo'])) {
      // Update the existence if the file is there
      $this->exists = true;
      // The first entry goes into info, the full list into history
      $this->info = $page['imageinfo'][0];
      $this->history = $page['imageinfo'];
    }

    return $this->info; // Return the info in any case
  }
  1432. /**
  1433. * Returns the anonymous flag of this file,
  1434. * or of its specified revision.
  1435. * If true, then getUser()'s value represents an anonymous IP address.
  1436. *
  1437. * @param mixed $revision The index or timestamp of the revision (optional)
  1438. * @return mixed The anonymous flag of this file (boolean),

Large files files are truncated, but you can click here to view the full file