PageRenderTime 47ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/dump/src/main/mediawiki/includes/api/ApiParse.php

https://github.com/SunghanKim/extraction-framework
PHP | 625 lines | 502 code | 69 blank | 54 comment | 63 complexity | d520d811641a6e0e84542e677d0a9b33 MD5 | raw file
  1. <?php
  2. /**
  3. * Created on Dec 01, 2007
  4. *
  5. * Copyright © 2007 Yuri Astrakhan <Firstname><Lastname>@gmail.com
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License along
  18. * with this program; if not, write to the Free Software Foundation, Inc.,
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20. * http://www.gnu.org/copyleft/gpl.html
  21. *
  22. * @file
  23. */
  24. /**
  25. * @ingroup API
  26. */
  27. class ApiParse extends ApiBase {
  28. private $section, $text, $pstText = null;
  /**
   * Create the module; both arguments are handed to the ApiBase constructor unchanged.
   *
   * @param $main   mixed Passed through as the first parent-constructor argument
   * @param $action mixed Passed through as the second parent-constructor argument
   */
  public function __construct( $main, $action ) {
      // Nothing module-specific to initialise: delegate entirely to the parent.
      parent::__construct(
          $main,
          $action
      );
  }
  /**
   * Entry point of the module: validate the request, optionally switch the
   * context language to "uselang", run the parse and put the context language
   * back afterwards.
   *
   * The actual work is done by parseAndReport(); this method only owns the
   * request validation and the language switch, so that the language is restored
   * on every exit path (normal completion, the "onlypst" early exit, and errors).
   */
  public function execute() {
      // The data is hot but user-dependent, like page views, so we set vary cookies
      $this->getMain()->setCacheMode( 'anon-public-user-private' );

      // Get parameters
      $params = $this->extractRequestParams();

      // "page" selects stored content, so it conflicts with ad-hoc text or a
      // non-default title ('API' is the default declared in getAllowedParams()).
      if ( !is_null( $params['page'] ) && ( !is_null( $params['text'] ) || $params['title'] != 'API' ) ) {
          $this->dieUsage( 'The page parameter cannot be used together with the text and title parameters', 'params' );
      }

      // Flip so that a requested property can be tested with isset( $prop['name'] )
      $prop = array_flip( $params['prop'] );

      // false means "whole page"; anything else is the requested section identifier
      if ( isset( $params['section'] ) ) {
          $this->section = $params['section'];
      } else {
          $this->section = false;
      }

      // Currently unnecessary; acts as a safeguard in case the way uselang is
      // applied to the context ever changes.
      $oldLang = null;
      if ( isset( $params['uselang'] ) && $params['uselang'] != $this->getContext()->getLanguage()->getCode() ) {
          $oldLang = $this->getContext()->getLanguage(); // Backup language
          $this->getContext()->setLanguage( Language::factory( $params['uselang'] ) );
      }

      // Emulates try/finally without requiring PHP 5.5: remember any exception
      // (presumably how dieUsage() aborts the request - confirm), restore the
      // language, then re-throw the very same exception object.
      $failure = null;
      try {
          $this->parseAndReport( $params, $prop );
      } catch ( Exception $e ) {
          $failure = $e;
      }

      if ( !is_null( $oldLang ) ) {
          $this->getContext()->setLanguage( $oldLang ); // Reset language to $oldLang
      }

      if ( !is_null( $failure ) ) {
          throw $failure;
      }
  }

  /**
   * Obtain a ParserOutput for the requested source and add the requested
   * pieces of it to the API result.
   *
   * The source is chosen in this order: "oldid", then "pageid"/"page", then
   * the literal "text" parsed in the context of "title".
   *
   * @param $params array Request parameters as returned by extractRequestParams()
   * @param $prop   array Requested property names as array keys (flipped "prop")
   */
  private function parseAndReport( array $params, array $prop ) {
      // The parser needs $wgTitle to be set, apparently the
      // $title parameter in Parser::parse isn't enough *sigh*
      // TODO: Does this still need $wgTitle?
      global $wgParser, $wgTitle;

      $text = $params['text'];
      $title = $params['title'];
      $page = $params['page'];
      $pageid = $params['pageid'];
      $oldid = $params['oldid'];

      $popts = ParserOptions::newFromContext( $this->getContext() );
      $popts->setTidy( true );
      $popts->enableLimitReport( !$params['disablepp'] );

      $redirValues = null;

      // Return result
      $result = $this->getResult();

      if ( !is_null( $oldid ) || !is_null( $pageid ) || !is_null( $page ) ) {
          if ( !is_null( $oldid ) ) {
              // Don't use the parser cache
              $rev = Revision::newFromID( $oldid );
              if ( !$rev ) {
                  $this->dieUsage( "There is no revision ID $oldid", 'missingrev' );
              }
              if ( !$rev->userCan( Revision::DELETED_TEXT, $this->getUser() ) ) {
                  $this->dieUsage( "You don't have permission to view deleted revisions", 'permissiondenied' );
              }

              $titleObj = $rev->getTitle();
              $wgTitle = $titleObj;

              // If for some reason the "oldid" is actually the current revision, it may be cached
              if ( $titleObj->getLatestRevID() === intval( $oldid ) ) {
                  // May get from/save to parser cache
                  $p_result = $this->getParsedSectionOrText( $titleObj, $popts, $pageid,
                      isset( $prop['wikitext'] ) );
              } else { // This is an old revision, so get the text differently
                  $this->text = $rev->getText( Revision::FOR_THIS_USER, $this->getUser() );

                  if ( $this->section !== false ) {
                      $this->text = $this->getSectionText( $this->text, 'r' . $rev->getId() );
                  }

                  // Should we save old revision parses to the parser cache?
                  $p_result = $wgParser->parse( $this->text, $titleObj, $popts );
              }
          } else { // Not $oldid, but $pageid or $page
              if ( $params['redirects'] ) {
                  // Resolve redirects through an internal action=query request
                  $reqParams = array(
                      'action' => 'query',
                      'redirects' => '',
                  );
                  if ( !is_null( $pageid ) ) {
                      $reqParams['pageids'] = $pageid;
                  } else { // $page
                      $reqParams['titles'] = $page;
                  }
                  $req = new FauxRequest( $reqParams );
                  $main = new ApiMain( $req );
                  $main->execute();
                  $data = $main->getResultData();
                  $redirValues = isset( $data['query']['redirects'] )
                      ? $data['query']['redirects']
                      : array();

                  // The last reported redirect target wins
                  $to = $page;
                  foreach ( (array)$redirValues as $r ) {
                      $to = $r['to'];
                  }

                  if ( is_null( $to ) ) {
                      // Only a pageid was given and no redirect was followed: there is
                      // no title text to look up, so resolve the page by its ID instead
                      // of failing with a bogus "nosuchpageid".
                      $titleObj = Title::newFromID( $pageid );
                  } else {
                      $titleObj = Title::newFromText( $to );
                  }
              } elseif ( !is_null( $pageid ) ) {
                  $titleObj = Title::newFromID( $pageid );
              } else { // $page
                  $titleObj = Title::newFromText( $page );
              }

              if ( !is_null( $pageid ) ) {
                  if ( !$titleObj ) {
                      // Still throw nosuchpageid error if pageid was provided
                      $this->dieUsageMsg( array( 'nosuchpageid', $pageid ) );
                  }
              } elseif ( !$titleObj || !$titleObj->exists() ) {
                  $this->dieUsage( "The page you specified doesn't exist", 'missingtitle' );
              }
              $wgTitle = $titleObj;

              // Setting $oldid here makes the "revid" field appear in the output below
              if ( isset( $prop['revid'] ) ) {
                  $oldid = $titleObj->getLatestRevID();
              }

              // Potentially cached
              $p_result = $this->getParsedSectionOrText( $titleObj, $popts, $pageid,
                  isset( $prop['wikitext'] ) );
          }
      } else { // Not $oldid, $pageid, $page. Hence based on $text
          if ( is_null( $text ) ) {
              $this->dieUsage( 'The text parameter should be passed with the title parameter. Should you be using the "page" parameter instead?', 'params' );
          }
          $this->text = $text;
          $titleObj = Title::newFromText( $title );
          if ( !$titleObj ) {
              $this->dieUsageMsg( array( 'invalidtitle', $title ) );
          }
          $wgTitle = $titleObj;

          if ( $this->section !== false ) {
              $this->text = $this->getSectionText( $this->text, $titleObj->getText() );
          }

          if ( $params['pst'] || $params['onlypst'] ) {
              $this->pstText = $wgParser->preSaveTransform( $this->text, $titleObj, $this->getUser(), $popts );
          }
          if ( $params['onlypst'] ) {
              // Build a result and bail out
              $result_array = array();
              $result_array['text'] = array();
              $result->setContent( $result_array['text'], $this->pstText );
              if ( isset( $prop['wikitext'] ) ) {
                  $result_array['wikitext'] = array();
                  $result->setContent( $result_array['wikitext'], $this->text );
              }
              $result->addValue( null, $this->getModuleName(), $result_array );
              return;
          }

          // Not cached (save or load)
          $p_result = $wgParser->parse( $params['pst'] ? $this->pstText : $this->text, $titleObj, $popts );
      }

      $result_array = array();

      $result_array['title'] = $titleObj->getPrefixedText();

      if ( !is_null( $oldid ) ) {
          $result_array['revid'] = intval( $oldid );
      }

      if ( $params['redirects'] && !is_null( $redirValues ) ) {
          $result_array['redirects'] = $redirValues;
      }

      if ( isset( $prop['text'] ) ) {
          $result_array['text'] = array();
          // Local customisation: the parser's HTML is passed through
          // DBpediaFunctions::cleanHtml() rather than returned as-is.
          $result->setContent( $result_array['text'], DBpediaFunctions::cleanHtml( $p_result->getText() ) );
      }

      if ( !is_null( $params['summary'] ) ) {
          $result_array['parsedsummary'] = array();
          $result->setContent( $result_array['parsedsummary'], Linker::formatComment( $params['summary'], $titleObj ) );
      }

      if ( isset( $prop['langlinks'] ) ) {
          $result_array['langlinks'] = $this->formatLangLinks( $p_result->getLanguageLinks() );
      }
      if ( isset( $prop['languageshtml'] ) ) {
          $languagesHtml = $this->languagesHtml( $p_result->getLanguageLinks() );
          $result_array['languageshtml'] = array();
          $result->setContent( $result_array['languageshtml'], $languagesHtml );
      }
      if ( isset( $prop['categories'] ) ) {
          $result_array['categories'] = $this->formatCategoryLinks( $p_result->getCategories() );
      }
      if ( isset( $prop['categorieshtml'] ) ) {
          $categoriesHtml = $this->categoriesHtml( $p_result->getCategories() );
          $result_array['categorieshtml'] = array();
          $result->setContent( $result_array['categorieshtml'], $categoriesHtml );
      }
      if ( isset( $prop['links'] ) ) {
          $result_array['links'] = $this->formatLinks( $p_result->getLinks() );
      }
      if ( isset( $prop['templates'] ) ) {
          $result_array['templates'] = $this->formatLinks( $p_result->getTemplates() );
      }
      if ( isset( $prop['images'] ) ) {
          $result_array['images'] = array_keys( $p_result->getImages() );
      }
      if ( isset( $prop['externallinks'] ) ) {
          $result_array['externallinks'] = array_keys( $p_result->getExternalLinks() );
      }
      if ( isset( $prop['sections'] ) ) {
          $result_array['sections'] = $p_result->getSections();
      }

      if ( isset( $prop['displaytitle'] ) ) {
          // Fall back to the prefixed title when the parser output has no display title
          $result_array['displaytitle'] = $p_result->getDisplayTitle() ?
              $p_result->getDisplayTitle() :
              $titleObj->getPrefixedText();
      }

      if ( isset( $prop['headitems'] ) || isset( $prop['headhtml'] ) ) {
          // Both properties are produced from the context's output object, so
          // feed the parser output into it first.
          $context = $this->getContext();
          $context->setTitle( $titleObj );
          $context->getOutput()->addParserOutputNoText( $p_result );

          if ( isset( $prop['headitems'] ) ) {
              $headItems = $this->formatHeadItems( $p_result->getHeadItems() );

              $css = $this->formatCss( $context->getOutput()->buildCssLinksArray() );

              $scripts = array( $context->getOutput()->getHeadScripts() );

              $result_array['headitems'] = array_merge( $headItems, $css, $scripts );
          }

          if ( isset( $prop['headhtml'] ) ) {
              $result_array['headhtml'] = array();
              $result->setContent( $result_array['headhtml'], $context->getOutput()->headElement( $context->getSkin() ) );
          }
      }

      if ( isset( $prop['iwlinks'] ) ) {
          $result_array['iwlinks'] = $this->formatIWLinks( $p_result->getInterwikiLinks() );
      }

      if ( isset( $prop['wikitext'] ) ) {
          $result_array['wikitext'] = array();
          $result->setContent( $result_array['wikitext'], $this->text );
          if ( !is_null( $this->pstText ) ) {
              $result_array['psttext'] = array();
              $result->setContent( $result_array['psttext'], $this->pstText );
          }
      }
      if ( isset( $prop['properties'] ) ) {
          $result_array['properties'] = $this->formatProperties( $p_result->getProperties() );
      }

      // Tag name to use for the elements of each list-valued field
      $result_mapping = array(
          'redirects' => 'r',
          'langlinks' => 'll',
          'categories' => 'cl',
          'links' => 'pl',
          'templates' => 'tl',
          'images' => 'img',
          'externallinks' => 'el',
          'iwlinks' => 'iw',
          'sections' => 's',
          'headitems' => 'hi',
          'properties' => 'pp',
      );
      $this->setIndexedTagNames( $result_array, $result_mapping );
      $result->addValue( null, $this->getModuleName(), $result_array );
  }
  /**
   * Produce parser output for the current content of a page, either for the
   * whole page or for the section stored in $this->section.
   *
   * Whole-page requests go through WikiPage::getParserOutput(); section
   * requests are parsed directly from the extracted section text. As a side
   * effect $this->text is set to the section text, or to the raw page text
   * when $getWikitext is true.
   *
   * @param $titleObj Title
   * @param $popts ParserOptions
   * @param $pageId Int Only used to word the "no such section" error
   * @param $getWikitext Bool Whether to also remember the raw wikitext
   * @return ParserOutput
   */
  private function getParsedSectionOrText( $titleObj, $popts, $pageId = null, $getWikitext = false ) {
      global $wgParser;

      $page = WikiPage::factory( $titleObj );

      if ( $this->section === false ) {
          // Whole page: try the parser cache first;
          // getParserOutput will save to Parser cache if able
          $parserOutput = $page->getParserOutput( $popts );
          if ( !$parserOutput ) {
              $this->dieUsage( "There is no revision ID {$page->getLatest()}", 'missingrev' );
          }
          if ( $getWikitext ) {
              $this->text = $page->getRawText();
          }
          return $parserOutput;
      }

      // Single section: extract it from the raw text and parse just that.
      $rawText = $page->getRawText();
      if ( !is_null( $pageId ) ) {
          $what = 'page id ' . $pageId;
      } else {
          $what = $titleObj->getText();
      }
      $this->text = $this->getSectionText( $rawText, $what );

      // Not cached (save or load)
      return $wgParser->parse( $this->text, $titleObj, $popts );
  }
  /**
   * Extract the section selected by $this->section from a piece of wikitext.
   *
   * @param $text string Wikitext to take the section from
   * @param $what string Description of the source, used in the error message
   * @return string Text of the section; a 'nosuchsection' error is raised when
   *   the parser reports false for it
   */
  private function getSectionText( $text, $what ) {
      global $wgParser;

      // Not cached (save or load)
      $sectionText = $wgParser->getSection( $text, $this->section, false );

      if ( $sectionText === false ) {
          $this->dieUsage( "There is no section {$this->section} in " . $what, 'nosuchsection' );
      }

      return $sectionText;
  }
  /**
   * Turn "lang:Title" language link strings into API result entries.
   *
   * Each entry carries the language prefix, the expanded URL when the link
   * forms a valid Title, and the part after the first colon as its content.
   *
   * @param $links array List of "prefix:title" strings
   * @return array List of entries
   */
  private function formatLangLinks( $links ) {
      $formatted = array();

      foreach ( $links as $link ) {
          // Split only on the first colon; the title itself may contain more
          $bits = explode( ':', $link, 2 );
          $title = Title::newFromText( $link );

          $entry = array( 'lang' => $bits[0] );
          if ( $title ) {
              $entry['url'] = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
          }
          $this->getResult()->setContent( $entry, $bits[1] );

          $formatted[] = $entry;
      }

      return $formatted;
  }
  /**
   * Turn a category => sortkey map into API result entries.
   *
   * @param $links array Map whose keys become the entry content and whose
   *   values become the 'sortkey' field
   * @return array List of entries
   */
  private function formatCategoryLinks( $links ) {
      $formatted = array();

      foreach ( $links as $category => $sortkey ) {
          $entry = array( 'sortkey' => $sortkey );
          $this->getResult()->setContent( $entry, $category );
          $formatted[] = $entry;
      }

      return $formatted;
  }
  /**
   * Render the category links as HTML via the context's skin.
   *
   * The categories are first registered on the context's output object, then
   * the skin is asked for its category HTML.
   *
   * @param $categories array Categories as handed to addCategoryLinks()
   * @return mixed Whatever the skin's getCategories() returns
   */
  private function categoriesHtml( $categories ) {
      $context = $this->getContext();

      $output = $context->getOutput();
      $output->addCategoryLinks( $categories );

      $skin = $context->getSkin();
      return $skin->getCategories();
  }
  /**
   * Build an HTML snippet listing the given language links.
   *
   * Returns an empty string when interlanguage links are hidden by
   * configuration or when there are no links. Links that cannot be turned
   * into a Title are skipped instead of causing a fatal error.
   *
   * @deprecated since 1.18 No modern skin generates language links this way, please use language links
   * data to generate your own HTML.
   * @param $languages array
   * @return string
   */
  private function languagesHtml( $languages ) {
      wfDeprecated( __METHOD__, '1.18' );

      global $wgContLang, $wgHideInterlanguageLinks;

      if ( $wgHideInterlanguageLinks || count( $languages ) == 0 ) {
          return '';
      }

      $s = htmlspecialchars( wfMsg( 'otherlanguages' ) . wfMsg( 'colon-separator' ) );

      $langs = array();
      foreach ( $languages as $l ) {
          $nt = Title::newFromText( $l );
          if ( !$nt ) {
              // Title::newFromText() yields no object for an invalid title;
              // there is nothing to link to, so leave this entry out.
              continue;
          }
          $text = Language::fetchLanguageName( $nt->getInterwiki() );

          // Use the raw link text as label when no language name is known
          $langs[] = Html::element( 'a',
              array( 'href' => $nt->getFullURL(), 'title' => $nt->getText(), 'class' => "external" ),
              $text == '' ? $l : $text );
      }

      $s .= implode( htmlspecialchars( wfMsgExt( 'pipe-separator', 'escapenoentities' ) ), $langs );

      if ( $wgContLang->isRTL() ) {
          // Wrap the list in an explicit LTR span when the content language is RTL
          $s = Html::rawElement( 'span', array( 'dir' => "LTR" ), $s );
      }

      return $s;
  }
  /**
   * Flatten a namespace => ( title => id ) map into API result entries.
   *
   * Each entry has the namespace number, the full title text as content and
   * an empty 'exists' marker when the id is non-zero.
   *
   * @param $links array Two-level map: namespace => ( title => id )
   * @return array List of entries
   */
  private function formatLinks( $links ) {
      $formatted = array();

      foreach ( $links as $ns => $nslinks ) {
          foreach ( $nslinks as $title => $id ) {
              $entry = array( 'ns' => $ns );

              $fullText = Title::makeTitle( $ns, $title )->getFullText();
              $this->getResult()->setContent( $entry, $fullText );

              if ( $id != 0 ) {
                  $entry['exists'] = '';
              }

              $formatted[] = $entry;
          }
      }

      return $formatted;
  }
  /**
   * Flatten a prefix => ( title => ... ) map of interwiki links into API
   * result entries.
   *
   * Each entry has the interwiki prefix, the expanded URL when the link forms
   * a valid Title, and the link text as content. When no Title can be built,
   * the raw "prefix:title" string is used as content instead of calling a
   * method on a missing object.
   *
   * @param $iw array Two-level map: prefix => ( title => ... ); only the keys
   *   of the inner arrays are used
   * @return array List of entries
   */
  private function formatIWLinks( $iw ) {
      $result = array();

      foreach ( $iw as $prefix => $titles ) {
          foreach ( array_keys( $titles ) as $titleText ) {
              $entry = array();
              $entry['prefix'] = $prefix;

              // Start from the raw link text; replaced by the normalised form
              // below when the link is a valid title.
              $fullText = "{$prefix}:{$titleText}";

              $titleObj = Title::newFromText( $fullText );
              if ( $titleObj ) {
                  $entry['url'] = wfExpandUrl( $titleObj->getFullURL(), PROTO_CURRENT );
                  $fullText = $titleObj->getFullText();
              }

              $this->getResult()->setContent( $entry, $fullText );
              $result[] = $entry;
          }
      }

      return $result;
  }
  /**
   * Turn a tag => content map of head items into API result entries.
   *
   * @param $headItems array Map whose keys become the 'tag' field and whose
   *   values become the entry content
   * @return array List of entries
   */
  private function formatHeadItems( $headItems ) {
      $formatted = array();

      foreach ( $headItems as $tag => $content ) {
          $entry = array( 'tag' => $tag );
          $this->getResult()->setContent( $entry, $content );
          $formatted[] = $entry;
      }

      return $formatted;
  }
  /**
   * Turn a name => value map of properties into API result entries.
   *
   * @param $properties array Map whose keys become the 'name' field and whose
   *   values become the entry content
   * @return array List of entries
   */
  private function formatProperties( $properties ) {
      $formatted = array();

      foreach ( $properties as $name => $value ) {
          $entry = array( 'name' => $name );
          $this->getResult()->setContent( $entry, $value );
          $formatted[] = $entry;
      }

      return $formatted;
  }
  /**
   * Turn a file => link map of CSS links into API result entries.
   *
   * @param $css array Map whose keys become the 'file' field and whose values
   *   become the entry content
   * @return array List of entries
   */
  private function formatCss( $css ) {
      $formatted = array();

      foreach ( $css as $file => $link ) {
          $entry = array( 'file' => $file );
          $this->getResult()->setContent( $entry, $link );
          $formatted[] = $entry;
      }

      return $formatted;
  }
  /**
   * Apply an indexed tag name to every mapped field that is present.
   *
   * @param $array   array Result array, passed by reference; fields named in
   *   $mapping that are set get their tag name registered
   * @param $mapping array Map of field name => tag name
   */
  private function setIndexedTagNames( &$array, $mapping ) {
      foreach ( $mapping as $field => $tagName ) {
          if ( !isset( $array[$field] ) ) {
              // Field was not requested / not produced: nothing to tag
              continue;
          }
          $this->getResult()->setIndexedTagName( $array[$field], $tagName );
      }
  }
  /**
   * Declare the parameters accepted by this module, with their defaults and types.
   *
   * @return array Map of parameter name => default value or settings array
   */
  public function getAllowedParams() {
      // Every value accepted by "prop"; the default selection below is a subset.
      $propValues = array(
          'text',
          'langlinks',
          'languageshtml',
          'categories',
          'categorieshtml',
          'links',
          'templates',
          'images',
          'externallinks',
          'sections',
          'revid',
          'displaytitle',
          'headitems',
          'headhtml',
          'iwlinks',
          'wikitext',
          'properties',
      );

      $integerParam = array(
          ApiBase::PARAM_TYPE => 'integer',
      );

      $allowed = array();
      $allowed['title'] = array(
          ApiBase::PARAM_DFLT => 'API',
      );
      $allowed['text'] = null;
      $allowed['summary'] = null;
      $allowed['page'] = null;
      $allowed['pageid'] = $integerParam;
      $allowed['redirects'] = false;
      $allowed['oldid'] = $integerParam;
      $allowed['prop'] = array(
          ApiBase::PARAM_DFLT => 'text|langlinks|categories|links|templates|images|externallinks|sections|revid|displaytitle|iwlinks|properties',
          ApiBase::PARAM_ISMULTI => true,
          ApiBase::PARAM_TYPE => $propValues,
      );
      $allowed['pst'] = false;
      $allowed['onlypst'] = false;
      $allowed['uselang'] = null;
      $allowed['section'] = null;
      $allowed['disablepp'] = false;

      return $allowed;
  }
  /**
   * Provide the help text for each parameter of this module.
   *
   * @return array Map of parameter name => description (string or list of lines)
   */
  public function getParamDescription() {
      // Module prefix, interpolated into references to other parameters
      $p = $this->getModulePrefix();

      $propHelp = array(
          'Which pieces of information to get',
          ' text - Gives the parsed text of the wikitext',
          ' langlinks - Gives the language links in the parsed wikitext',
          ' categories - Gives the categories in the parsed wikitext',
          ' categorieshtml - Gives the HTML version of the categories',
          ' languageshtml - Gives the HTML version of the language links',
          ' links - Gives the internal links in the parsed wikitext',
          ' templates - Gives the templates in the parsed wikitext',
          ' images - Gives the images in the parsed wikitext',
          ' externallinks - Gives the external links in the parsed wikitext',
          ' sections - Gives the sections in the parsed wikitext',
          ' revid - Adds the revision ID of the parsed page',
          ' displaytitle - Adds the title of the parsed wikitext',
          ' headitems - Gives items to put in the <head> of the page',
          ' headhtml - Gives parsed <head> of the page',
          ' iwlinks - Gives interwiki links in the parsed wikitext',
          ' wikitext - Gives the original wikitext that was parsed',
          ' properties - Gives various properties defined in the parsed wikitext',
      );

      $pstHelp = array(
          'Do a pre-save transform on the input before parsing it',
          'Ignored if page, pageid or oldid is used'
      );

      $onlyPstHelp = array(
          'Do a pre-save transform (PST) on the input, but don\'t parse it',
          'Returns the same wikitext, after a PST has been applied. Ignored if page, pageid or oldid is used'
      );

      $descriptions = array();
      $descriptions['text'] = 'Wikitext to parse';
      $descriptions['summary'] = 'Summary to parse';
      $descriptions['redirects'] = "If the {$p}page or the {$p}pageid parameter is set to a redirect, resolve it";
      $descriptions['title'] = 'Title of page the text belongs to';
      $descriptions['page'] = "Parse the content of this page. Cannot be used together with {$p}text and {$p}title";
      $descriptions['pageid'] = "Parse the content of this page. Overrides {$p}page";
      $descriptions['oldid'] = "Parse the content of this revision. Overrides {$p}page and {$p}pageid";
      $descriptions['prop'] = $propHelp;
      $descriptions['pst'] = $pstHelp;
      $descriptions['onlypst'] = $onlyPstHelp;
      $descriptions['uselang'] = 'Which language to parse the request in';
      $descriptions['section'] = 'Only retrieve the content of this section number';
      $descriptions['disablepp'] = 'Disable the PP Report from the parser output';

      return $descriptions;
  }
  /**
   * Provide the help text describing this module.
   *
   * @return array Description lines
   */
  public function getDescription() {
      $lines = array();
      $lines[] = 'Parses wikitext and returns parser output';
      $lines[] = 'See the various prop-Modules of action=query to get information from the current version of a page';

      return $lines;
  }
  /**
   * List the errors this module can raise, appended to those of the parent class.
   *
   * @return array Parent errors followed by this module's own error descriptors
   */
  public function getPossibleErrors() {
      $ownErrors = array(
          array( 'code' => 'params', 'info' => 'The page parameter cannot be used together with the text and title parameters' ),
          array( 'code' => 'params', 'info' => 'The text parameter should be passed with the title parameter. Should you be using the "page" parameter instead?' ),
          array( 'code' => 'missingrev', 'info' => 'There is no revision ID oldid' ),
          array( 'code' => 'permissiondenied', 'info' => 'You don\'t have permission to view deleted revisions' ),
          array( 'code' => 'missingtitle', 'info' => 'The page you specified doesn\'t exist' ),
          array( 'code' => 'nosuchsection', 'info' => 'There is no section sectionnumber in page' ),
          array( 'nosuchpageid' ),
          array( 'invalidtitle', 'title' ),
      );

      $inherited = parent::getPossibleErrors();

      return array_merge( $inherited, $ownErrors );
  }
  /**
   * Provide example requests for this module.
   *
   * @return array List of example request strings
   */
  public function getExamples() {
      $examples = array();
      $examples[] = 'api.php?action=parse&text={{Project:Sandbox}}';

      return $examples;
  }
  /**
   * Provide the URL of this module's online documentation.
   *
   * @return string Documentation URL
   */
  public function getHelpUrls() {
      $helpUrl = 'https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse';

      return $helpUrl;
  }
  /**
   * Provide the version string of this module: the class name followed by
   * the literal ': $Id$' marker (single-quoted, so not interpolated).
   *
   * @return string
   */
  public function getVersion() {
      $className = __CLASS__;

      return $className . ': $Id$';
  }
  551. }