PageRenderTime 42ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/parser/LinkHolderArray.php

https://github.com/daevid/MWFork
PHP | 599 lines | 407 code | 62 blank | 130 comment | 50 complexity | 3b59c24be17d60bcb7393b445fc98033 MD5 | raw file
  1. <?php
  2. /**
  3. * Holder of replacement pairs for wiki links
  4. *
  5. * @file
  6. */
  7. /**
  8. * @ingroup Parser
  9. */
  10. class LinkHolderArray {
  11. var $internals = array(), $interwikis = array();
  12. var $size = 0;
  13. var $parent;
  14. protected $tempIdOffset;
  15. function __construct( $parent ) {
  16. $this->parent = $parent;
  17. }
  18. /**
  19. * Reduce memory usage to reduce the impact of circular references
  20. */
  21. function __destruct() {
  22. foreach ( $this as $name => $value ) {
  23. unset( $this->$name );
  24. }
  25. }
  26. /**
  27. * Don't serialize the parent object, it is big, and not needed when it is
  28. * a parameter to mergeForeign(), which is the only application of
  29. * serializing at present.
  30. *
  31. * Compact the titles, only serialize the text form.
  32. */
  33. function __sleep() {
  34. foreach ( $this->internals as &$nsLinks ) {
  35. foreach ( $nsLinks as &$entry ) {
  36. unset( $entry['title'] );
  37. }
  38. }
  39. unset( $nsLinks );
  40. unset( $entry );
  41. foreach ( $this->interwikis as &$entry ) {
  42. unset( $entry['title'] );
  43. }
  44. unset( $entry );
  45. return array( 'internals', 'interwikis', 'size' );
  46. }
  47. /**
  48. * Recreate the Title objects
  49. */
  50. function __wakeup() {
  51. foreach ( $this->internals as &$nsLinks ) {
  52. foreach ( $nsLinks as &$entry ) {
  53. $entry['title'] = Title::newFromText( $entry['pdbk'] );
  54. }
  55. }
  56. unset( $nsLinks );
  57. unset( $entry );
  58. foreach ( $this->interwikis as &$entry ) {
  59. $entry['title'] = Title::newFromText( $entry['pdbk'] );
  60. }
  61. unset( $entry );
  62. }
  63. /**
  64. * Merge another LinkHolderArray into this one
  65. * @param $other LinkHolderArray
  66. */
  67. function merge( $other ) {
  68. foreach ( $other->internals as $ns => $entries ) {
  69. $this->size += count( $entries );
  70. if ( !isset( $this->internals[$ns] ) ) {
  71. $this->internals[$ns] = $entries;
  72. } else {
  73. $this->internals[$ns] += $entries;
  74. }
  75. }
  76. $this->interwikis += $other->interwikis;
  77. }
  78. /**
  79. * Merge a LinkHolderArray from another parser instance into this one. The
  80. * keys will not be preserved. Any text which went with the old
  81. * LinkHolderArray and needs to work with the new one should be passed in
  82. * the $texts array. The strings in this array will have their link holders
  83. * converted for use in the destination link holder. The resulting array of
  84. * strings will be returned.
  85. *
  86. * @param $other LinkHolderArray
  87. * @param $texts Array of strings
  88. * @return Array
  89. */
  90. function mergeForeign( $other, $texts ) {
  91. $this->tempIdOffset = $idOffset = $this->parent->nextLinkID();
  92. $maxId = 0;
  93. # Renumber internal links
  94. foreach ( $other->internals as $ns => $nsLinks ) {
  95. foreach ( $nsLinks as $key => $entry ) {
  96. $newKey = $idOffset + $key;
  97. $this->internals[$ns][$newKey] = $entry;
  98. $maxId = $newKey > $maxId ? $newKey : $maxId;
  99. }
  100. }
  101. $texts = preg_replace_callback( '/(<!--LINK \d+:)(\d+)(-->)/',
  102. array( $this, 'mergeForeignCallback' ), $texts );
  103. # Renumber interwiki links
  104. foreach ( $other->interwikis as $key => $entry ) {
  105. $newKey = $idOffset + $key;
  106. $this->interwikis[$newKey] = $entry;
  107. $maxId = $newKey > $maxId ? $newKey : $maxId;
  108. }
  109. $texts = preg_replace_callback( '/(<!--IWLINK )(\d+)(-->)/',
  110. array( $this, 'mergeForeignCallback' ), $texts );
  111. # Set the parent link ID to be beyond the highest used ID
  112. $this->parent->setLinkID( $maxId + 1 );
  113. $this->tempIdOffset = null;
  114. return $texts;
  115. }
  116. protected function mergeForeignCallback( $m ) {
  117. return $m[1] . ( $m[2] + $this->tempIdOffset ) . $m[3];
  118. }
  119. /**
  120. * Get a subset of the current LinkHolderArray which is sufficient to
  121. * interpret the given text.
  122. */
  123. function getSubArray( $text ) {
  124. $sub = new LinkHolderArray( $this->parent );
  125. # Internal links
  126. $pos = 0;
  127. while ( $pos < strlen( $text ) ) {
  128. if ( !preg_match( '/<!--LINK (\d+):(\d+)-->/',
  129. $text, $m, PREG_OFFSET_CAPTURE, $pos ) )
  130. {
  131. break;
  132. }
  133. $ns = $m[1][0];
  134. $key = $m[2][0];
  135. $sub->internals[$ns][$key] = $this->internals[$ns][$key];
  136. $pos = $m[0][1] + strlen( $m[0][0] );
  137. }
  138. # Interwiki links
  139. $pos = 0;
  140. while ( $pos < strlen( $text ) ) {
  141. if ( !preg_match( '/<!--IWLINK (\d+)-->/', $text, $m, PREG_OFFSET_CAPTURE, $pos ) ) {
  142. break;
  143. }
  144. $key = $m[1][0];
  145. $sub->interwikis[$key] = $this->interwikis[$key];
  146. $pos = $m[0][1] + strlen( $m[0][0] );
  147. }
  148. return $sub;
  149. }
  150. /**
  151. * Returns true if the memory requirements of this object are getting large
  152. */
  153. function isBig() {
  154. global $wgLinkHolderBatchSize;
  155. return $this->size > $wgLinkHolderBatchSize;
  156. }
  157. /**
  158. * Clear all stored link holders.
  159. * Make sure you don't have any text left using these link holders, before you call this
  160. */
  161. function clear() {
  162. $this->internals = array();
  163. $this->interwikis = array();
  164. $this->size = 0;
  165. }
  166. /**
  167. * Make a link placeholder. The text returned can be later resolved to a real link with
  168. * replaceLinkHolders(). This is done for two reasons: firstly to avoid further
  169. * parsing of interwiki links, and secondly to allow all existence checks and
  170. * article length checks (for stub links) to be bundled into a single query.
  171. *
  172. * @param $nt Title
  173. */
  174. function makeHolder( $nt, $text = '', $query = array(), $trail = '', $prefix = '' ) {
  175. wfProfileIn( __METHOD__ );
  176. if ( ! is_object($nt) ) {
  177. # Fail gracefully
  178. $retVal = "<!-- ERROR -->{$prefix}{$text}{$trail}";
  179. } else {
  180. # Separate the link trail from the rest of the link
  181. list( $inside, $trail ) = Linker::splitTrail( $trail );
  182. $entry = array(
  183. 'title' => $nt,
  184. 'text' => $prefix.$text.$inside,
  185. 'pdbk' => $nt->getPrefixedDBkey(),
  186. );
  187. if ( $query !== array() ) {
  188. $entry['query'] = $query;
  189. }
  190. if ( $nt->isExternal() ) {
  191. // Use a globally unique ID to keep the objects mergable
  192. $key = $this->parent->nextLinkID();
  193. $this->interwikis[$key] = $entry;
  194. $retVal = "<!--IWLINK $key-->{$trail}";
  195. } else {
  196. $key = $this->parent->nextLinkID();
  197. $ns = $nt->getNamespace();
  198. $this->internals[$ns][$key] = $entry;
  199. $retVal = "<!--LINK $ns:$key-->{$trail}";
  200. }
  201. $this->size++;
  202. }
  203. wfProfileOut( __METHOD__ );
  204. return $retVal;
  205. }
  206. /**
  207. * @todo FIXME: Update documentation. makeLinkObj() is deprecated.
  208. * Replace <!--LINK--> link placeholders with actual links, in the buffer
  209. * Placeholders created in Skin::makeLinkObj()
  210. * Returns an array of link CSS classes, indexed by PDBK.
  211. */
  212. function replace( &$text ) {
  213. wfProfileIn( __METHOD__ );
  214. $colours = $this->replaceInternal( $text );
  215. $this->replaceInterwiki( $text );
  216. wfProfileOut( __METHOD__ );
  217. return $colours;
  218. }
  219. /**
  220. * Replace internal links
  221. */
  222. protected function replaceInternal( &$text ) {
  223. if ( !$this->internals ) {
  224. return;
  225. }
  226. wfProfileIn( __METHOD__ );
  227. global $wgContLang;
  228. $colours = array();
  229. $linkCache = LinkCache::singleton();
  230. $output = $this->parent->getOutput();
  231. wfProfileIn( __METHOD__.'-check' );
  232. $dbr = wfGetDB( DB_SLAVE );
  233. $threshold = $this->parent->getOptions()->getStubThreshold();
  234. # Sort by namespace
  235. ksort( $this->internals );
  236. $linkcolour_ids = array();
  237. # Generate query
  238. $queries = array();
  239. foreach ( $this->internals as $ns => $entries ) {
  240. foreach ( $entries as $entry ) {
  241. $title = $entry['title'];
  242. $pdbk = $entry['pdbk'];
  243. # Skip invalid entries.
  244. # Result will be ugly, but prevents crash.
  245. if ( is_null( $title ) ) {
  246. continue;
  247. }
  248. # Check if it's a static known link, e.g. interwiki
  249. if ( $title->isAlwaysKnown() ) {
  250. $colours[$pdbk] = '';
  251. } elseif ( $ns == NS_SPECIAL ) {
  252. $colours[$pdbk] = 'new';
  253. } elseif ( ( $id = $linkCache->getGoodLinkID( $pdbk ) ) != 0 ) {
  254. $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
  255. $output->addLink( $title, $id );
  256. $linkcolour_ids[$id] = $pdbk;
  257. } elseif ( $linkCache->isBadLink( $pdbk ) ) {
  258. $colours[$pdbk] = 'new';
  259. } else {
  260. # Not in the link cache, add it to the query
  261. $queries[$ns][] = $title->getDBkey();
  262. }
  263. }
  264. }
  265. if ( $queries ) {
  266. $where = array();
  267. foreach( $queries as $ns => $pages ){
  268. $where[] = $dbr->makeList(
  269. array(
  270. 'page_namespace' => $ns,
  271. 'page_title' => $pages,
  272. ),
  273. LIST_AND
  274. );
  275. }
  276. $res = $dbr->select(
  277. 'page',
  278. array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len', 'page_latest' ),
  279. $dbr->makeList( $where, LIST_OR ),
  280. __METHOD__
  281. );
  282. # Fetch data and form into an associative array
  283. # non-existent = broken
  284. foreach ( $res as $s ) {
  285. $title = Title::makeTitle( $s->page_namespace, $s->page_title );
  286. $pdbk = $title->getPrefixedDBkey();
  287. $linkCache->addGoodLinkObj( $s->page_id, $title, $s->page_len, $s->page_is_redirect, $s->page_latest );
  288. $output->addLink( $title, $s->page_id );
  289. # @todo FIXME: Convoluted data flow
  290. # The redirect status and length is passed to getLinkColour via the LinkCache
  291. # Use formal parameters instead
  292. $colours[$pdbk] = Linker::getLinkColour( $title, $threshold );
  293. //add id to the extension todolist
  294. $linkcolour_ids[$s->page_id] = $pdbk;
  295. }
  296. unset( $res );
  297. }
  298. if ( count($linkcolour_ids) ) {
  299. //pass an array of page_ids to an extension
  300. wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
  301. }
  302. wfProfileOut( __METHOD__.'-check' );
  303. # Do a second query for different language variants of links and categories
  304. if($wgContLang->hasVariants()) {
  305. $this->doVariants( $colours );
  306. }
  307. # Construct search and replace arrays
  308. wfProfileIn( __METHOD__.'-construct' );
  309. $replacePairs = array();
  310. foreach ( $this->internals as $ns => $entries ) {
  311. foreach ( $entries as $index => $entry ) {
  312. $pdbk = $entry['pdbk'];
  313. $title = $entry['title'];
  314. $query = isset( $entry['query'] ) ? $entry['query'] : array();
  315. $key = "$ns:$index";
  316. $searchkey = "<!--LINK $key-->";
  317. $displayText = $entry['text'];
  318. if ( $displayText === '' ) {
  319. $displayText = null;
  320. }
  321. if ( !isset( $colours[$pdbk] ) ) {
  322. $colours[$pdbk] = 'new';
  323. }
  324. $attribs = array();
  325. if ( $colours[$pdbk] == 'new' ) {
  326. $linkCache->addBadLinkObj( $title );
  327. $output->addLink( $title, 0 );
  328. $type = array( 'broken' );
  329. } else {
  330. if ( $colours[$pdbk] != '' ) {
  331. $attribs['class'] = $colours[$pdbk];
  332. }
  333. $type = array( 'known', 'noclasses' );
  334. }
  335. $replacePairs[$searchkey] = Linker::link( $title, $displayText,
  336. $attribs, $query, $type );
  337. }
  338. }
  339. $replacer = new HashtableReplacer( $replacePairs, 1 );
  340. wfProfileOut( __METHOD__.'-construct' );
  341. # Do the thing
  342. wfProfileIn( __METHOD__.'-replace' );
  343. $text = preg_replace_callback(
  344. '/(<!--LINK .*?-->)/',
  345. $replacer->cb(),
  346. $text);
  347. wfProfileOut( __METHOD__.'-replace' );
  348. wfProfileOut( __METHOD__ );
  349. }
  350. /**
  351. * Replace interwiki links
  352. */
  353. protected function replaceInterwiki( &$text ) {
  354. if ( empty( $this->interwikis ) ) {
  355. return;
  356. }
  357. wfProfileIn( __METHOD__ );
  358. # Make interwiki link HTML
  359. $output = $this->parent->getOutput();
  360. $replacePairs = array();
  361. foreach( $this->interwikis as $key => $link ) {
  362. $replacePairs[$key] = Linker::link( $link['title'], $link['text'] );
  363. $output->addInterwikiLink( $link['title'] );
  364. }
  365. $replacer = new HashtableReplacer( $replacePairs, 1 );
  366. $text = preg_replace_callback(
  367. '/<!--IWLINK (.*?)-->/',
  368. $replacer->cb(),
  369. $text );
  370. wfProfileOut( __METHOD__ );
  371. }
  372. /**
  373. * Modify $this->internals and $colours according to language variant linking rules
  374. */
  375. protected function doVariants( &$colours ) {
  376. global $wgContLang;
  377. $linkBatch = new LinkBatch();
  378. $variantMap = array(); // maps $pdbkey_Variant => $keys (of link holders)
  379. $output = $this->parent->getOutput();
  380. $linkCache = LinkCache::singleton();
  381. $threshold = $this->parent->getOptions()->getStubThreshold();
  382. $titlesToBeConverted = '';
  383. $titlesAttrs = array();
  384. // Concatenate titles to a single string, thus we only need auto convert the
  385. // single string to all variants. This would improve parser's performance
  386. // significantly.
  387. foreach ( $this->internals as $ns => $entries ) {
  388. foreach ( $entries as $index => $entry ) {
  389. $pdbk = $entry['pdbk'];
  390. // we only deal with new links (in its first query)
  391. if ( !isset( $colours[$pdbk] ) ) {
  392. $title = $entry['title'];
  393. $titleText = $title->getText();
  394. $titlesAttrs[] = array(
  395. 'ns' => $ns,
  396. 'key' => "$ns:$index",
  397. 'titleText' => $titleText,
  398. );
  399. // separate titles with \0 because it would never appears
  400. // in a valid title
  401. $titlesToBeConverted .= $titleText . "\0";
  402. }
  403. }
  404. }
  405. // Now do the conversion and explode string to text of titles
  406. $titlesAllVariants = $wgContLang->autoConvertToAllVariants( $titlesToBeConverted );
  407. $allVariantsName = array_keys( $titlesAllVariants );
  408. foreach ( $titlesAllVariants as &$titlesVariant ) {
  409. $titlesVariant = explode( "\0", $titlesVariant );
  410. }
  411. $l = count( $titlesAttrs );
  412. // Then add variants of links to link batch
  413. for ( $i = 0; $i < $l; $i ++ ) {
  414. foreach ( $allVariantsName as $variantName ) {
  415. $textVariant = $titlesAllVariants[$variantName][$i];
  416. if ( $textVariant != $titlesAttrs[$i]['titleText'] ) {
  417. $variantTitle = Title::makeTitle( $titlesAttrs[$i]['ns'], $textVariant );
  418. if( is_null( $variantTitle ) ) {
  419. continue;
  420. }
  421. $linkBatch->addObj( $variantTitle );
  422. $variantMap[$variantTitle->getPrefixedDBkey()][] = $titlesAttrs[$i]['key'];
  423. }
  424. }
  425. }
  426. // process categories, check if a category exists in some variant
  427. $categoryMap = array(); // maps $category_variant => $category (dbkeys)
  428. $varCategories = array(); // category replacements oldDBkey => newDBkey
  429. foreach( $output->getCategoryLinks() as $category ){
  430. $variants = $wgContLang->autoConvertToAllVariants( $category );
  431. foreach($variants as $variant){
  432. if($variant != $category){
  433. $variantTitle = Title::newFromDBkey( Title::makeName(NS_CATEGORY,$variant) );
  434. if(is_null($variantTitle)) continue;
  435. $linkBatch->addObj( $variantTitle );
  436. $categoryMap[$variant] = $category;
  437. }
  438. }
  439. }
  440. if(!$linkBatch->isEmpty()){
  441. // construct query
  442. $dbr = wfGetDB( DB_SLAVE );
  443. $varRes = $dbr->select( 'page',
  444. array( 'page_id', 'page_namespace', 'page_title', 'page_is_redirect', 'page_len' ),
  445. $linkBatch->constructSet( 'page', $dbr ),
  446. __METHOD__
  447. );
  448. $linkcolour_ids = array();
  449. // for each found variants, figure out link holders and replace
  450. foreach ( $varRes as $s ) {
  451. $variantTitle = Title::makeTitle( $s->page_namespace, $s->page_title );
  452. $varPdbk = $variantTitle->getPrefixedDBkey();
  453. $vardbk = $variantTitle->getDBkey();
  454. $holderKeys = array();
  455. if( isset( $variantMap[$varPdbk] ) ) {
  456. $holderKeys = $variantMap[$varPdbk];
  457. $linkCache->addGoodLinkObj( $s->page_id, $variantTitle, $s->page_len, $s->page_is_redirect );
  458. $output->addLink( $variantTitle, $s->page_id );
  459. }
  460. // loop over link holders
  461. foreach( $holderKeys as $key ) {
  462. list( $ns, $index ) = explode( ':', $key, 2 );
  463. $entry =& $this->internals[$ns][$index];
  464. $pdbk = $entry['pdbk'];
  465. if(!isset($colours[$pdbk])){
  466. // found link in some of the variants, replace the link holder data
  467. $entry['title'] = $variantTitle;
  468. $entry['pdbk'] = $varPdbk;
  469. // set pdbk and colour
  470. # @todo FIXME: Convoluted data flow
  471. # The redirect status and length is passed to getLinkColour via the LinkCache
  472. # Use formal parameters instead
  473. $colours[$varPdbk] = Linker::getLinkColour( $variantTitle, $threshold );
  474. $linkcolour_ids[$s->page_id] = $pdbk;
  475. }
  476. }
  477. // check if the object is a variant of a category
  478. if(isset($categoryMap[$vardbk])){
  479. $oldkey = $categoryMap[$vardbk];
  480. if($oldkey != $vardbk)
  481. $varCategories[$oldkey]=$vardbk;
  482. }
  483. }
  484. wfRunHooks( 'GetLinkColours', array( $linkcolour_ids, &$colours ) );
  485. // rebuild the categories in original order (if there are replacements)
  486. if(count($varCategories)>0){
  487. $newCats = array();
  488. $originalCats = $output->getCategories();
  489. foreach($originalCats as $cat => $sortkey){
  490. // make the replacement
  491. if( array_key_exists($cat,$varCategories) )
  492. $newCats[$varCategories[$cat]] = $sortkey;
  493. else $newCats[$cat] = $sortkey;
  494. }
  495. $output->setCategoryLinks($newCats);
  496. }
  497. }
  498. }
  499. /**
  500. * Replace <!--LINK--> link placeholders with plain text of links
  501. * (not HTML-formatted).
  502. *
  503. * @param $text String
  504. * @return String
  505. */
  506. function replaceText( $text ) {
  507. wfProfileIn( __METHOD__ );
  508. $text = preg_replace_callback(
  509. '/<!--(LINK|IWLINK) (.*?)-->/',
  510. array( &$this, 'replaceTextCallback' ),
  511. $text );
  512. wfProfileOut( __METHOD__ );
  513. return $text;
  514. }
  515. /**
  516. * Callback for replaceText()
  517. *
  518. * @param $matches Array
  519. * @return string
  520. * @private
  521. */
  522. function replaceTextCallback( $matches ) {
  523. $type = $matches[1];
  524. $key = $matches[2];
  525. if( $type == 'LINK' ) {
  526. list( $ns, $index ) = explode( ':', $key, 2 );
  527. if( isset( $this->internals[$ns][$index]['text'] ) ) {
  528. return $this->internals[$ns][$index]['text'];
  529. }
  530. } elseif( $type == 'IWLINK' ) {
  531. if( isset( $this->interwikis[$key]['text'] ) ) {
  532. return $this->interwikis[$key]['text'];
  533. }
  534. }
  535. return $matches[0];
  536. }
  537. }