PageRenderTime 27ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/exe/indexer.php

https://gitlab.com/kidaa/Door43
PHP | 209 lines | 120 code | 26 blank | 63 comment | 24 complexity | 175ea130c183e4e28d74d1771cf3fd84 MD5 | raw file
  1. <?php
  2. /**
  3. * DokuWiki indexer
  4. *
  5. * @license GPL 2 (http://www.gnu.org/licenses/gpl.html)
  6. * @author Andreas Gohr <andi@splitbrain.org>
  7. */
  8. if(!defined('DOKU_INC')) define('DOKU_INC',dirname(__FILE__).'/../../');
  9. define('DOKU_DISABLE_GZIP_OUTPUT',1);
  10. require_once(DOKU_INC.'inc/init.php');
  11. session_write_close(); //close session
  12. if(!defined('NL')) define('NL',"\n");
  13. // keep running after browser closes connection
  14. @ignore_user_abort(true);
  15. // check if user abort worked, if yes send output early
  16. $defer = !@ignore_user_abort() || $conf['broken_iua'];
  17. $output = $INPUT->has('debug') && $conf['allowdebug'];
  18. if(!$defer && !$output){
  19. sendGIF(); // send gif
  20. }
  21. $ID = cleanID($INPUT->str('id'));
  22. // Catch any possible output (e.g. errors)
  23. if(!$output) ob_start();
  24. else header('Content-Type: text/plain');
  25. // run one of the jobs
  26. $tmp = array(); // No event data
  27. $evt = new Doku_Event('INDEXER_TASKS_RUN', $tmp);
  28. if ($evt->advise_before()) {
  29. runIndexer() or
  30. runSitemapper() or
  31. sendDigest() or
  32. runTrimRecentChanges() or
  33. runTrimRecentChanges(true) or
  34. $evt->advise_after();
  35. }
  36. if(!$output) {
  37. ob_end_clean();
  38. if($defer) sendGIF();
  39. }
  40. exit;
  41. // --------------------------------------------------------------------
  42. /**
  43. * Trims the recent changes cache (or imports the old changelog) as needed.
  44. *
  45. * @param bool $media_changes If the media changelog shall be trimmed instead of
  46. * the page changelog
  47. * @return bool
  48. *
  49. * @author Ben Coburn <btcoburn@silicodon.net>
  50. */
  51. function runTrimRecentChanges($media_changes = false) {
  52. global $conf;
  53. echo "runTrimRecentChanges($media_changes): started".NL;
  54. $fn = ($media_changes ? $conf['media_changelog'] : $conf['changelog']);
  55. // Trim the Recent Changes
  56. // Trims the recent changes cache to the last $conf['changes_days'] recent
  57. // changes or $conf['recent'] items, which ever is larger.
  58. // The trimming is only done once a day.
  59. if (file_exists($fn) &&
  60. (@filemtime($fn.'.trimmed')+86400)<time() &&
  61. !file_exists($fn.'_tmp')) {
  62. @touch($fn.'.trimmed');
  63. io_lock($fn);
  64. $lines = file($fn);
  65. if (count($lines)<=$conf['recent']) {
  66. // nothing to trim
  67. io_unlock($fn);
  68. echo "runTrimRecentChanges($media_changes): finished".NL;
  69. return false;
  70. }
  71. io_saveFile($fn.'_tmp', ''); // presave tmp as 2nd lock
  72. $trim_time = time() - $conf['recent_days']*86400;
  73. $out_lines = array();
  74. $old_lines = array();
  75. for ($i=0; $i<count($lines); $i++) {
  76. $log = parseChangelogLine($lines[$i]);
  77. if ($log === false) continue; // discard junk
  78. if ($log['date'] < $trim_time) {
  79. $old_lines[$log['date'].".$i"] = $lines[$i]; // keep old lines for now (append .$i to prevent key collisions)
  80. } else {
  81. $out_lines[$log['date'].".$i"] = $lines[$i]; // definitely keep these lines
  82. }
  83. }
  84. if (count($lines)==count($out_lines)) {
  85. // nothing to trim
  86. @unlink($fn.'_tmp');
  87. io_unlock($fn);
  88. echo "runTrimRecentChanges($media_changes): finished".NL;
  89. return false;
  90. }
  91. // sort the final result, it shouldn't be necessary,
  92. // however the extra robustness in making the changelog cache self-correcting is worth it
  93. ksort($out_lines);
  94. $extra = $conf['recent'] - count($out_lines); // do we need extra lines do bring us up to minimum
  95. if ($extra > 0) {
  96. ksort($old_lines);
  97. $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
  98. }
  99. // save trimmed changelog
  100. io_saveFile($fn.'_tmp', implode('', $out_lines));
  101. @unlink($fn);
  102. if (!rename($fn.'_tmp', $fn)) {
  103. // rename failed so try another way...
  104. io_unlock($fn);
  105. io_saveFile($fn, implode('', $out_lines));
  106. @unlink($fn.'_tmp');
  107. } else {
  108. io_unlock($fn);
  109. }
  110. echo "runTrimRecentChanges($media_changes): finished".NL;
  111. return true;
  112. }
  113. // nothing done
  114. echo "runTrimRecentChanges($media_changes): finished".NL;
  115. return false;
  116. }
  117. /**
  118. * Runs the indexer for the current page
  119. *
  120. * @author Andreas Gohr <andi@splitbrain.org>
  121. */
  122. function runIndexer(){
  123. global $ID;
  124. global $conf;
  125. print "runIndexer(): started".NL;
  126. if(!$ID) return false;
  127. // do the work
  128. return idx_addPage($ID, true);
  129. }
  130. /**
  131. * Builds a Google Sitemap of all public pages known to the indexer
  132. *
  133. * The map is placed in the root directory named sitemap.xml.gz - This
  134. * file needs to be writable!
  135. *
  136. * @author Andreas Gohr
  137. * @link https://www.google.com/webmasters/sitemaps/docs/en/about.html
  138. */
  139. function runSitemapper(){
  140. print "runSitemapper(): started".NL;
  141. $result = Sitemapper::generate() && Sitemapper::pingSearchEngines();
  142. print 'runSitemapper(): finished'.NL;
  143. return $result;
  144. }
  145. /**
  146. * Send digest and list mails for all subscriptions which are in effect for the
  147. * current page
  148. *
  149. * @author Adrian Lang <lang@cosmocode.de>
  150. */
  151. function sendDigest() {
  152. global $conf;
  153. global $ID;
  154. echo 'sendDigest(): started'.NL;
  155. if(!actionOK('subscribe')) {
  156. echo 'sendDigest(): disabled'.NL;
  157. return false;
  158. }
  159. $sub = new Subscription();
  160. $sent = $sub->send_bulk($ID);
  161. echo "sendDigest(): sent $sent mails".NL;
  162. echo 'sendDigest(): finished'.NL;
  163. return (bool) $sent;
  164. }
  165. /**
  166. * Just send a 1x1 pixel blank gif to the browser
  167. *
  168. * @author Andreas Gohr <andi@splitbrain.org>
  169. * @author Harry Fuecks <fuecks@gmail.com>
  170. */
  171. function sendGIF(){
  172. $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
  173. header('Content-Type: image/gif');
  174. header('Content-Length: '.strlen($img));
  175. header('Connection: Close');
  176. print $img;
  177. tpl_flush();
  178. // Browser should drop connection after this
  179. // Thinks it's got the whole image
  180. }
  181. //Setup VIM: ex: et ts=4 :
  182. // No trailing PHP closing tag - no output please!
  183. // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php