PageRenderTime 54ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/update-search-index.php

https://bitbucket.org/wez/mtrack/
PHP | 159 lines | 137 code | 15 blank | 7 comment | 10 complexity | 66d967467d35263cf3d7b3bcce997031 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. <?php # vim:ts=2:sw=2:et:
  2. /* For licensing and copyright terms, see the file named LICENSE */
  /* Pin the timezone so date()/strtotime() results below do not depend on the
   * host configuration. */
  if (function_exists('date_default_timezone_set')) {
    date_default_timezone_set('UTC');
  }
  include dirname(__FILE__) . '/../inc/common.php';
  MTrackSearchDB::setBatchMode();
  $vardir = MTrackConfig::get('core', 'vardir');
  /* only allow one instance to run concurrently */
  $fp = fopen("$vardir/.indexer.lock", 'w');
  if (!$fp) {
    /* Report the failure instead of exiting silently, mirroring the
     * "already running" branch below. */
    echo "Unable to open lock file $vardir/.indexer.lock\n";
    exit(1);
  }
  /* Non-blocking exclusive lock: if another process holds it we bail out
   * immediately rather than queueing up behind it. */
  if (!flock($fp, LOCK_EX|LOCK_NB)) {
    echo "Another instance is already running\n";
    exit(1);
  }
  /* "leak" $fp, so that the lock is held while we continue to run */
  /* log to a file in the var dir */
  /**
   * Output-buffer handler: appends each buffered chunk to the global
   * $log_file stream and flushes it straight away.
   *
   * @param string $buffer chunk of output handed over by the output buffer
   * @return string always '' so nothing is passed on to the regular output
   */
  function log_output($buffer)
  {
    global $log_file;
    fwrite($log_file, $buffer);
    fflush($log_file);
    /* Be explicit that the output is consumed here; relying on an implicit
     * null return from an output handler is deprecated in newer PHP. */
    return '';
  }
  /* Open (and truncate) the log; only when that works is output redirected
   * into it through the log_output buffer handler. Otherwise output keeps
   * going to its normal destination. */
  $log_file = fopen("$vardir/indexer.log", 'w');
  if (is_resource($log_file)) {
    ob_start('log_output');
  }
  /**
   * Pushes any pending output through the output buffer (if one is active)
   * and the system write buffers, so progress messages show up promptly.
   *
   * @return void
   */
  function log_flush() {
    flush();
    /* ob_flush() raises a notice when no buffer is active, which is the case
     * whenever the log file could not be opened and ob_start() was skipped. */
    if (ob_get_level() > 0) {
      ob_flush();
    }
    flush();
  }
  /* Bookkeeping shared (via globals) with the indexing functions. */
  $ITEMS = 0;        // number of change rows processed by index_items()
  $DONE = array();   // objects already handed to the indexer in this run
  $start_time = time();
  echo "Indexing started at " . date('c') . "\n";
  log_flush();

  /* Default to a full index from a date far in the past; any recorded
   * previous run switches us to incremental mode from that timestamp. */
  $last = '1990-01-01T00:00:00';
  $ALL = true;
  $state_rows = MTrackDB::q('select last_run from search_engine_state')->fetchAll();
  foreach ($state_rows as $state_row) {
    $ALL = false;
    $last = $state_row[0];
  }

  /* $FIRST remembers where this run started; $LATEST advances as newer
   * changes are processed. */
  $LATEST = strtotime($last);
  $FIRST = $LATEST;
  /**
   * Indexes a single object once per run and reports how long it took.
   *
   * Objects already seen in this run (tracked in the global $DONE map) are
   * skipped and reported as success.
   *
   * @param string $object object identifier, e.g. "wiki:<name>"
   * @return mixed true when the object was already handled in this run,
   *               otherwise whatever MTrackSearchDB::index_object() returns
   *               (callers treat a strict false as "cannot be indexed")
   */
  function index_and_measure($object)
  {
    global $DONE;
    if (isset($DONE[$object])) {
      return true;
    }
    $DONE[$object] = true;
    echo "Examine: $object\n";
    log_flush();
    /* microtime(true) gives sub-second resolution; time() only counts whole
     * seconds, which made the %f output below meaningless. */
    $start = microtime(true);
    $res = MTrackSearchDB::index_object($object);
    $elapsed = microtime(true) - $start;
    /* Pass the object as an argument rather than interpolating it into the
     * format string, so a '%' in an object name cannot break printf(). */
    printf("Indexed %s in %f seconds\n", $object, $elapsed);
    log_flush();
    return $res;
  }
  /**
   * Indexes every object that has a change newer than $lower, oldest first,
   * advancing the global $LATEST timestamp as it goes.
   *
   * Stops early once the run has exceeded its time budget, provided some
   * progress has been made, so that the next run can pick up from $LATEST.
   *
   * @param string $lower only changes with changedate greater than this are
   *                      considered (bound into the SQL query)
   * @return void results are reported through the globals $LATEST and $ITEMS
   */
  function index_items($lower)
  {
    global $LATEST;
    global $ITEMS;
    global $start_time;
    global $FIRST;

    /* Wall-clock budget for a run, in seconds.
     * NOTE(review): presumably chosen to stay under a 5 minute schedule;
     * confirm against the cron configuration. */
    $time_budget = 280;
    /* Only give up once $LATEST has moved more than this many seconds past
     * the starting point, so that every run makes some forward progress. */
    $min_progress = 3;

    /* do the work here */
    $rows = MTrackDB::q('select object, max(changedate) from changes where changedate > ? group by object order by max(changedate)', $lower)->fetchAll(PDO::FETCH_NUM);
    foreach ($rows as $row) {
      if ($LATEST > ($FIRST + $min_progress) && time() - $start_time > $time_budget) {
        // Step back 1 second on the next run, otherwise we may miss out
        // a couple of items from the current second
        $LATEST--;
        break;
      }
      list($object, $when) = $row;
      $ITEMS++;
      $res = index_and_measure($object);
      if ($res === false) {
        echo "Don't know how to index $object\n";
      } else {
        echo "Processed $object $when > $lower\n";
      }
      $t = strtotime($when);
      if ($t > $LATEST) {
        $LATEST = $t;
      }
    }
  }
  if ($ALL) {
    // walk all the wiki pages, in case someone checked in against the
    // wiki repo outside of the app
    $repo = null;
    $root = MTrackWikiItem::getRepoAndRoot($repo);
    $suf = MTrackConfig::get('core', 'wikifilenamesuffix');

    /**
     * Recursively walks $dir in the wiki repository and indexes every file
     * whose name ends with the configured suffix (or every file when no
     * suffix is configured).
     *
     * @param object $repo repository object providing readdir($dir)
     * @param string $dir  directory to scan
     * @param string $suf  required filename suffix; empty means "any file"
     * @return void
     */
    function walk_wiki($repo, $dir, $suf)
    {
      /* Normalise once so the length and strict comparison below are
       * well-defined even if the config value is unset. */
      $suffix = (string)$suf;
      $suffix_len = strlen($suffix);
      foreach ($repo->readdir($dir) as $file) {
        if ($file->is_dir) {
          walk_wiki($repo, $file->name, $suf);
          continue;
        }
        /* Strict comparison: with == two numeric-looking strings could be
         * considered equal even though their bytes differ. */
        if ($suffix_len === 0 || substr($file->name, -$suffix_len) === $suffix) {
          index_and_measure("wiki:$file->name");
        }
      }
    }
    walk_wiki($repo, $root, $suf);
  }
  /* Incremental pass over everything changed since the last recorded run. */
  index_items($last);

  /* Replace the single-row run state with the newest timestamp we reached. */
  $db = MTrackDB::get();
  $db->beginTransaction();
  $db->exec("delete from search_engine_state");
  $insert = $db->prepare("insert into search_engine_state (last_run) values (?)");
  $insert->execute(array(MTrackDB::unixtime($LATEST)));
  $db->commit();

  /* Commit the search index whenever anything was handed to the indexer.
   * $ITEMS only counts rows seen by index_items(); the initial wiki walk
   * calls index_and_measure() directly, so also consult $DONE or a run that
   * only indexed wiki pages would never be committed.
   * NOTE(review): the run state is persisted before the index commit; if the
   * index commit can fail, consider swapping the order - confirm against
   * MTrackSearchDB::commit(). */
  if ($ITEMS > 0 || count($DONE) > 0) {
    MTrackSearchDB::commit();
  }
  $end_time = time();
  $elapsed = $end_time - $start_time;
  echo "$ITEMS items processed (in $elapsed seconds)\n";