PageRenderTime 48ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/search/classes/engine.php

https://github.com/adamann2/moodle
PHP | 444 lines | 146 code | 55 blank | 243 comment | 23 complexity | e864ff77cd80d9faf233a9672ac59bff MD5 | raw file
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Base class for search engines.
  18. *
  19. * All search engines must extend this class.
  20. *
  21. * @package core_search
  22. * @copyright 2015 Daniel Neis
  23. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  24. */
  25. namespace core_search;
  26. defined('MOODLE_INTERNAL') || die();
  /**
   * Base class for search engines.
   *
   * All search engines must extend this class. It loads the plugin configuration,
   * keeps small internal caches (search areas, courses, users), drives the document
   * indexing loop and declares the operations each concrete engine must implement.
   *
   * @package core_search
   * @copyright 2015 Daniel Neis
   * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
   */
  abstract class engine {

      /**
       * The search engine configuration.
       *
       * @var \stdClass
       */
      protected $config = null;

      /**
       * Last executed query error, if there was any.
       *
       * @var string
       */
      protected $queryerror = null;

      /**
       * @var array Internal cache of search areas, indexed by area id. False marks an unavailable area.
       */
      protected $cachedareas = array();

      /**
       * @var array Internal cache of course records, indexed by course id.
       */
      protected $cachedcourses = array();

      /**
       * User data required to show their fullnames. Indexed by userid.
       *
       * @var \stdClass[]
       */
      protected static $cachedusers = array();

      /**
       * @var string Frankenstyle plugin name.
       */
      protected $pluginname = null;

      /**
       * Initialises the search engine configuration.
       *
       * The plugin name is taken from the namespace part of the concrete class name,
       * and the configuration is read with get_config() for that plugin.
       *
       * Search engine availability should be checked separately.
       *
       * @throws \coding_exception If the concrete class is not namespaced or its namespace has no underscore.
       * @return void
       */
      public function __construct() {
          $classname = get_class($this);
          if (strpos($classname, '\\') === false) {
              throw new \coding_exception('"' . $classname . '" class should specify its component namespace and it should be named engine.');
          } else if (strpos($classname, '_') === false) {
              throw new \coding_exception('"' . $classname . '" class namespace should be its frankenstyle name');
          }

          // This is search_xxxx config.
          $this->pluginname = substr($classname, 0, strpos($classname, '\\'));
          if ($config = get_config($this->pluginname)) {
              $this->config = $config;
          } else {
              // Must be fully qualified: an unqualified stdClass would be resolved
              // relative to this file's namespace, where no such class exists.
              $this->config = new \stdClass();
          }
      }

      /**
       * Returns a course instance checking internal caching.
       *
       * @param int $courseid
       * @return \stdClass
       */
      protected function get_course($courseid) {
          if (!empty($this->cachedcourses[$courseid])) {
              return $this->cachedcourses[$courseid];
          }

          // No need to clone, only read.
          $this->cachedcourses[$courseid] = get_course($courseid, false);

          return $this->cachedcourses[$courseid];
      }

      /**
       * Returns user data checking the internal static cache.
       *
       * Including here the minimum required user information as this may grow big:
       * only the id plus the fields returned by get_all_user_name_fields() are loaded.
       *
       * @param int $userid
       * @return \stdClass|bool Whatever $DB->get_record() returned for that user id.
       */
      public function get_user($userid) {
          global $DB;

          if (empty(self::$cachedusers[$userid])) {
              $fields = get_all_user_name_fields(true);
              self::$cachedusers[$userid] = $DB->get_record('user', array('id' => $userid), 'id, ' . $fields);
          }

          return self::$cachedusers[$userid];
      }

      /**
       * Returns a search instance of the specified area checking internal caching.
       *
       * Areas that do not exist or are not enabled are remembered as false so they
       * are not looked up again.
       *
       * @param string $areaid Area id
       * @return \core_search\base|bool The search area, or false if it is not available.
       */
      protected function get_search_area($areaid) {
          if (isset($this->cachedareas[$areaid])) {
              // Either a usable area instance or false (already known to be unavailable).
              return $this->cachedareas[$areaid];
          }

          // First result that matches this area.
          $area = \core_search\manager::get_search_area($areaid);

          if ($area === false || !$area->is_enabled()) {
              // The area does not exist, is not available any more or is not enabled.
              // Marking it as false so next time we don't need to check it again.
              $this->cachedareas[$areaid] = false;
              return false;
          }

          $this->cachedareas[$areaid] = $area;

          return $area;
      }

      /**
       * Returns a document instance prepared to be rendered.
       *
       * Builds the document through \core_search\document_factory, fills it with the
       * engine data, sets its document and context URLs from the search area and adds
       * the course full name (and the user full name when the document has a userid).
       *
       * @param \core_search\base $searcharea
       * @param array $docdata Must contain at least 'areaid' and 'itemid'.
       * @return \core_search\document
       */
      protected function to_document(\core_search\base $searcharea, $docdata) {
          list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($docdata['areaid']);
          $doc = \core_search\document_factory::instance($docdata['itemid'], $componentname, $areaname, $this);
          $doc->set_data_from_engine($docdata);
          $doc->set_doc_url($searcharea->get_doc_url($doc));
          $doc->set_context_url($searcharea->get_context_url($doc));

          // Uses the internal caches to get required data needed to render the document later.
          $course = $this->get_course($doc->get('courseid'));
          $doc->set_extra('coursefullname', $course->fullname);

          if ($doc->is_set('userid')) {
              $user = $this->get_user($doc->get('userid'));
              $doc->set_extra('userfullname', fullname($user));
          }

          return $doc;
      }

      /**
       * Loop through given iterator of search documents
       * and have the search engine back end add them
       * to the index.
       *
       * Recognised options:
       * - 'stopat': time after which indexing stops early (the result is flagged as partial).
       * - 'lastindexedtime': when set and equal to 0, every document is marked as new.
       * - 'indexfiles': when truthy, files are attached to each document before adding it.
       *   A missing key is treated as false.
       *
       * @param \Iterator $iterator the iterator of documents to index
       * @param \core_search\base $searcharea the area for the documents to index
       * @param array $options document indexing options
       * @return array Processed document counts: records processed, documents added,
       *               documents ignored, 'modified' value of the last processed document
       *               and whether indexing stopped early.
       */
      public function add_documents($iterator, $searcharea, $options) {
          $numrecords = 0;
          $numdocs = 0;
          $numdocsignored = 0;
          $lastindexeddoc = 0;
          $firstindexeddoc = 0;
          $partial = false;

          // Guarded like the other options so callers may omit the key without a notice.
          $indexfiles = isset($options['indexfiles']) ? $options['indexfiles'] : false;

          foreach ($iterator as $document) {
              // Stop if we have exceeded the time limit (and there are still more items). Always
              // do at least one second's worth of documents otherwise it will never make progress.
              if ($lastindexeddoc !== $firstindexeddoc &&
                      !empty($options['stopat']) &&
                      \core_search\manager::get_current_time() >= $options['stopat']) {
                  $partial = true;
                  break;
              }

              if (!$document instanceof \core_search\document) {
                  continue;
              }

              if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
                  // If we have never indexed this area before, it must be new.
                  $document->set_is_new(true);
              }

              if ($indexfiles) {
                  // Attach files if we are indexing.
                  $searcharea->attach_files($document);
              }

              if ($this->add_document($document, $indexfiles)) {
                  $numdocs++;
              } else {
                  $numdocsignored++;
              }

              $lastindexeddoc = $document->get('modified');
              if (!$firstindexeddoc) {
                  $firstindexeddoc = $lastindexeddoc;
              }

              $numrecords++;
          }

          return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
      }

      /**
       * Returns the plugin name.
       *
       * @return string Frankenstyle plugin name.
       */
      public function get_plugin_name() {
          return $this->pluginname;
      }

      /**
       * Gets the document class used by this search engine.
       *
       * Search engines can overwrite \core_search\document with \search_ENGINENAME\document class.
       *
       * Looks for a document class in the current search engine namespace, falling back to \core_search\document.
       * Publicly available because search areas do not have access to the engine details,
       * \core_search\document_factory accesses this function.
       *
       * @return string
       */
      public function get_document_classname() {
          $classname = $this->pluginname . '\\document';
          if (!class_exists($classname)) {
              $classname = '\\core_search\\document';
          }
          return $classname;
      }

      /**
       * Run any pre-indexing operations.
       *
       * Should be overwritten if the search engine needs to do any pre index preparation.
       *
       * @param bool $fullindex True if a full index will be performed
       * @return void
       */
      public function index_starting($fullindex = false) {
          // Nothing by default.
      }

      /**
       * Run any post indexing operations.
       *
       * Should be overwritten if the search engine needs to do any post index cleanup.
       *
       * @param int $numdocs The number of documents that were added to the index
       * @param bool $fullindex True if a full index was performed
       * @return void
       */
      public function index_complete($numdocs = 0, $fullindex = false) {
          // Nothing by default.
      }

      /**
       * Do anything that may need to be done before an area is indexed.
       *
       * @param \core_search\base $searcharea The search area that is about to be indexed
       * @param bool $fullindex True if a full index is being performed
       * @return void
       */
      public function area_index_starting($searcharea, $fullindex = false) {
          // Nothing by default.
      }

      /**
       * Do any area cleanup needed, and do anything to confirm contents.
       *
       * Return false to prevent the search area completed time and stats from being updated.
       *
       * @param \core_search\base $searcharea The search area that was complete
       * @param int $numdocs The number of documents that were added to the index
       * @param bool $fullindex True if a full index is being performed
       * @return bool True means that data is considered indexed
       */
      public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
          return true;
      }

      /**
       * Optimizes the search engine.
       *
       * Should be overwritten if the search engine can optimize its contents.
       *
       * @return void
       */
      public function optimize() {
          // Nothing by default.
      }

      /**
       * Does the system satisfy all the requirements.
       *
       * Should be overwritten if the search engine has any system dependencies
       * that need to be checked.
       *
       * @return bool
       */
      public function is_installed() {
          return true;
      }

      /**
       * Returns any error reported by the search engine when executing the provided query.
       *
       * It should be called from static::execute_query when an exception is triggered.
       *
       * @return string
       */
      public function get_query_error() {
          return $this->queryerror;
      }

      /**
       * Returns the total number of documents available for the most recent call to execute_query.
       *
       * This can be an estimate, but should get more accurate the higher the limit passed to execute_query is.
       * To do that, the engine can use (actual result returned count + count of unchecked documents), or
       * (total possible docs - docs that have been checked and rejected).
       *
       * Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
       * If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
       *
       * @return int
       */
      abstract public function get_query_total_count();

      /**
       * Return true if file indexing is supported and enabled. False otherwise.
       *
       * @return bool
       */
      public function file_indexing_enabled() {
          return false;
      }

      /**
       * Clears the current query error value.
       *
       * @return void
       */
      public function clear_query_error() {
          $this->queryerror = null;
      }

      /**
       * Is the server ready to use?
       *
       * This should also check that the search engine configuration is ok.
       *
       * @return true|string Returns true if all good or an error string.
       */
      abstract public function is_server_ready();

      /**
       * Adds a document to the search engine.
       *
       * @param \core_search\document $document
       * @param bool $fileindexing True if file indexing is to be used
       * @return bool False if the file was skipped or failed, true on success
       */
      abstract public function add_document($document, $fileindexing = false);

      /**
       * Executes the query on the engine.
       *
       * Implementations of this function should check user context array to limit the results to contexts where the
       * user has access. They should also limit the owneruserid field to manager::NO_OWNER_ID or the current user's id.
       * Engines must use area->check_access() to confirm user access.
       *
       * Engines should reasonably attempt to fill up to limit with valid results if they are available.
       *
       * @param \stdClass $filters Query and filters to apply.
       * @param array $usercontexts Contexts where the user has access. True if the user can access all contexts.
       * @param int $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
       * @return \core_search\document[] Results or false if no results
       */
      abstract public function execute_query($filters, $usercontexts, $limit = 0);

      /**
       * Delete all documents.
       *
       * @param string $areaid To filter by area
       * @return void
       */
      abstract public function delete($areaid = null);
  }