PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/search/engine/solr/classes/document.php

https://gitlab.com/unofficial-mirrors/moodle
PHP | 203 lines | 86 code | 23 blank | 94 comment | 4 complexity | fc3a1058ec730d63eab9a911947abcbc MD5 | raw file
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Document representation.
  18. *
  19. * @package search_solr
  20. * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
  21. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  22. */
  23. namespace search_solr;
  24. defined('MOODLE_INTERNAL') || die();
  /**
   * Represents a document to index in (or retrieved from) the Solr search engine.
   *
   * Extends the core search document with Solr-only fields used for file indexing,
   * engine-specific value formatting, and conversion of Solr highlight markers to HTML.
   *
   * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
   * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
   */
  class document extends \core_search\document {

      /**
       * Indicates the file contents were not indexed due to an error.
       */
      const INDEXED_FILE_ERROR = -1;

      /**
       * Indicates the file contents were not indexed due to filtering/settings.
       */
      const INDEXED_FILE_FALSE = 0;

      /**
       * Indicates the file contents are indexed with the record.
       */
      const INDEXED_FILE_TRUE = 1;

      /**
       * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
       * for internal purposes.
       *
       * @var array
       */
      protected static $enginefields = array(
          'solr_filegroupingid' => array(
              'type' => 'string',
              'stored' => true,
              'indexed' => true
          ),
          'solr_fileid' => array(
              'type' => 'string',
              'stored' => true,
              'indexed' => true
          ),
          'solr_filecontenthash' => array(
              'type' => 'string',
              'stored' => true,
              'indexed' => true
          ),
          // Stores the status of file indexing (one of the INDEXED_FILE_* constants).
          'solr_fileindexstatus' => array(
              'type' => 'int',
              'stored' => true,
              'indexed' => true
          ),
          // Field to index, but not store, file contents.
          'solr_filecontent' => array(
              'type' => 'text',
              'stored' => false,
              'indexed' => true,
              'mainquery' => true
          )
      );

      /**
       * Formats the timestamp according to the search engine needs.
       *
       * The timestamp is rendered in GMT using the engine's date format.
       *
       * @param int $timestamp Unix timestamp.
       * @return string
       */
      public static function format_time_for_engine($timestamp) {
          return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
      }

      /**
       * Formats a string according to the search engine needs.
       *
       * The string is truncated so that it does not exceed the maximum number of bytes
       * accepted for a string field.
       *
       * @param string $string The text to be sent to the engine.
       * @return string The text, cut down to at most 32766 bytes.
       */
      public static function format_string_for_engine($string) {
          // 32766 bytes (just under 2^15) is the default limit. We could convert this to a
          // setting, as it is possible to change the max in solr.
          return \core_text::str_max_bytes($string, 32766);
      }

      /**
       * Returns a timestamp from the value stored in the search engine.
       *
       * @param string $time Date/time string as returned by the engine.
       * @return int|false Unix timestamp, or false if the string could not be parsed by strtotime().
       */
      public static function import_time_from_engine($time) {
          return strtotime($time);
      }

      /**
       * Overwritten to use HTML format, as format_text() turns the solr highlight markers into HTML spans.
       *
       * @return int
       */
      protected function get_text_format() {
          return FORMAT_HTML;
      }

      /**
       * Formats a text string coming from the search engine.
       *
       * HTML special characters are encoded first, then the engine highlight markers are replaced
       * with highlight spans. Any unbalanced markers are compensated for so the resulting markup
       * always has the same number of opening and closing spans.
       *
       * @param string $text Text to format
       * @return string HTML text to be rendered
       */
      protected function format_text($text) {
          $openspan = '<span class="highlight">';
          $closespan = '</span>';

          // Since we allow output for highlighting, we need to encode html entities.
          // This ensures plaintext html chars don't become valid html.
          $out = s($text);

          $startcount = 0;
          $endcount = 0;

          // Remove end/start pairs that span a few common separation characters. Allows us to highlight phrases instead of words.
          // The markers are literal strings, so they are quoted before being embedded in the pattern.
          $regex = '|' . preg_quote(engine::HIGHLIGHT_END, '|') . '([ .,-]{0,3})' . preg_quote(engine::HIGHLIGHT_START, '|') . '|';
          $merged = preg_replace($regex, '$1', $out);
          if ($merged !== null) {
              // A null result signals a PCRE error; in that case keep the unmerged text instead of losing it.
              $out = $merged;
          }

          // Now replace our start and end highlight markers.
          $out = str_replace(engine::HIGHLIGHT_START, $openspan, $out, $startcount);
          $out = str_replace(engine::HIGHLIGHT_END, $closespan, $out, $endcount);

          // This makes sure any highlight tags are balanced, in case truncation or the highlight text contained our markers.
          if ($startcount > $endcount) {
              // More opening than closing tags: close the dangling spans at the end.
              $out .= str_repeat($closespan, $startcount - $endcount);
          } else if ($startcount < $endcount) {
              // More closing than opening tags: open the missing spans at the beginning.
              $out = str_repeat($openspan, $endcount - $startcount) . $out;
          }

          return parent::format_text($out);
      }

      /**
       * Apply any defaults to unset fields before export. Called after document building, but before export.
       *
       * Sub-classes of this should make sure to call parent::apply_defaults().
       */
      protected function apply_defaults() {
          parent::apply_defaults();

          // We want to set the solr_filegroupingid to id if it isn't set.
          if (!isset($this->data['solr_filegroupingid'])) {
              $this->data['solr_filegroupingid'] = $this->data['id'];
          }
      }

      /**
       * Export the data for the given file in relation to this document.
       *
       * Starts from this document's exported data, drops the text fields that belong to the
       * main document and overrides the identifying fields with the file's own values.
       *
       * @param \stored_file $file The stored file we are talking about.
       * @return array
       */
      public function export_file_for_engine($file) {
          $data = $this->export_for_engine();

          // Content is indexed in the main document.
          unset($data['content']);
          unset($data['description1']);
          unset($data['description2']);

          // Going to append the fileid to give it a unique id.
          $data['id'] = $data['id'].'-solrfile'.$file->get_id();
          $data['type'] = \core_search\manager::TYPE_FILE;
          $data['solr_fileid'] = $file->get_id();
          $data['solr_filecontenthash'] = $file->get_contenthash();
          $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
          $data['title'] = $file->get_filename();
          $data['modified'] = self::format_time_for_engine($file->get_timemodified());

          return $data;
      }
  }