PageRenderTime 48ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/sources/SearchAPI-Fulltext.class.php

https://github.com/Arantor/Elkarte
PHP | 293 lines | 142 code | 34 blank | 117 comment | 30 complexity | 0ac82696948382cef880286bd7a9c89e MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-3.0
  1. <?php
  2. /**
  3. * @name ElkArte Forum
  4. * @copyright ElkArte Forum contributors
  5. * @license BSD http://opensource.org/licenses/BSD-3-Clause
  6. *
  7. * This software is a derived product, based on:
  8. *
  9. * Simple Machines Forum (SMF)
  10. * copyright: 2011 Simple Machines (http://www.simplemachines.org)
  11. * license: BSD, See included LICENSE.TXT for terms and conditions.
  12. *
  13. * @version 1.0 Alpha
  14. *
  15. */
// Direct-access guard: stop immediately unless the ELKARTE constant has
// already been defined by whatever included this file.
if (!defined('ELKARTE'))
	die('No access...');
  18. /**
  19. * SearchAPI-Fulltext.php, Fulltext API, used when an SQL fulltext index is used
  20. */
  21. class Fulltext_Search
  22. {
  23. /**
  24. * This is the last version of ELKARTE that this was tested on, to protect against API changes.
  25. * @var string
  26. */
  27. public $version_compatible = 'ELKARTE 1.0 Alpha';
  28. /**
  29. * This won't work with versions of ELKARTE less than this.
  30. * @var string
  31. */
  32. public $min_elk_version = 'ELKARTE 1.0 Alpha';
  33. /**
  34. * Is it supported?
  35. *
  36. * @var boolean
  37. */
  38. public $is_supported = true;
  39. /**
  40. * What words are banned?
  41. * @var array
  42. */
  43. protected $bannedWords = array();
  44. /**
  45. * What is the minimum word length?
  46. * @var int
  47. */
  48. protected $min_word_length = 4;
  49. /**
  50. * What databases support the fulltext index?
  51. * @var array
  52. */
  53. protected $supported_databases = array('mysql');
  54. /**
  55. * Fulltext_Search::__construct()
  56. *
  57. */
  58. public function __construct()
  59. {
  60. global $smcFunc, $db_connection, $modSettings, $db_type;
  61. // Is this database supported?
  62. if (!in_array($db_type, $this->supported_databases))
  63. {
  64. $this->is_supported = false;
  65. return;
  66. }
  67. $this->bannedWords = empty($modSettings['search_banned_words']) ? array() : explode(',', $modSettings['search_banned_words']);
  68. $this->min_word_length = $this->_getMinWordLength();
  69. }
  70. /**
  71. * Fulltext_Search::supportsMethod()
  72. *
  73. * Check whether the method can be performed by this API.
  74. *
  75. * @param mixed $methodName
  76. * @param mixed $query_params
  77. * @return
  78. */
  79. public function supportsMethod($methodName, $query_params = null)
  80. {
  81. switch ($methodName)
  82. {
  83. case 'searchSort':
  84. case 'prepareIndexes':
  85. case 'indexedWordQuery':
  86. return true;
  87. break;
  88. // All other methods, too bad dunno you.
  89. default:
  90. return false;
  91. break;
  92. }
  93. }
  94. /**
  95. * Fulltext_Search::_getMinWordLength()
  96. *
  97. * What is the minimum word length full text supports?
  98. *
  99. * @return
  100. */
  101. protected function _getMinWordLength()
  102. {
  103. global $smcFunc;
  104. // Try to determine the minimum number of letters for a fulltext search.
  105. $request = $smcFunc['db_search_query']('max_fulltext_length', '
  106. SHOW VARIABLES
  107. LIKE {string:fulltext_minimum_word_length}',
  108. array(
  109. 'fulltext_minimum_word_length' => 'ft_min_word_len',
  110. )
  111. );
  112. if ($request !== false && $smcFunc['db_num_rows']($request) == 1)
  113. {
  114. list (, $min_word_length) = $smcFunc['db_fetch_row']($request);
  115. $smcFunc['db_free_result']($request);
  116. }
  117. // 4 is the MySQL default...
  118. else
  119. $min_word_length = 4;
  120. return $min_word_length;
  121. }
  122. /**
  123. * callback function for usort used to sort the fulltext results.
  124. * the order of sorting is: large words, small words, large words that
  125. * are excluded from the search, small words that are excluded.
  126. *
  127. * @param string $a Word A
  128. * @param string $b Word B
  129. * @return int
  130. */
  131. public function searchSort($a, $b)
  132. {
  133. global $modSettings, $excludedWords, $smcFunc;
  134. $x = $smcFunc['strlen']($a) - (in_array($a, $excludedWords) ? 1000 : 0);
  135. $y = $smcFunc['strlen']($b) - (in_array($b, $excludedWords) ? 1000 : 0);
  136. return $x < $y ? 1 : ($x > $y ? -1 : 0);
  137. }
  138. /**
  139. * Fulltext_Search::prepareIndexes()
  140. *
  141. * Do we have to do some work with the words we are searching for to prepare them?
  142. *
  143. * @param mixed $word
  144. * @param mixed $wordsSearch
  145. * @param mixed $wordsExclude
  146. * @param mixed $isExcluded
  147. * @return
  148. */
  149. public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
  150. {
  151. global $modSettings, $smcFunc;
  152. $subwords = text2words($word, null, false);
  153. if (empty($modSettings['search_force_index']))
  154. {
  155. // A boolean capable search engine and not forced to only use an index, we may use a non indexed search
  156. // this is harder on the server so we are restrictive here
  157. if (count($subwords) > 1 && preg_match('~[.:@$]~', $word))
  158. {
  159. // using special characters that a full index would ignore and the remaining words are short which would also be ignored
  160. if (($smcFunc['strlen'](current($subwords)) < $this->min_word_length) && ($smcFunc['strlen'](next($subwords)) < $this->min_word_length))
  161. {
  162. $wordsSearch['words'][] = trim($word, "/*- ");
  163. $wordsSearch['complex_words'][] = count($subwords) === 1 ? $word : '"' . $word . '"';
  164. }
  165. }
  166. elseif ($smcFunc['strlen'](trim($word, "/*- ")) < $this->min_word_length)
  167. {
  168. // short words have feelings too
  169. $wordsSearch['words'][] = trim($word, "/*- ");
  170. $wordsSearch['complex_words'][] = count($subwords) === 1 ? $word : '"' . $word . '"';
  171. }
  172. }
  173. $fulltextWord = count($subwords) === 1 ? $word : '"' . $word . '"';
  174. $wordsSearch['indexed_words'][] = $fulltextWord;
  175. if ($isExcluded)
  176. $wordsExclude[] = $fulltextWord;
  177. }
  178. /**
  179. * Fulltext_Search::indexedWordQuery()
  180. *
  181. * Search for indexed words.
  182. *
  183. * @param mixed $words
  184. * @param mixed $search_data
  185. * @return
  186. */
  187. public function indexedWordQuery($words, $search_data)
  188. {
  189. global $modSettings, $smcFunc;
  190. $query_select = array(
  191. 'id_msg' => 'm.id_msg',
  192. );
  193. $query_where = array();
  194. $query_params = $search_data['params'];
  195. if ($query_params['id_search'])
  196. $query_select['id_search'] = '{int:id_search}';
  197. $count = 0;
  198. if (empty($modSettings['search_simple_fulltext']))
  199. foreach ($words['words'] as $regularWord)
  200. {
  201. $query_where[] = 'm.body' . (in_array($regularWord, $query_params['excluded_words']) ? ' NOT' : '') . (empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? ' LIKE ' : 'RLIKE') . '{string:complex_body_' . $count . '}';
  202. $query_params['complex_body_' . $count++] = empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? '%' . strtr($regularWord, array('_' => '\\_', '%' => '\\%')) . '%' : '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $regularWord), '\\\'') . '[[:>:]]';
  203. }
  204. if ($query_params['user_query'])
  205. $query_where[] = '{raw:user_query}';
  206. if ($query_params['board_query'])
  207. $query_where[] = 'm.id_board {raw:board_query}';
  208. if ($query_params['topic'])
  209. $query_where[] = 'm.id_topic = {int:topic}';
  210. if ($query_params['min_msg_id'])
  211. $query_where[] = 'm.id_msg >= {int:min_msg_id}';
  212. if ($query_params['max_msg_id'])
  213. $query_where[] = 'm.id_msg <= {int:max_msg_id}';
  214. $count = 0;
  215. if (!empty($query_params['excluded_phrases']) && empty($modSettings['search_force_index']))
  216. foreach ($query_params['excluded_phrases'] as $phrase)
  217. {
  218. $query_where[] = 'subject NOT ' . (empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? ' LIKE ' : 'RLIKE') . '{string:exclude_subject_phrase_' . $count . '}';
  219. $query_params['exclude_subject_phrase_' . $count++] = empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? '%' . strtr($phrase, array('_' => '\\_', '%' => '\\%')) . '%' : '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $phrase), '\\\'') . '[[:>:]]';
  220. }
  221. $count = 0;
  222. if (!empty($query_params['excluded_subject_words']) && empty($modSettings['search_force_index']))
  223. foreach ($query_params['excluded_subject_words'] as $excludedWord)
  224. {
  225. $query_where[] = 'subject NOT ' . (empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? ' LIKE ' : 'RLIKE') . '{string:exclude_subject_words_' . $count . '}';
  226. $query_params['exclude_subject_words_' . $count++] = empty($modSettings['search_match_words']) || $search_data['no_regexp'] ? '%' . strtr($excludedWord, array('_' => '\\_', '%' => '\\%')) . '%' : '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $excludedWord), '\\\'') . '[[:>:]]';
  227. }
  228. if (!empty($modSettings['search_simple_fulltext']))
  229. {
  230. $query_where[] = 'MATCH (body) AGAINST ({string:body_match})';
  231. $query_params['body_match'] = implode(' ', array_diff($words['indexed_words'], $query_params['excluded_index_words']));
  232. }
  233. else
  234. {
  235. $query_params['boolean_match'] = '';
  236. // remove any indexed words that are used in the complex body search terms
  237. $words['indexed_words'] = array_diff($words['indexed_words'], $words['complex_words']);
  238. foreach ($words['indexed_words'] as $fulltextWord)
  239. $query_params['boolean_match'] .= (in_array($fulltextWord, $query_params['excluded_index_words']) ? '-' : '+') . $fulltextWord . ' ';
  240. $query_params['boolean_match'] = substr($query_params['boolean_match'], 0, -1);
  241. // if we have bool terms to search, add them in
  242. if ($query_params['boolean_match'])
  243. $query_where[] = 'MATCH (body) AGAINST ({string:boolean_match} IN BOOLEAN MODE)';
  244. }
  245. $ignoreRequest = $smcFunc['db_search_query']('insert_into_log_messages_fulltext', ($smcFunc['db_support_ignore'] ? ( '
  246. INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
  247. (' . implode(', ', array_keys($query_select)) . ')') : '') . '
  248. SELECT ' . implode(', ', $query_select) . '
  249. FROM {db_prefix}messages AS m
  250. WHERE ' . implode('
  251. AND ', $query_where) . (empty($search_data['max_results']) ? '' : '
  252. LIMIT ' . ($search_data['max_results'] - $search_data['indexed_results'])),
  253. $query_params
  254. );
  255. return $ignoreRequest;
  256. }
  257. }