PageRenderTime 39ms CodeModel.GetById 10ms RepoModel.GetById 1ms app.codeStats 0ms

/sources/SearchAPI-Custom.class.php

https://github.com/Arantor/Elkarte
PHP | 357 lines | 206 code | 45 blank | 106 comment | 31 complexity | e999c0994c5529a8d557666e93431028 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-3.0
  1. <?php
  2. /**
  3. * @name ElkArte Forum
  4. * @copyright ElkArte Forum contributors
  5. * @license BSD http://opensource.org/licenses/BSD-3-Clause
  6. *
  7. * This software is a derived product, based on:
  8. *
  9. * Simple Machines Forum (SMF)
  10. * copyright: 2011 Simple Machines (http://www.simplemachines.org)
  11. * license: BSD, See included LICENSE.TXT for terms and conditions.
  12. *
  13. * @version 1.0 Alpha
  14. *
  15. */
  // Stop immediately unless the ELKARTE constant has been defined
  // (presumably by the forum's entry script - confirm against the bootstrap).
  if (defined('ELKARTE') === false)
  {
      die('No access...');
  }
  18. /**
  19. * SearchAPI-Custom.class.php: custom search API class, used when the custom ELKARTE search index is in use.
  20. */
  class Custom_Search
  {
      /**
       * The last version of ELKARTE that this was tested on, to protect against API changes.
       * @var string
       */
      public $version_compatible = 'ELKARTE 1.0 Alpha';

      /**
       * This won't work with versions of ELKARTE less than this.
       * @var string
       */
      public $min_elk_version = 'ELKARTE 1.0 Alpha';

      /**
       * Is it supported? Set to false by the constructor when $db_type is not
       * one of $supported_databases.
       * @var boolean
       */
      public $is_supported = true;

      /**
       * Index settings, unserialized from $modSettings['search_custom_index_config'].
       * @var array
       */
      protected $indexSettings = array();

      /**
       * What words are banned? Taken from the comma separated
       * $modSettings['search_stopwords'].
       * @var array
       */
      protected $bannedWords = array();

      /**
       * What is the minimum word length? Taken from the 'bytes_per_word' index
       * setting; stays null while the index is not configured.
       * @var int|null
       */
      protected $min_word_length = null;

      /**
       * What databases support the custom index?
       * @var array
       */
      protected $supported_databases = array('mysql', 'postgresql', 'sqlite');

      /**
       * Checks database support and loads the custom index configuration.
       *
       * Leaves the defaults untouched when the database is unsupported or no
       * index configuration has been stored.
       */
      public function __construct()
      {
          global $modSettings, $db_type;

          // Is this database supported?
          if (!in_array($db_type, $this->supported_databases))
          {
              $this->is_supported = false;
              return;
          }

          if (empty($modSettings['search_custom_index_config']))
              return;

          $this->indexSettings = $this->loadIndexSettings();
          $this->bannedWords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);

          // A damaged configuration must not raise an undefined index notice.
          $this->min_word_length = isset($this->indexSettings['bytes_per_word']) ? $this->indexSettings['bytes_per_word'] : null;
      }

      /**
       * Check whether the search can be performed by this API.
       *
       * @param string $methodName name of the API method being asked about
       * @param mixed $query_params not used by this implementation
       * @return boolean true when this class implements the method
       */
      public function supportsMethod($methodName, $query_params = null)
      {
          switch ($methodName)
          {
              case 'isValid':
              case 'searchSort':
              case 'prepareIndexes':
              case 'indexedWordQuery':
              case 'postCreated':
              case 'postModified':
                  return true;

              // All other methods, too bad dunno you.
              default:
                  return false;
          }
      }

      /**
       * If the settings don't exist we can't continue.
       *
       * @return boolean true when a custom index configuration is stored
       */
      public function isValid()
      {
          global $modSettings;

          return !empty($modSettings['search_custom_index_config']);
      }

      /**
       * Comparison callback (usort style) for search words.
       *
       * Each word is weighted by its byte length; a word found in the global
       * $excludedWords list has 1000 subtracted from its weight.
       *
       * @param string $a Word A
       * @param string $b Word B
       * @return int 1 when A weighs more than B, -1 when it weighs less, 0 when equal
       */
      public function searchSort($a, $b)
      {
          global $excludedWords;

          // The global may not have been populated yet; treat that as "nothing excluded".
          $excluded = is_array($excludedWords) ? $excludedWords : array();

          $x = strlen($a) - (in_array($a, $excluded) ? 1000 : 0);
          $y = strlen($b) - (in_array($b, $excluded) ? 1000 : 0);

          return $y < $x ? 1 : ($y > $x ? -1 : 0);
      }

      /**
       * Do we have to do some work with the words we are searching for to prepare them?
       *
       * Adds the raw word to $wordsSearch['words'] (unless search_force_index is
       * set) and its usable subwords to $wordsSearch['indexed_words'].
       *
       * @param string $word the word or phrase being searched for
       * @param array $wordsSearch receives the 'words' and 'indexed_words' entries
       * @param array $wordsExclude receives the subwords of an excluded word
       * @param boolean $isExcluded whether $word is excluded from the search
       */
      public function prepareIndexes($word, &$wordsSearch, &$wordsExclude, $isExcluded)
      {
          global $modSettings, $smcFunc;

          $subwords = text2words($word, $this->min_word_length, true);

          if (empty($modSettings['search_force_index']))
              $wordsSearch['words'][] = $word;

          // Excluded phrases don't benefit from being split into subwords.
          // (This used to be a "continue" outside of any loop, which PHP rejects.)
          if (count($subwords) > 1 && $isExcluded)
              return;

          foreach ($subwords as $subword)
          {
              if ($smcFunc['strlen']($subword) < $this->min_word_length || in_array($subword, $this->bannedWords))
                  continue;

              $wordsSearch['indexed_words'][] = $subword;
              if ($isExcluded)
                  $wordsExclude[] = $subword;
          }
      }

      /**
       * Search for indexed words.
       *
       * Builds one query against {db_prefix}messages joined to
       * {db_prefix}log_search_words (one join per indexed word) and hands it to
       * $smcFunc['db_search_query'].
       *
       * @param array $words 'words' (raw words matched against m.body) and 'indexed_words' (index ids)
       * @param array $search_data 'params', 'no_regexp', 'insert_into', 'max_results', 'indexed_results'
       * @return mixed whatever $smcFunc['db_search_query'] returns
       */
      public function indexedWordQuery($words, $search_data)
      {
          global $modSettings, $smcFunc;

          $query_select = array(
              'id_msg' => 'm.id_msg',
          );
          $query_inner_join = array();
          $query_left_join = array();
          $query_where = array();
          $query_params = isset($search_data['params']) ? $search_data['params'] : array();

          // Plain LIKE matching unless whole word matching is enabled and regular expressions are allowed.
          $useLike = empty($modSettings['search_match_words']) || !empty($search_data['no_regexp']);
          $operator = $useLike ? ' LIKE ' : ' RLIKE ';

          if (!empty($query_params['id_search']))
              $query_select['id_search'] = '{int:id_search}';

          // 'words' is not filled by prepareIndexes() when search_force_index is on.
          $regularWords = empty($words['words']) ? array() : $words['words'];
          $excludedWords = empty($query_params['excluded_words']) ? array() : $query_params['excluded_words'];
          $count = 0;
          foreach ($regularWords as $regularWord)
          {
              $query_where[] = 'm.body' . (in_array($regularWord, $excludedWords) ? ' NOT' : '') . $operator . '{string:complex_body_' . $count . '}';
              $query_params['complex_body_' . $count++] = $this->buildMatchPattern($regularWord, $useLike);
          }

          if (!empty($query_params['user_query']))
              $query_where[] = '{raw:user_query}';
          if (!empty($query_params['board_query']))
              $query_where[] = 'm.id_board {raw:board_query}';
          if (!empty($query_params['topic']))
              $query_where[] = 'm.id_topic = {int:topic}';
          if (!empty($query_params['min_msg_id']))
              $query_where[] = 'm.id_msg >= {int:min_msg_id}';
          if (!empty($query_params['max_msg_id']))
              $query_where[] = 'm.id_msg <= {int:max_msg_id}';

          // Subject exclusions are only applied when the index is not forced.
          if (empty($modSettings['search_force_index']))
          {
              $exclusions = array(
                  'exclude_subject_phrase_' => 'excluded_phrases',
                  'exclude_subject_words_' => 'excluded_subject_words',
              );
              foreach ($exclusions as $placeholder => $paramKey)
              {
                  if (empty($query_params[$paramKey]))
                      continue;

                  $count = 0;
                  foreach ($query_params[$paramKey] as $excludedText)
                  {
                      $query_where[] = 'subject NOT' . $operator . '{string:' . $placeholder . $count . '}';
                      $query_params[$placeholder . $count++] = $this->buildMatchPattern($excludedText, $useLike);
                  }
              }
          }

          $indexedWords = empty($words['indexed_words']) ? array() : $words['indexed_words'];
          $excludedIndexWords = empty($query_params['excluded_index_words']) ? array() : $query_params['excluded_index_words'];
          $numTables = 0;
          $prev_join = 0;
          foreach ($indexedWords as $indexedWord)
          {
              $numTables++;
              $alias = 'lsw' . $numTables;

              // NOTE(review): $indexedWord is concatenated into the SQL as-is; this assumes
              // it is always a numeric id produced by text2words() - confirm.
              if (in_array($indexedWord, $excludedIndexWords))
              {
                  // Excluded word: the message must have no row for it in the index.
                  $query_left_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_word = ' . $indexedWord . ' AND ' . $alias . '.id_msg = m.id_msg)';
                  $query_where[] = '(' . $alias . '.id_word IS NULL)';
              }
              else
              {
                  // Required word: chain each join onto the previous required one.
                  $query_inner_join[] = '{db_prefix}log_search_words AS ' . $alias . ' ON (' . $alias . '.id_msg = ' . ($prev_join === 0 ? 'm' : 'lsw' . $prev_join) . '.id_msg)';
                  $query_where[] = $alias . '.id_word = ' . $indexedWord;
                  $prev_join = $numTables;
              }
          }

          $query = '';
          if (!empty($smcFunc['db_support_ignore']))
          {
              $query .= '
                  INSERT IGNORE INTO {db_prefix}' . $search_data['insert_into'] . '
                      (' . implode(', ', array_keys($query_select)) . ')';
          }

          $query .= '
                  SELECT ' . implode(', ', $query_select) . '
                  FROM {db_prefix}messages AS m';

          foreach ($query_inner_join as $join)
              $query .= '
                      INNER JOIN ' . $join;

          foreach ($query_left_join as $join)
              $query .= '
                      LEFT JOIN ' . $join;

          $query .= '
                  WHERE ' . implode('
                      AND ', $query_where);

          if (!empty($search_data['max_results']))
          {
              $alreadyIndexed = empty($search_data['indexed_results']) ? 0 : $search_data['indexed_results'];
              $query .= '
                  LIMIT ' . ($search_data['max_results'] - $alreadyIndexed);
          }

          return $smcFunc['db_search_query']('insert_into_log_messages_fulltext', $query, $query_params);
      }

      /**
       * After a post is made, we update the search index database
       *
       * @param array $msgOptions uses 'body' and 'id'
       * @param array $topicOptions not used
       * @param array $posterOptions not used
       */
      public function postCreated($msgOptions, $topicOptions, $posterOptions)
      {
          global $smcFunc;

          $customIndexSettings = $this->loadIndexSettings();
          if (!isset($customIndexSettings['bytes_per_word']))
              return;

          $inserts = array();
          foreach (text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true) as $word)
              $inserts[] = array($word, $msgOptions['id']);

          if (!empty($inserts))
              $smcFunc['db_insert']('ignore',
                  '{db_prefix}log_search_words',
                  array('id_word' => 'int', 'id_msg' => 'int'),
                  $inserts,
                  array('id_word', 'id_msg')
              );
      }

      /**
       * After a post is modified, we update the search index database.
       *
       * Does nothing unless $msgOptions['body'] is set.
       *
       * @param array $msgOptions uses 'body', 'id' and the optional 'old_body'
       * @param array $topicOptions not used
       * @param array $posterOptions not used
       */
      public function postModified($msgOptions, $topicOptions, $posterOptions)
      {
          global $modSettings, $smcFunc;

          if (!isset($msgOptions['body']))
              return;

          $customIndexSettings = $this->loadIndexSettings();
          if (!isset($customIndexSettings['bytes_per_word']))
              return;

          $stopwords = empty($modSettings['search_stopwords']) ? array() : explode(',', $modSettings['search_stopwords']);
          $old_body = isset($msgOptions['old_body']) ? $msgOptions['old_body'] : '';

          // Create the new and old index.
          $old_index = text2words($old_body, $customIndexSettings['bytes_per_word'], true);
          $new_index = text2words($msgOptions['body'], $customIndexSettings['bytes_per_word'], true);

          // Calculate the words to be added and removed from the index.
          $removed_words = array_diff(array_diff($old_index, $new_index), $stopwords);
          $inserted_words = array_diff(array_diff($new_index, $old_index), $stopwords);

          // Delete the removed words AND the added ones to avoid key constraints.
          // This also has to run when words were only added: without an old body
          // every word counts as new and may already have a row.
          $obsolete_words = array_merge($removed_words, $inserted_words);
          if (!empty($obsolete_words))
          {
              $smcFunc['db_query']('', '
                  DELETE FROM {db_prefix}log_search_words
                  WHERE id_msg = {int:id_msg}
                      AND id_word IN ({array_int:removed_words})',
                  array(
                      'removed_words' => $obsolete_words,
                      'id_msg' => $msgOptions['id'],
                  )
              );
          }

          // Add the new words to be indexed.
          if (!empty($inserted_words))
          {
              $inserts = array();
              foreach ($inserted_words as $word)
                  $inserts[] = array($word, $msgOptions['id']);

              // id_word is an int here too, matching postCreated() and the DELETE above.
              $smcFunc['db_insert']('insert',
                  '{db_prefix}log_search_words',
                  array('id_word' => 'int', 'id_msg' => 'int'),
                  $inserts,
                  array('id_word', 'id_msg')
              );
          }
      }

      /**
       * Reads the custom index configuration from $modSettings.
       *
       * @return array the unserialized settings, or an empty array when the
       *  setting is missing or does not unserialize to an array
       */
      protected function loadIndexSettings()
      {
          global $modSettings;

          if (empty($modSettings['search_custom_index_config']))
              return array();

          $settings = unserialize($modSettings['search_custom_index_config']);

          return is_array($settings) ? $settings : array();
      }

      /**
       * Builds the value bound to a LIKE / RLIKE placeholder.
       *
       * @param string $text the word or phrase to match
       * @param boolean $useLike true for a LIKE pattern, false for an RLIKE word-boundary pattern
       * @return string the escaped pattern
       */
      protected function buildMatchPattern($text, $useLike)
      {
          // LIKE: escape the wildcards and allow a match anywhere.
          if ($useLike)
              return '%' . strtr($text, array('_' => '\\_', '%' => '\\%')) . '%';

          // RLIKE: neutralise regex metacharacters and anchor on word boundaries.
          return '[[:<:]]' . addcslashes(preg_replace(array('/([\[\]$.+*?|{}()])/'), array('[$1]'), $text), '\\\'') . '[[:>:]]';
      }
  }