PageRenderTime 363ms CodeModel.GetById 43ms RepoModel.GetById 19ms app.codeStats 1ms

/phpBB/phpbb/search/fulltext_native.php

http://github.com/phpbb/phpbb
PHP | 2068 lines | 1409 code | 223 blank | 436 comment | 168 complexity | 7bddd02efb6a2be52ab39cde36ee7c87 MD5 | raw file
Possible License(s): GPL-3.0, AGPL-1.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. *
  4. * This file is part of the phpBB Forum Software package.
  5. *
  6. * @copyright (c) phpBB Limited <https://www.phpbb.com>
  7. * @license GNU General Public License, version 2 (GPL-2.0)
  8. *
  9. * For full copyright and license information, please see
  10. * the docs/CREDITS.txt file.
  11. *
  12. */
  13. namespace phpbb\search;
  14. /**
  15. * phpBB's own db driven fulltext search, version 2
  16. */
  17. class fulltext_native extends \phpbb\search\base
  18. {
  /** UTF-8 byte sequence of U+AC00, lower bound of the Hangul range */
  const UTF8_HANGUL_FIRST = "\xEA\xB0\x80";

  /** UTF-8 byte sequence of U+D7A3, upper bound of the Hangul range */
  const UTF8_HANGUL_LAST = "\xED\x9E\xA3";

  /** UTF-8 byte sequence of U+4E00, lower bound of the CJK range */
  const UTF8_CJK_FIRST = "\xE4\xB8\x80";

  /** UTF-8 byte sequence of U+9FBB, upper bound of the CJK range */
  const UTF8_CJK_LAST = "\xE9\xBE\xBB";

  /** UTF-8 byte sequence of U+20000, lower bound of the CJK "B" range */
  const UTF8_CJK_B_FIRST = "\xF0\xA0\x80\x80";

  /** UTF-8 byte sequence of U+2A6D6, upper bound of the CJK "B" range */
  const UTF8_CJK_B_LAST = "\xF0\xAA\x9B\x96";

  // ---------------------------------------------------------------
  // Collaborators and environment, assigned by the constructor
  // ---------------------------------------------------------------

  /**
  * Relative path to board root
  * @var string
  */
  protected $phpbb_root_path;

  /**
  * PHP file extension
  * @var string
  */
  protected $php_ext;

  /**
  * Config object
  * @var \phpbb\config\config
  */
  protected $config;

  /**
  * Database connection
  * @var \phpbb\db\driver\driver_interface
  */
  protected $db;

  /**
  * phpBB event dispatcher object
  * @var \phpbb\event\dispatcher_interface
  */
  protected $phpbb_dispatcher;

  /**
  * User object
  * @var \phpbb\user
  */
  protected $user;

  // ---------------------------------------------------------------
  // Search state
  // ---------------------------------------------------------------

  /**
  * Associative array holding index stats
  * @var array
  */
  protected $stats = array();

  /**
  * Minimum and maximum searchable word length, stored under the
  * keys 'min' and 'max'
  * @var array
  */
  protected $word_length = array();

  /**
  * Tidied search query: operators are prefixed and common words excluded
  * @var string
  */
  protected $search_query;

  /**
  * Common words, i.e. words that are ignored by the search
  * (flagged as common in the word list, or shorter/longer than the
  * min/max word length)
  * @var array
  */
  protected $common_words = array();

  /**
  * Ids of words (or groups of words) that have to be included
  * @var array
  */
  protected $must_contain_ids = array();

  /**
  * Ids of words that should not be included
  * @var array
  */
  protected $must_not_contain_ids = array();

  /**
  * Groups of word ids of which at least one needs to be excluded
  * @var array
  */
  protected $must_exclude_one_ids = array();
  92. /**
  93. * Initialises the fulltext_native search backend with min/max word length
  94. *
  95. * @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure
  96. * @param string $phpbb_root_path phpBB root path
  97. * @param string $phpEx PHP file extension
  98. * @param \phpbb\auth\auth $auth Auth object
  99. * @param \phpbb\config\config $config Config object
  100. * @param \phpbb\db\driver\driver_interface $db Database object
  101. * @param \phpbb\user $user User object
  102. * @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object
  103. */
  public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user, $phpbb_dispatcher)
  {
      // Keep hold of the collaborators; $auth is accepted but not stored.
      $this->config = $config;
      $this->db = $db;
      $this->user = $user;
      $this->phpbb_dispatcher = $phpbb_dispatcher;
      $this->phpbb_root_path = $phpbb_root_path;
      $this->php_ext = $phpEx;

      // Word length limits come from the board configuration.
      $min_chars = (int) $this->config['fulltext_native_min_chars'];
      $max_chars = (int) $this->config['fulltext_native_max_chars'];
      $this->word_length = array('min' => $min_chars, 'max' => $max_chars);

      // Load the UTF tools unless they are already available.
      if (!function_exists('utf8_decode_ncr'))
      {
          $utf_tools_file = $this->phpbb_root_path . 'includes/utf/utf_tools.' . $this->php_ext;
          include($utf_tools_file);
      }

      // Report success to the caller through the by-reference argument.
      $error = false;
  }
  122. /**
  123. * Returns the name of this search backend to be displayed to administrators
  124. *
  125. * @return string Name
  126. */
  public function get_name()
  {
      // Fixed, untranslated label of this backend.
      $backend_label = 'phpBB Native Fulltext';

      return $backend_label;
  }
  131. /**
  132. * Returns the search_query
  133. *
  134. * @return string search query
  135. */
  public function get_search_query()
  {
      // The tidied query is filled in by split_keywords().
      $tidied_query = $this->search_query;

      return $tidied_query;
  }
  140. /**
  141. * Returns the common_words array
  142. *
  143. * @return array common words that are ignored by search backend
  144. */
  public function get_common_words()
  {
      // Words collected by split_keywords() that the search ignores.
      $ignored_words = $this->common_words;

      return $ignored_words;
  }
  149. /**
  150. * Returns the word_length array
  151. *
  152. * @return array min and max word length for searching
  153. */
  public function get_word_length()
  {
      // Array with the keys 'min' and 'max', set up in the constructor.
      $limits = $this->word_length;

      return $limits;
  }
  158. /**
  159. * This function fills $this->search_query with the cleaned user search query
  160. *
  161. * If $terms is 'any' then the words will be extracted from the search query
  162. * and combined with | inside brackets. They will afterwards be treated like
  163. * a standard search query.
  164. *
  165. * Then it analyses the query and fills the internal arrays $must_not_contain_ids,
  166. * $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search()
  167. *
  168. * @param string $keywords contains the search query string as entered by the user
  169. * @param string $terms is either 'all' (use search query as entered, default words to 'must be contained in post')
  170. * or 'any' (find all posts containing at least one of the given words)
  171. * @return boolean false if no valid keywords were found and otherwise true
  172. */
  public function split_keywords($keywords, $terms)
  {
      // Operator and separator characters of the query syntax; handed to
      // cleanup() together with the raw query.
      $tokens = '+-|()* ';

      $keywords = trim($this->cleanup($keywords, $tokens));

      // allow word|word|word without brackets
      if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
      {
          $keywords = '(' . $keywords . ')';
      }

      // Normalise the query character by character.
      // $open_bracket holds the offset of the currently open '(' (or false),
      // $space remembers a '+'/'-' operator so that following spaces can be
      // replaced by it.
      $open_bracket = $space = false;
      for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
      {
          if ($open_bracket !== false)
          {
              // Inside a bracket every separator becomes '|'
              switch ($keywords[$i])
              {
                  case ')':
                      // "()" - an empty group, neutralise both brackets
                      if ($open_bracket + 1 == $i)
                      {
                          $keywords[$i - 1] = '|';
                          $keywords[$i] = '|';
                      }
                      $open_bracket = false;
                  break;

                  case '(':
                      // nested brackets are not supported
                      $keywords[$i] = '|';
                  break;

                  case '+':
                  case '-':
                  case ' ':
                      $keywords[$i] = '|';
                  break;

                  case '*':
                      // $i can never be 0 here since $open_bracket is initialised to false
                      // A wildcard surrounded by tokens on both sides stands alone, drop it
                      if (strpos($tokens, $keywords[$i - 1]) !== false && ($i + 1 === $n || strpos($tokens, $keywords[$i + 1]) !== false))
                      {
                          $keywords[$i] = '|';
                      }
                  break;
              }
          }
          else
          {
              switch ($keywords[$i])
              {
                  case ')':
                      // closing bracket without an opening one
                      $keywords[$i] = ' ';
                  break;

                  case '(':
                      $open_bracket = $i;
                      $space = false;
                  break;

                  case '|':
                      // '|' is only meaningful inside brackets
                      $keywords[$i] = ' ';
                  break;

                  case '-':
                  case '+':
                      $space = $keywords[$i];
                  break;

                  case ' ':
                      // spaces following an operator are replaced by that operator,
                      // the duplicates are collapsed by the regular expressions below
                      if ($space !== false)
                      {
                          $keywords[$i] = $space;
                      }
                  break;

                  default:
                      $space = false;
              }
          }
      }

      // Close a bracket that was left open
      if ($open_bracket !== false)
      {
          $keywords .= ')';
      }

      // Collapse runs of spaces, pipes and operators; strip pipes next to brackets
      $match = array(
          '#  +#',
          '#\|\|+#',
          '#(\+|\-)(?:\+|\-)+#',
          '#\(\|#',
          '#\|\)#',
      );
      $replace = array(
          ' ',
          '|',
          '$1',
          '(',
          ')',
      );

      $keywords = preg_replace($match, $replace, $keywords);
      $num_keywords = count(explode(' ', $keywords));

      // We limit the number of allowed keywords to minimize load on the database
      if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords'])
      {
          trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
      }

      // $keywords input format: each word separated by a space, words in a bracket are not separated

      // the user wants to search for any word, convert the search query
      if ($terms == 'any')
      {
          $words = array();

          preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
          if (count($words[1]))
          {
              $keywords = '(' . implode('|', $words[1]) . ')';
          }
      }

      // Remove non trailing wildcards from each word to prevent a full table scan (it's now using the database index)
      $keywords = preg_replace('#\*(?!$|\s)#', '', $keywords);

      // Only allow one wildcard in the search query to limit the database load
      $count_wildcards = substr_count($keywords, '*');

      // Reverse the string to remove all wildcards except the first one.
      // The limit is -1 (unlimited, but nothing to match) without wildcards
      // and 0 with exactly one wildcard.
      $keywords = strrev(preg_replace('#\*#', '', strrev($keywords), $count_wildcards - 1));
      unset($count_wildcards);

      // set the search_query which is shown to the user
      $this->search_query = $keywords;

      $exact_words = array();
      preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
      $exact_words = $exact_words[1];

      // $words maps word_text => word_id for usable words,
      // $common_ids does the same for words flagged as common
      $common_ids = $words = array();

      if (count($exact_words))
      {
          $sql = 'SELECT word_id, word_text, word_common
              FROM ' . SEARCH_WORDLIST_TABLE . '
              WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . '
              ORDER BY word_count ASC';
          $result = $this->db->sql_query($sql);

          // store an array of words and ids, remove common words
          while ($row = $this->db->sql_fetchrow($result))
          {
              if ($row['word_common'])
              {
                  $this->common_words[] = $row['word_text'];
                  $common_ids[$row['word_text']] = (int) $row['word_id'];
                  continue;
              }

              $words[$row['word_text']] = (int) $row['word_id'];
          }
          $this->db->sql_freeresult($result);
      }

      // Handle +, - without preceding whitespace character
      // NOTE(review): both patterns are replaced by ' +', so a '-' glued to the
      // preceding word is treated like '+' - confirm that this is intended
      $match = array('#(\S)\+#', '#(\S)-#');
      $replace = array('$1 +', '$1 +');

      $keywords = preg_replace($match, $replace, $keywords);

      // now analyse the search query, first split it using the spaces
      $query = explode(' ', $keywords);

      $this->must_contain_ids = array();
      $this->must_not_contain_ids = array();
      $this->must_exclude_one_ids = array();

      foreach ($query as $word)
      {
          if (empty($word))
          {
              continue;
          }

          // words which should not be included
          if ($word[0] == '-')
          {
              $word = substr($word, 1);

              // a group of which at least one may not be in the resulting posts
              // (a lone '-' leaves nothing behind, so the offset has to be checked first)
              if (isset($word[0]) && $word[0] == '(')
              {
                  $word = array_unique(explode('|', substr($word, 1, -1)));
                  $mode = 'must_exclude_one';
              }
              // one word which should not be in the resulting posts
              else
              {
                  $mode = 'must_not_contain';
              }
              $ignore_no_id = true;
          }
          // words which have to be included
          else
          {
              // no prefix is the same as a +prefix
              if ($word[0] == '+')
              {
                  $word = substr($word, 1);
              }

              // a group of words of which at least one word should be in every resulting post
              if (isset($word[0]) && $word[0] == '(')
              {
                  $word = array_unique(explode('|', substr($word, 1, -1)));
              }
              $ignore_no_id = false;
              $mode = 'must_contain';
          }

          // nothing left after stripping the operator
          if (empty($word))
          {
              continue;
          }

          // if this is an array of words then retrieve an id for each
          if (is_array($word))
          {
              $non_common_words = array();
              $id_words = array();
              foreach ($word as $word_part)
              {
                  if (strpos($word_part, '*') !== false)
                  {
                      // wildcard words are stored as quoted LIKE patterns (strings)
                      $len = utf8_strlen(str_replace('*', '', $word_part));
                      if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
                      {
                          $id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
                          $non_common_words[] = $word_part;
                      }
                      else
                      {
                          $this->common_words[] = $word_part;
                      }
                  }
                  else if (isset($words[$word_part]))
                  {
                      // known words are stored by their integer id
                      $id_words[] = $words[$word_part];
                      $non_common_words[] = $word_part;
                  }
                  else
                  {
                      $len = utf8_strlen($word_part);
                      if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
                      {
                          $this->common_words[] = $word_part;
                      }
                  }
              }

              if (count($id_words))
              {
                  sort($id_words);
                  if (count($id_words) > 1)
                  {
                      $this->{$mode . '_ids'}[] = $id_words;
                  }
                  else
                  {
                      // a group with a single usable word is handled like a plain word
                      $mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
                      $this->{$mode . '_ids'}[] = $id_words[0];
                  }
              }
              // throw an error if we shall not ignore nonexistent words
              else if (!$ignore_no_id && count($non_common_words))
              {
                  trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
              }
              unset($non_common_words);
          }
          // else we only need one id
          // ($wildcard receives the boolean result of the !== comparison)
          else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
          {
              if ($wildcard)
              {
                  $len = utf8_strlen(str_replace('*', '', $word));
                  if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
                  {
                      $this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
                  }
                  else
                  {
                      $this->common_words[] = $word;
                  }
              }
              else
              {
                  $this->{$mode . '_ids'}[] = $words[$word];
              }
          }
          else
          {
              // unknown word: only report it as common if its length is out of
              // range (words flagged as common were already collected above)
              if (!isset($common_ids[$word]))
              {
                  $len = utf8_strlen($word);
                  if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
                  {
                      $this->common_words[] = $word;
                  }
              }
          }
      }

      // Return true if all words are not common words
      if (count($exact_words) - count($this->common_words) > 0)
      {
          return true;
      }
      return false;
  }
  461. /**
  462. * Performs a search on keywords depending on display specific params. You have to run split_keywords() first
  463. *
  464. * @param string $type contains either posts or topics depending on what should be searched for
  465. * @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
  466. * @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
  467. * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
  468. * @param string $sort_key is the key of $sort_by_sql for the selected sorting
  469. * @param string $sort_dir is either a or d representing ASC and DESC
  470. * @param string $sort_days specifies the maximum amount of days a post may be old
  471. * @param array $ex_fid_ary specifies an array of forum ids which should not be searched
  472. * @param string $post_visibility specifies which types of posts the user can view in which forums
  473. * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
  474. * @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty
  475. * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
  476. * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
  477. * @param int $start indicates the first index of the page
  478. * @param int $per_page number of ids each page is supposed to contain
  479. * @return boolean|int total number of results
  480. */
  public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
  {
      // No keywords? No posts.
      if (empty($this->search_query))
      {
          return false;
      }

      // we can't search for negatives only
      if (empty($this->must_contain_ids))
      {
          return false;
      }

      // Sorted copies are only used for the cache key (and handed to the event below)
      $must_contain_ids = $this->must_contain_ids;
      $must_not_contain_ids = $this->must_not_contain_ids;
      $must_exclude_one_ids = $this->must_exclude_one_ids;

      sort($must_contain_ids);
      sort($must_not_contain_ids);
      sort($must_exclude_one_ids);

      // generate a search_key from all the options to identify the results
      $search_key_array = array(
          serialize($must_contain_ids),
          serialize($must_not_contain_ids),
          serialize($must_exclude_one_ids),
          $type,
          $fields,
          $terms,
          $sort_days,
          $sort_key,
          $topic_id,
          implode(',', $ex_fid_ary),
          $post_visibility,
          implode(',', $author_ary),
          $author_name,
      );

      /**
      * Allow changing the search_key for cached results
      *
      * @event core.search_native_by_keyword_modify_search_key
      * @var	array	search_key_array	Array with search parameters to generate the search_key
      * @var	array	must_contain_ids	Array with post ids of posts containing words that are to be included
      * @var	array	must_not_contain_ids	Array with post ids of posts containing words that should not be included
      * @var	array	must_exclude_one_ids	Array with post ids of posts containing at least one word that needs to be excluded
      * @var	string	type				Searching type ('posts', 'topics')
      * @var	string	fields				Searching fields ('titleonly', 'msgonly', 'firstpost', 'all')
      * @var	string	terms				Searching terms ('all', 'any')
      * @var	int		sort_days			Time, in days, of the oldest possible post to list
      * @var	string	sort_key			The sort type used from the possible sort types
      * @var	int		topic_id			Limit the search to this topic_id only
      * @var	array	ex_fid_ary			Which forums not to search on
      * @var	string	post_visibility		Post visibility data
      * @var	array	author_ary			Array of user_id containing the users to filter the results to
      * @since 3.1.7-RC1
      */
      $vars = array(
          'search_key_array',
          'must_contain_ids',
          'must_not_contain_ids',
          'must_exclude_one_ids',
          'type',
          'fields',
          'terms',
          'sort_days',
          'sort_key',
          'topic_id',
          'ex_fid_ary',
          'post_visibility',
          'author_ary',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_keyword_modify_search_key', compact($vars)));

      $search_key = md5(implode('#', $search_key_array));

      // try reading the results from cache
      $total_results = 0;
      if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
      {
          return $total_results;
      }

      $id_ary = array();

      $sql_where = array();
      // counters for the aliases of the word match (mN) and word list (wN) tables
      $m_num = 0;
      $w_num = 0;

      $sql_array = array(
          'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
          'FROM'		=> array(
              SEARCH_WORDMATCH_TABLE	=> array(),
              SEARCH_WORDLIST_TABLE	=> array(),
          ),
          'LEFT_JOIN' => array(array(
              'FROM'	=> array(POSTS_TABLE => 'p'),
              'ON'	=> 'm0.post_id = p.post_id',
          )),
      );

      $title_match = '';
      $left_join_topics = false;
      $group_by = true;
      // Build some display specific sql strings
      switch ($fields)
      {
          case 'titleonly':
              $title_match = 'title_match = 1';
              $group_by = false;
          // no break
          case 'firstpost':
              $left_join_topics = true;
              $sql_where[] = 'p.post_id = t.topic_first_post_id';
          break;

          case 'msgonly':
              $title_match = 'title_match = 0';
              $group_by = false;
          break;
      }

      if ($type == 'topics')
      {
          $left_join_topics = true;
          $group_by = true;
      }

      /**
      * @todo Add a query optimizer (handle stuff like "+(4|3) +4")
      */

      // One word match alias (mN) per required word or group of words.
      // Integer entries are word ids, string entries are quoted LIKE patterns.
      foreach ($this->must_contain_ids as $subquery)
      {
          if (is_array($subquery))
          {
              $group_by = true;

              $word_ids = array();

              foreach ($subquery as $id)
              {
                  if (is_string($id))
                  {
                      $sql_array['LEFT_JOIN'][] = array(
                          'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
                          'ON'	=> "w$w_num.word_text LIKE $id"
                      );
                      $word_ids[] = "w$w_num.word_id";

                      $w_num++;
                  }
                  else
                  {
                      $word_ids[] = $id;
                  }
              }

              // NOTE(review): $word_ids may contain column references such as
              // "w0.word_id" as plain strings - confirm how sql_in_set() renders them
              $sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids);

              unset($word_ids);
          }
          else if (is_string($subquery))
          {
              $sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

              $sql_where[] = "w$w_num.word_text LIKE $subquery";
              $sql_where[] = "m$m_num.word_id = w$w_num.word_id";

              $group_by = true;
              $w_num++;
          }
          else
          {
              $sql_where[] = "m$m_num.word_id = $subquery";
          }

          $sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

          if ($title_match)
          {
              $sql_where[] = "m$m_num.$title_match";
          }

          // every further alias has to refer to the same post as m0
          if ($m_num != 0)
          {
              $sql_where[] = "m$m_num.post_id = m0.post_id";
          }
          $m_num++;
      }

      // Work on a copy so that the state prepared by split_keywords() is not
      // overwritten with SQL column references by this method
      $not_contain_sql_ids = $this->must_not_contain_ids;

      foreach ($not_contain_sql_ids as $key => $subquery)
      {
          if (is_string($subquery))
          {
              $sql_array['LEFT_JOIN'][] = array(
                  'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
                  'ON'	=> "w$w_num.word_text LIKE $subquery"
              );

              $not_contain_sql_ids[$key] = "w$w_num.word_id";

              $group_by = true;
              $w_num++;
          }
      }

      if (count($not_contain_sql_ids))
      {
          // posts match when the LEFT JOIN finds none of the unwanted words
          $sql_array['LEFT_JOIN'][] = array(
              'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
              'ON'	=> $this->db->sql_in_set("m$m_num.word_id", $not_contain_sql_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
          );

          $sql_where[] = "m$m_num.word_id IS NULL";
          $m_num++;
      }

      foreach ($this->must_exclude_one_ids as $ids)
      {
          $is_null_joins = array();
          foreach ($ids as $id)
          {
              if (is_string($id))
              {
                  $sql_array['LEFT_JOIN'][] = array(
                      'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
                      'ON'	=> "w$w_num.word_text LIKE $id"
                  );
                  $id = "w$w_num.word_id";

                  $group_by = true;
                  $w_num++;
              }

              $sql_array['LEFT_JOIN'][] = array(
                  'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
                  'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
              );
              $is_null_joins[] = "m$m_num.word_id IS NULL";

              $m_num++;
          }
          // at least one word of the group must be missing from the post
          $sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
      }

      $sql_where[] = $post_visibility;

      // Values exposed to the event below
      $search_query = $this->search_query;
      $must_exclude_one_ids = $this->must_exclude_one_ids;
      $must_not_contain_ids = $not_contain_sql_ids;
      $must_contain_ids = $this->must_contain_ids;

      $sql_sort_table = $sql_sort_join = $sql_match = $sql_match_where = $sql_sort = '';

      /**
      * Allow changing the query used for counting for posts using fulltext_native
      *
      * @event core.search_native_keywords_count_query_before
      * @var	string	search_query			The parsed keywords used for this search
      * @var	array	must_not_contain_ids	Ids that cannot be taken into account for the results
      * @var	array	must_exclude_one_ids	Ids that cannot be on the results
      * @var	array	must_contain_ids		Ids that must be on the results
      * @var	int		total_results			The previous result count for the format of the query
      *										Set to 0 to force a re-count
      * @var	array	sql_array				The data on how to search in the DB at this point
      * @var	bool	left_join_topics		Whether or not TOPICS_TABLE should be CROSS JOIN'ED
      * @var	array	author_ary				Array of user_id containing the users to filter the results to
      * @var	string	author_name				An extra username to search on (!empty(author_ary) must be true, to be relevant)
      * @var	array	ex_fid_ary				Which forums not to search on
      * @var	int		topic_id				Limit the search to this topic_id only
      * @var	string	sql_sort_table			Extra tables to include in the SQL query.
      *										Used in conjunction with sql_sort_join
      * @var	string	sql_sort_join			SQL conditions to join all the tables used together.
      *										Used in conjunction with sql_sort_table
      * @var	int		sort_days				Time, in days, of the oldest possible post to list
      * @var	string	sql_where				An array of the current WHERE clause conditions
      * @var	string	sql_match				Which columns to do the search on
      * @var	string	sql_match_where			Extra conditions to use to properly filter the matching process
      * @var	bool	group_by				Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause
      * @var	string	sort_by_sql				The possible predefined sort types
      * @var	string	sort_key				The sort type used from the possible sort types
      * @var	string	sort_dir				"a" for ASC or "d" for DESC for the sort order used
      * @var	string	sql_sort				The result SQL when processing sort_by_sql + sort_key + sort_dir
      * @var	int		start					How many posts to skip in the search results (used for pagination)
      * @since 3.1.5-RC1
      */
      $vars = array(
          'search_query',
          'must_not_contain_ids',
          'must_exclude_one_ids',
          'must_contain_ids',
          'total_results',
          'sql_array',
          'left_join_topics',
          'author_ary',
          'author_name',
          'ex_fid_ary',
          'topic_id',
          'sql_sort_table',
          'sql_sort_join',
          'sort_days',
          'sql_where',
          'sql_match',
          'sql_match_where',
          'group_by',
          'sort_by_sql',
          'sort_key',
          'sort_dir',
          'sql_sort',
          'start',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_native_keywords_count_query_before', compact($vars)));

      if ($topic_id)
      {
          // the value is concatenated into the query, so force it to an integer
          $sql_where[] = 'p.topic_id = ' . (int) $topic_id;
      }

      if (count($author_ary))
      {
          if ($author_name)
          {
              // first one matches post of registered users, second one guests and deleted users
              $sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
          }
          else
          {
              $sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
          }
          $sql_where[] = $sql_author;
      }

      if (count($ex_fid_ary))
      {
          $sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
      }

      if ($sort_days)
      {
          $sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
      }

      $sql_array['WHERE'] = implode(' AND ', $sql_where);

      $is_mysql = false;
      // if the total result count is not cached yet, retrieve it from the db
      if (!$total_results)
      {
          $sql = '';
          $sql_array_count = $sql_array;

          if ($left_join_topics)
          {
              $sql_array_count['LEFT_JOIN'][] = array(
                  'FROM'	=> array(TOPICS_TABLE => 't'),
                  'ON'	=> 'p.topic_id = t.topic_id'
              );
          }

          switch ($this->db->get_sql_layer())
          {
              case 'mysqli':
                  // 3.x does not support SQL_CALC_FOUND_ROWS
                  // $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
                  // the count is fetched with FOUND_ROWS() after the id query below
                  $is_mysql = true;
              break;

              case 'sqlite3':
                  // count over a DISTINCT sub-select; the query is built here and
                  // executed by the default branch
                  $sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
                  $sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
                          FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')';

              // no break

              default:
                  $sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
                  $sql = (!$sql) ? $this->db->sql_build_query('SELECT', $sql_array_count) : $sql;

                  $result = $this->db->sql_query($sql);
                  $total_results = (int) $this->db->sql_fetchfield('total_results');
                  $this->db->sql_freeresult($result);

                  if (!$total_results)
                  {
                      return false;
                  }
              break;
          }

          unset($sql_array_count, $sql);
      }

      // Build sql strings for sorting
      $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

      // the first character of the sort expression is the table alias it refers to
      switch ($sql_sort[0])
      {
          case 'u':
              $sql_array['FROM'][USERS_TABLE] = 'u';
              $sql_where[] = 'u.user_id = p.poster_id ';
          break;

          case 't':
              $left_join_topics = true;
          break;

          case 'f':
              $sql_array['FROM'][FORUMS_TABLE] = 'f';
              $sql_where[] = 'f.forum_id = p.forum_id';
          break;
      }

      if ($left_join_topics)
      {
          $sql_array['LEFT_JOIN'][] = array(
              'FROM'	=> array(TOPICS_TABLE => 't'),
              'ON'	=> 'p.topic_id = t.topic_id'
          );
      }

      // if using mysql and the total result count is not calculated yet, get it from the db
      if (!$total_results && $is_mysql)
      {
          // Also count rows for the query as if there was not LIMIT. Add SQL_CALC_FOUND_ROWS to SQL
          $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
      }

      $sql_array['WHERE'] = implode(' AND ', $sql_where);
      $sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
      $sql_array['ORDER_BY'] = $sql_sort;

      unset($sql_where, $sql_sort, $group_by);

      $sql = $this->db->sql_build_query('SELECT', $sql_array);
      $result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

      while ($row = $this->db->sql_fetchrow($result))
      {
          $id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
      }
      $this->db->sql_freeresult($result);

      if (!$total_results && $is_mysql)
      {
          // Get the number of results as calculated by MySQL
          $sql_count = 'SELECT FOUND_ROWS() as total_results';
          $result = $this->db->sql_query($sql_count);
          $total_results = (int) $this->db->sql_fetchfield('total_results');
          $this->db->sql_freeresult($result);

          if (!$total_results)
          {
              return false;
          }
      }

      if ($start >= $total_results)
      {
          // The requested offset lies behind the last result: fall back to the
          // start of the last page. $start is returned by reference, keep it an integer.
          $start = (int) (floor(($total_results - 1) / $per_page) * $per_page);

          $result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

          while ($row = $this->db->sql_fetchrow($result))
          {
              $id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
          }
          $this->db->sql_freeresult($result);
      }

      // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
      $this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
      $id_ary = array_slice($id_ary, 0, (int) $per_page);

      return $total_results;
  }
  /**
   * Performs a search on an author's posts without caring about message contents.
   * Depends on display specific params.
   *
   * Cached results are used when available. Otherwise the total is counted, one
   * block of ids is read from the database starting at $start, the block is
   * stored through save_ids() and $id_ary is cut down to a single page.
   *
   * @param string $type contains either posts or topics depending on what should be searched for
   * @param boolean $firstpost_only if true, only topic starting posts will be considered
   * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
   * @param string $sort_key is the key of $sort_by_sql for the selected sorting
   * @param string $sort_dir is either a or d representing ASC and DESC
   * @param string $sort_days specifies the maximum amount of days a post may be old
   * @param array $ex_fid_ary specifies an array of forum ids which should not be searched
   * @param string $post_visibility specifies which types of posts the user can view in which forums
   * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
   * @param array $author_ary an array of author ids
   * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
   * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
   * @param int &$start passed by reference, indicates the first index of the page; moved back to the last page when it lies beyond the results
   * @param int $per_page number of ids each page is supposed to contain
   * @return boolean|int total number of results, 0 if no author was given, false if nothing was found
   */
  public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
  {
      // Without an author there is nothing to look for
      if (count($author_ary) === 0)
      {
          return 0;
      }

      // All options that influence the result set identify the cached results
      $search_key_array = array(
          '',
          $type,
          ($firstpost_only) ? 'firstpost' : '',
          '',
          '',
          $sort_days,
          $sort_key,
          $topic_id,
          implode(',', $ex_fid_ary),
          $post_visibility,
          implode(',', $author_ary),
          $author_name,
      );

      /**
      * Allow changing the search_key for cached results
      *
      * @event core.search_native_by_author_modify_search_key
      * @var	array	search_key_array	Array with search parameters to generate the search_key
      * @var	string	type				Searching type ('posts', 'topics')
      * @var	boolean	firstpost_only		Flag indicating if only topic starting posts are considered
      * @var	int		sort_days			Time, in days, of the oldest possible post to list
      * @var	string	sort_key			The sort type used from the possible sort types
      * @var	int		topic_id			Limit the search to this topic_id only
      * @var	array	ex_fid_ary			Which forums not to search on
      * @var	string	post_visibility		Post visibility data
      * @var	array	author_ary			Array of user_id containing the users to filter the results to
      * @var	string	author_name			The username to search on
      * @since 3.1.7-RC1
      */
      $vars = array(
          'search_key_array',
          'type',
          'firstpost_only',
          'sort_days',
          'sort_key',
          'topic_id',
          'ex_fid_ary',
          'post_visibility',
          'author_ary',
          'author_name',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_native_by_author_modify_search_key', compact($vars)));

      $search_key = md5(implode('#', $search_key_array));

      // A cache hit fills $id_ary and $total_results, nothing else to do then
      $total_results = 0;
      $cache_status = $this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir);
      if ($cache_status == SEARCH_RESULT_IN_CACHE)
      {
          return $total_results;
      }

      $id_ary = array();

      // Author condition
      if ($author_name)
      {
          // first part matches posts of registered users, second part guests and deleted users
          $registered_ids = array_diff($author_ary, array(ANONYMOUS));
          $sql_author = '(' . $this->db->sql_in_set('p.poster_id', $registered_ids, false, true) . ' OR p.post_username ' . $author_name . ')';
      }
      else
      {
          $sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
      }

      // Optional conditions, each one is either empty or starts with ' AND '
      $sql_fora = '';
      if (count($ex_fid_ary))
      {
          $sql_fora = ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
      }

      $sql_time = '';
      if ($sort_days)
      {
          $sql_time = ' AND p.post_time >= ' . (time() - ($sort_days * 86400));
      }

      $sql_topic_id = '';
      if ($topic_id)
      {
          $sql_topic_id = ' AND p.topic_id = ' . (int) $topic_id;
      }

      $sql_firstpost = '';
      if ($firstpost_only)
      {
          $sql_firstpost = ' AND p.post_id = t.topic_first_post_id';
      }

      if ($post_visibility)
      {
          $post_visibility = ' AND ' . $post_visibility;
      }
      else
      {
          $post_visibility = '';
      }

      // Sorting: the first character of the sort expression is the alias of the table it needs
      $sort_direction = ($sort_dir == 'a') ? ' ASC' : ' DESC';
      $sql_sort = $sort_by_sql[$sort_key] . $sort_direction;

      $sql_sort_table = '';
      $sql_sort_join = '';
      $sort_alias = $sql_sort[0];

      if ($sort_alias == 'u')
      {
          $sql_sort_table = USERS_TABLE . ' u, ';
          $sql_sort_join = ' AND u.user_id = p.poster_id ';
      }
      else if ($sort_alias == 't')
      {
          // The topics table is already part of the query for topic searches and first post searches
          if ($type == 'posts' && !$firstpost_only)
          {
              $sql_sort_table = TOPICS_TABLE . ' t, ';
              $sql_sort_join = ' AND t.topic_id = p.topic_id ';
          }
      }
      else if ($sort_alias == 'f')
      {
          $sql_sort_table = FORUMS_TABLE . ' f, ';
          $sql_sort_join = ' AND f.forum_id = p.forum_id ';
      }

      $select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
      $is_mysql = false;

      /**
      * Allow changing the query used to search for posts by author in fulltext_native
      *
      * @event core.search_native_author_count_query_before
      * @var	int		total_results	The previous result count for the format of the query.
      *								Set to 0 to force a re-count
      * @var	string	type			The type of search being made
      * @var	string	select			SQL SELECT clause for what to get
      * @var	string	sql_sort_table	CROSS JOIN'ed table to allow doing the sort chosen
      * @var	string	sql_sort_join	Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table
      * @var	array	sql_author		SQL WHERE condition for the post author ids
      * @var	int		topic_id		Limit the search to this topic_id only
      * @var	string	sort_by_sql		The possible predefined sort types
      * @var	string	sort_key		The sort type used from the possible sort types
      * @var	string	sort_dir		"a" for ASC or "d" for DESC for the sort order used
      * @var	string	sql_sort		The result SQL when processing sort_by_sql + sort_key + sort_dir
      * @var	string	sort_days		Time, in days, that the oldest post showing can have
      * @var	string	sql_time		The SQL to search on the time specified by sort_days
      * @var	bool	firstpost_only	Whether or not to search only on the first post of the topics
      * @var	string	sql_firstpost	The SQL used in the WHERE clause to filter by firstpost.
      * @var	array	ex_fid_ary		Forum ids that must not be searched on
      * @var	array	sql_fora		SQL query for ex_fid_ary
      * @var	int		start			How many posts to skip in the search results (used for pagination)
      * @since 3.1.5-RC1
      */
      $vars = array(
          'total_results',
          'type',
          'select',
          'sql_sort_table',
          'sql_sort_join',
          'sql_author',
          'topic_id',
          'sort_by_sql',
          'sort_key',
          'sort_dir',
          'sql_sort',
          'sort_days',
          'sql_time',
          'firstpost_only',
          'sql_firstpost',
          'ex_fid_ary',
          'sql_fora',
          'start',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_native_author_count_query_before', compact($vars)));

      // If the cache was completely empty count the results
      if (!$total_results)
      {
          $sql_layer = $this->db->get_sql_layer();

          if ($sql_layer == 'mysqli')
          {
              // The total is obtained further down through FOUND_ROWS()
              $is_mysql = true;
          }
          else
          {
              if ($type == 'posts')
              {
                  $sql = 'SELECT COUNT(p.post_id) as total_results
                      FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
                      WHERE $sql_author
                          $sql_topic_id
                          $sql_firstpost
                          $post_visibility
                          $sql_fora
                          $sql_time";
              }
              else
              {
                  // sqlite3 gets the distinct topic ids from a sub-select which is closed at the very end
                  $is_sqlite = ($sql_layer == 'sqlite3');

                  $sql = ($is_sqlite) ? 'SELECT COUNT(topic_id) as total_results
                      FROM (SELECT DISTINCT t.topic_id' : 'SELECT COUNT(DISTINCT t.topic_id) as total_results';

                  $sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
                      WHERE $sql_author
                          $sql_topic_id
                          $sql_firstpost
                          $post_visibility
                          $sql_fora
                          AND t.topic_id = p.topic_id
                          $sql_time" . (($is_sqlite) ? ')' : '');
              }

              $result = $this->db->sql_query($sql);
              $total_results = (int) $this->db->sql_fetchfield('total_results');
              $this->db->sql_freeresult($result);

              if (!$total_results)
              {
                  return false;
              }
          }
      }

      // Build the query for really selecting the post_ids
      if ($type == 'posts')
      {
          $field = 'post_id';
          $sql = "SELECT $select
              FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
              WHERE $sql_author
                  $sql_topic_id
                  $sql_firstpost
                  $post_visibility
                  $sql_fora
                  $sql_sort_join
                  $sql_time
              ORDER BY $sql_sort";
      }
      else
      {
          $field = 'topic_id';
          $sql = "SELECT $select
              FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
              WHERE $sql_author
                  $sql_topic_id
                  $sql_firstpost
                  $post_visibility
                  $sql_fora
                  AND t.topic_id = p.topic_id
                  $sql_sort_join
                  $sql_time
              GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
              ORDER BY ' . $sql_sort;
      }

      // Reads one block of ids beginning at the given offset and appends it to $id_ary
      $read_block = function ($offset) use ($sql, $field, &$id_ary)
      {
          $result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $offset);
          while ($row = $this->db->sql_fetchrow($result))
          {
              $id_ary[] = (int) $row[$field];
          }
          $this->db->sql_freeresult($result);
      };

      // Only read one block of posts from the db and then cache it
      $read_block($start);

      if (!$total_results && $is_mysql)
      {
          // Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
          $sql_calc = str_replace('SELECT ' . $select, 'SELECT SQL_CALC_FOUND_ROWS ' . $select, $sql);
          $result = $this->db->sql_query($sql_calc);
          $this->db->sql_freeresult($result);

          $sql_count = 'SELECT FOUND_ROWS() as total_results';
          $result = $this->db->sql_query($sql_count);
          $total_results = (int) $this->db->sql_fetchfield('total_results');
          $this->db->sql_freeresult($result);

          if (!$total_results)
          {
              return false;
          }
      }

      // The requested page lies behind the last result: fall back to the last page and read again
      if ($start >= $total_results)
      {
          $start = floor(($total_results - 1) / $per_page) * $per_page;
          $read_block($start);
      }

      if (!count($id_ary))
      {
          return false;
      }

      // Store the whole block, then keep only the ids of the current page
      $this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
      $id_ary = array_slice($id_ary, 0, $per_page);

      return $total_results;
  }
  /**
   * Split a text into words of a given length
   *
   * HTML tags, code blocks and BBCode tags are stripped, the remainder is run
   * through cleanup() and split on spaces. Words that conform to the defined
   * length range are returned in an array.
   *
   * NOTE: duplicates are NOT removed from the return array
   *
   * @param string $text Text to split, encoded in UTF-8
   * @return array Array of UTF-8 words
   */
  public function split_message($text)
  {
      /**
      * Taken from the original code
      */
      $strip_patterns = array(
          // Do not index code
          '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
          // BBcode
          '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#',
      );

      $min_chars = $this->word_length['min'];
      $max_ignored_bytes = $min_chars - 1;

      /**
      * Clean up the string, remove HTML tags, remove BBCodes
      */
      $clean_text = $this->cleanup(preg_replace($strip_patterns, ' ', strip_tags($text)), -1);

      $accepted = array();

      for ($token = strtok($clean_text, ' '); strlen($token); $token = strtok(' '))
      {
          $byte_length = strlen($token);

          /**
          * Words longer than 255 bytes are ignored. This will have to be
          * changed whenever we change the length of search_wordlist.word_text
          *
          * Words that do not even have the minimum number of bytes are ignored, too
          */
          if ($byte_length > 255 || $byte_length <= $max_ignored_bytes)
          {
              continue;
          }

          /**
          * Test whether the word is too short (counted in characters) to be indexed.
          *
          * Note that this limit does NOT apply to CJK and Hangul
          */
          if (utf8_strlen($token) < $min_chars)
          {
              $is_hangul = strncmp($token, self::UTF8_HANGUL_FIRST, 3) >= 0 && strncmp($token, self::UTF8_HANGUL_LAST, 3) <= 0;
              $is_cjk = strncmp($token, self::UTF8_CJK_FIRST, 3) >= 0 && strncmp($token, self::UTF8_CJK_LAST, 3) <= 0;
              $is_cjk_b = strncmp($token, self::UTF8_CJK_B_FIRST, 4) >= 0 && strncmp($token, self::UTF8_CJK_B_LAST, 4) <= 0;

              if (!$is_hangul && !$is_cjk && !$is_cjk_b)
              {
                  continue;
              }
          }

          $accepted[] = $token;
      }

      return $accepted;
  }
  /**
   * Updates wordlist and wordmatch tables when a message is posted or changed
   *
   * For mode "edit" the words currently indexed for the post are loaded and
   * compared with the new text, so only the difference is added or removed.
   * For every other mode all words of the message and subject are added.
   * Nothing is done when the fulltext_native_load_upd config value is off.
   *
   * @param string $mode Contains the post mode: edit, post, reply, quote
   * @param int $post_id The id of the post which is modified/created
   * @param string &$message New or updated post content
   * @param string &$subject New or updated post subject
   * @param int $poster_id Post author's user id
   * @param int $forum_id The id of the forum in which the post is located
   */
  public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
  {
      if (!$this->config['fulltext_native_load_upd'])
      {
          /**
          * The search indexer is disabled, return
          */
          return;
      }

      // Split old and new post/subject to obtain array of 'words'
      $split_text = $this->split_message($message);
      $split_title = $this->split_message($subject);

      $cur_words = array('post' => array(), 'title' => array());

      $words = array();
      if ($mode == 'edit')
      {
          $words['add']['post'] = array();
          $words['add']['title'] = array();
          $words['del']['post'] = array();
          $words['del']['title'] = array();

          // The post id is cast to int like in every other query of this method,
          // it must never reach the query as a raw string
          $sql = 'SELECT w.word_id, w.word_text, m.title_match
              FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . ' m
              WHERE m.post_id = ' . (int) $post_id . '
                  AND w.word_id = m.word_id';
          $result = $this->db->sql_query($sql);

          while ($row = $this->db->sql_fetchrow($result))
          {
              $which = ($row['title_match']) ? 'title' : 'post';
              $cur_words[$which][$row['word_text']] = $row['word_id'];
          }
          $this->db->sql_freeresult($result);

          // New words are those not indexed yet, removed words those no longer in the text
          $words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
          $words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
          $words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
          $words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
      }
      else
      {
          $words['add']['post'] = $split_text;
          $words['add']['title'] = $split_title;
          $words['del']['post'] = array();
          $words['del']['title'] = array();
      }

      /**
      * Event to modify method arguments and words before the native search index is updated
      *
      * @event core.search_native_index_before
      * @var string	mode				Contains the post mode: edit, post, reply, quote
      * @var int		post_id				The id of the post which is modified/created
      * @var string	message				New or updated post content
      * @var string	subject				New or updated post subject
      * @var int		poster_id			Post author's user id
      * @var int		forum_id			The id of the forum in which the post is located
      * @var array	words				Grouped lists of words added to or remove from the index
      * @var array	split_text			Array of words from the message
      * @var array	split_title			Array of words from the title
      * @var array	cur_words			Array of words currently in the index for comparing to new words
      * 									when mode is edit. Empty for other modes.
      * @since 3.2.3-RC1
      */
      $vars = array(
          'mode',
          'post_id',
          'message',
          'subject',
          'poster_id',
          'forum_id',
          'words',
          'split_text',
          'split_title',
          'cur_words',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_native_index_before', compact($vars)));

      unset($split_text);
      unset($split_title);

      // Get unique words from the above arrays
      $unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

      // We now have unique arrays of all words to be added and removed and
      // individual arrays of added and removed words for text and title. What
      // we need to do now is add the new words (if they don't already exist)
      // and then add (or remove) matches between the words and this post
      if (count($unique_add_words))
      {
          $sql = 'SELECT word_id, word_text
              FROM ' . SEARCH_WORDLIST_TABLE . '
              WHERE ' . $this->db->sql_in_set('word_text', $unique_add_words);
          $result = $this->db->sql_query($sql);

          $word_ids = array();
          while ($row = $this->db->sql_fetchrow($result))
          {
              $word_ids[$row['word_text']] = $row['word_id'];
          }
          $this->db->sql_freeresult($result);
          $new_words = array_diff($unique_add_words, array_keys($word_ids));

          $this->db->sql_transaction('begin');
          if (count($new_words))
          {
              $sql_ary = array();

              foreach ($new_words as $word)
              {
                  $sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
              }

              // A failing insert must not abort the indexing, errors are returned instead
              $this->db->sql_return_on_error(true);
              $this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
              $this->db->sql_return_on_error(false);
          }
          unset($new_words, $sql_ary);
      }
      else
      {
          $this->db->sql_transaction('begin');
      }

      // now update the search match table, remove links to removed words and add links to new words
      foreach ($words['del'] as $word_in => $word_ary)
      {
          $title_match = ($word_in == 'title') ? 1 : 0;

          if (count($word_ary))
          {
              // Translate the removed words into the word ids loaded from the index
              $sql_in = array();
              foreach ($word_ary as $word)
              {
                  $sql_in[] = $cur_words[$word_in][$word];
              }

              $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
                  WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
                      AND post_id = ' . (int) $post_id . "
                      AND title_match = $title_match";
              $this->db->sql_query($sql);

              $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
                  SET word_count = word_count - 1
                  WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
                      AND word_count > 0';
              $this->db->sql_query($sql);

              unset($sql_in);
          }
      }

      $this->db->sql_return_on_error(true);
      foreach ($words['add'] as $word_in => $word_ary)
      {
          $title_match = ($word_in == 'title') ? 1 : 0;

          if (count($word_ary))
          {
              $sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
                  SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
                  FROM ' . SEARCH_WORDLIST_TABLE . '
                  WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
              $this->db->sql_query($sql);

              $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
                  SET word_count = word_count + 1
                  WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
              $this->db->sql_query($sql);
          }
      }
      $this->db->sql_return_on_error(false);

      $this->db->sql_transaction('commit');

      // destroy cached search results containing any of the words removed or added
      $this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));

      unset($unique_add_words);
      unset($words);
      unset($cur_words);
  }
  /**
   * Removes entries from the wordmatch table for the specified post_ids
   *
   * The word counts of all words matched by the removed posts are decreased
   * and cached search results for those words and the given authors are destroyed.
   *
   * @param array $post_ids Ids of the posts whose index entries are removed
   * @param array $author_ids Ids of authors whose cached search results are destroyed
   * @param array $forum_ids Not used by this method
   */
  public function index_remove($post_ids, $author_ids, $forum_ids)
  {
      // Must exist even when there are no post ids, it is passed to destroy_cache() below
      $word_texts = array();

      if (count($post_ids))
      {
          $sql = 'SELECT w.word_id, w.word_text, m.title_match
              FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
              WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . '
                  AND w.word_id = m.word_id';
          $result = $this->db->sql_query($sql);

          $message_word_ids = $title_word_ids = array();
          while ($row = $this->db->sql_fetchrow($result))
          {
              if ($row['title_match'])
              {
                  $title_word_ids[] = $row['word_id'];
              }
              else
              {
                  $message_word_ids[] = $row['word_id'];
              }
              $word_texts[] = $row['word_text'];
          }
          $this->db->sql_freeresult($result);

          // Title and message matches are counted separately, so each gets its own decrement
          if (count($title_word_ids))
          {
              $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
                  SET word_count = word_count - 1
                  WHERE ' . $this->db->sql_in_set('word_id', $title_word_ids) . '
                      AND word_count > 0';
              $this->db->sql_query($sql);
          }

          if (count($message_word_ids))
          {
              $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
                  SET word_count = word_count - 1
                  WHERE ' . $this->db->sql_in_set('word_id', $message_word_ids) . '
                      AND word_count > 0';
              $this->db->sql_query($sql);
          }

          unset($title_word_ids);
          unset($message_word_ids);

          $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
              WHERE ' . $this->db->sql_in_set('post_id', $post_ids);
          $this->db->sql_query($sql);
      }

      $this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
  }
  1458. /**

Large files are truncated, but you can click here to view the full file