PageRenderTime 260ms CodeModel.GetById 81ms app.highlight 125ms RepoModel.GetById 36ms app.codeStats 1ms

/wwwroot/phpbb/phpbb/search/fulltext_native.php

https://github.com/spring/spring-website
PHP | 1920 lines | 1343 code | 209 blank | 368 comment | 173 complexity | 38d86ec76d4da1798fcc987429553569 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2/**
   3*
   4* This file is part of the phpBB Forum Software package.
   5*
   6* @copyright (c) phpBB Limited <https://www.phpbb.com>
   7* @license GNU General Public License, version 2 (GPL-2.0)
   8*
   9* For full copyright and license information, please see
  10* the docs/CREDITS.txt file.
  11*
  12*/
  13
  14namespace phpbb\search;
  15
  16/**
  17* phpBB's own db driven fulltext search, version 2
  18*/
  19class fulltext_native extends \phpbb\search\base
  20{
  21	/**
  22	 * Associative array holding index stats
  23	 * @var array
  24	 */
  25	protected $stats = array();
  26
  27	/**
  28	 * Associative array stores the min and max word length to be searched
  29	 * @var array
  30	 */
  31	protected $word_length = array();
  32
  33	/**
  34	 * Contains tidied search query.
  35	 * Operators are prefixed in search query and common words excluded
  36	 * @var string
  37	 */
  38	protected $search_query;
  39
  40	/**
  41	 * Contains common words.
  42	 * Common words are words with length less/more than min/max length
  43	 * @var array
  44	 */
  45	protected $common_words = array();
  46
  47	/**
  48	 * Post ids of posts containing words that are to be included
  49	 * @var array
  50	 */
  51	protected $must_contain_ids = array();
  52
  53	/**
  54	 * Post ids of posts containing words that should not be included
  55	 * @var array
  56	 */
  57	protected $must_not_contain_ids = array();
  58
  59	/**
  60	 * Post ids of posts containing at least one word that needs to be excluded
  61	 * @var array
  62	 */
  63	protected $must_exclude_one_ids = array();
  64
  65	/**
  66	 * Relative path to board root
  67	 * @var string
  68	 */
  69	protected $phpbb_root_path;
  70
  71	/**
  72	 * PHP Extension
  73	 * @var string
  74	 */
  75	protected $php_ext;
  76
  77	/**
  78	 * Config object
  79	 * @var \phpbb\config\config
  80	 */
  81	protected $config;
  82
  83	/**
  84	 * Database connection
  85	 * @var \phpbb\db\driver\driver_interface
  86	 */
  87	protected $db;
  88
  89	/**
  90	 * phpBB event dispatcher object
  91	 * @var \phpbb\event\dispatcher_interface
  92	 */
  93	protected $phpbb_dispatcher;
  94
  95	/**
  96	 * User object
  97	 * @var \phpbb\user
  98	 */
  99	protected $user;
 100
	/**
	* Sets up the native fulltext backend: stores the injected services, reads the
	* configured min/max searchable word length and loads the UTF-8 helper files
	* when they have not been loaded yet
	*
	* @param	boolean|string						&$error				is passed by reference; set to false once the constructor has finished
	* @param	string								$phpbb_root_path	Relative path to board root
	* @param	string								$phpEx				PHP file extension
	* @param	mixed								$auth				Not used by this constructor
	* @param	\phpbb\config\config				$config				Config object
	* @param	\phpbb\db\driver\driver_interface	$db					Database connection
	* @param	\phpbb\user							$user				User object
	* @param	\phpbb\event\dispatcher_interface	$phpbb_dispatcher	Event dispatcher object
	*/
	public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user, $phpbb_dispatcher)
	{
		// Injected collaborators
		$this->user = $user;
		$this->db = $db;
		$this->config = $config;
		$this->phpbb_dispatcher = $phpbb_dispatcher;

		// Details needed to locate include files
		$this->php_ext = $phpEx;
		$this->phpbb_root_path = $phpbb_root_path;

		// Word length limits come straight from the board configuration
		$min_chars = $this->config['fulltext_native_min_chars'];
		$max_chars = $this->config['fulltext_native_max_chars'];
		$this->word_length = array('min' => $min_chars, 'max' => $max_chars);

		// Load the UTF tools unless they are already available
		$utf_directory = $this->phpbb_root_path . 'includes/utf/';
		if (!class_exists('utf_normalizer'))
		{
			include($utf_directory . 'utf_normalizer.' . $this->php_ext);
		}
		if (!function_exists('utf8_decode_ncr'))
		{
			include($utf_directory . 'utf_tools.' . $this->php_ext);
		}

		$error = false;
	}
 132
	/**
	* Human readable label of this search backend, as displayed to administrators
	*
	* @return string Name
	*/
	public function get_name()
	{
		$backend_label = 'phpBB Native Fulltext';

		return $backend_label;
	}
 142
	/**
	 * Accessor for the tidied search query stored in $this->search_query
	 *
	 * @return string search query
	 */
	public function get_search_query()
	{
		$tidied_query = $this->search_query;

		return $tidied_query;
	}
 152
	/**
	 * Accessor for the list of common words collected in $this->common_words
	 *
	 * @return array common words that are ignored by search backend
	 */
	public function get_common_words()
	{
		$ignored_words = $this->common_words;

		return $ignored_words;
	}
 162
	/**
	 * Accessor for the word length limits ('min' and 'max' keys) set up by the constructor
	 *
	 * @return array min and max word length for searching
	 */
	public function get_word_length()
	{
		$length_limits = $this->word_length;

		return $length_limits;
	}
 172
	/**
	* This function fills $this->search_query with the cleaned user search query
	*
	* If $terms is 'any' then the words will be extracted from the search query
	* and combined with | inside brackets. They will afterwards be treated like
	* a standard search query.
	*
	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search()
	*
	* Words that are flagged as common in the word list, or whose length is outside
	* the configured min/max range, are collected in $this->common_words.
	*
	* Calls trigger_error() when the number of keywords exceeds the configured
	* maximum, or when a required group of words matches no known word.
	*
	* @param	string	$keywords	contains the search query string as entered by the user
	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
	* 	or 'any' (find all posts containing at least one of the given words)
	* @return	boolean				false if no valid keywords were found and otherwise true
	*/
	public function split_keywords($keywords, $terms)
	{
		$tokens = '+-|()*';

		$keywords = trim($this->cleanup($keywords, $tokens));

		// allow word|word|word without brackets
		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
		{
			$keywords = '(' . $keywords . ')';
		}

		// Normalise the operators character by character.
		// $open_bracket holds the offset of the currently open bracket (or false),
		// $space holds the last seen +/- prefix outside of brackets (or false).
		$open_bracket = $space = false;
		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
		{
			if ($open_bracket !== false)
			{
				// inside a bracket every separator becomes an "or"
				switch ($keywords[$i])
				{
					case ')':
						// empty bracket pair "()" is turned into separators
						if ($open_bracket + 1 == $i)
						{
							$keywords[$i - 1] = '|';
							$keywords[$i] = '|';
						}
						$open_bracket = false;
					break;
					case '(':
						$keywords[$i] = '|';
					break;
					case '+':
					case '-':
					case ' ':
						$keywords[$i] = '|';
					break;
					case '*':
						// drop a wildcard that stands on its own between two operators
						if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
						{
							if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
							{
								$keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);

								// The string is one character shorter now: keep the length
								// in sync and look at the character that moved into this
								// position, otherwise it would be skipped (it may be the
								// closing bracket) and the loop would read past the end.
								$n--;
								$i--;
							}
						}
					break;
				}
			}
			else
			{
				switch ($keywords[$i])
				{
					case ')':
						$keywords[$i] = ' ';
					break;
					case '(':
						$open_bracket = $i;
						$space = false;
					break;
					case '|':
						$keywords[$i] = ' ';
					break;
					case '-':
					case '+':
						$space = $keywords[$i];
					break;
					case ' ':
						// a space directly after a +/- prefix repeats the prefix
						if ($space !== false)
						{
							$keywords[$i] = $space;
						}
					break;
					default:
						$space = false;
				}
			}
		}

		// Close a bracket that was left open. The offset may be 0 (bracket is the
		// first character), so compare against false instead of testing truthiness.
		if ($open_bracket !== false)
		{
			$keywords .= ')';
		}

		// collapse repeated spaces, separators and prefixes, strip separators next to brackets
		$match = array(
			'#  +#',
			'#\|\|+#',
			'#(\+|\-)(?:\+|\-)+#',
			'#\(\|#',
			'#\|\)#',
		);
		$replace = array(
			' ',
			'|',
			'$1',
			'(',
			')',
		);

		$keywords = preg_replace($match, $replace, $keywords);
		$num_keywords = sizeof(explode(' ', $keywords));

		// We limit the number of allowed keywords to minimize load on the database
		if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords'])
		{
			trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], $num_keywords));
		}

		// $keywords input format: each word separated by a space, words in a bracket are not separated

		// the user wants to search for any word, convert the search query
		if ($terms == 'any')
		{
			$words = array();

			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
			if (sizeof($words[1]))
			{
				$keywords = '(' . implode('|', $words[1]) . ')';
			}
		}

		// set the search_query which is shown to the user
		$this->search_query = $keywords;

		// plain words of the query, without any operators
		$exact_words = array();
		preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
		$exact_words = $exact_words[1];

		$common_ids = $words = array();

		if (sizeof($exact_words))
		{
			$sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . '
				ORDER BY word_count ASC';
			$result = $this->db->sql_query($sql);

			// store an array of words and ids, remove common words
			while ($row = $this->db->sql_fetchrow($result))
			{
				if ($row['word_common'])
				{
					$this->common_words[] = $row['word_text'];
					$common_ids[$row['word_text']] = (int) $row['word_id'];
					continue;
				}

				$words[$row['word_text']] = (int) $row['word_id'];
			}
			$this->db->sql_freeresult($result);
		}

		// Handle +, - without preceding whitespace character.
		// Both are rewritten to " +", so a - inside a word does not act as an exclusion.
		$match		= array('#(\S)\+#', '#(\S)-#');
		$replace	= array('$1 +', '$1 +');

		$keywords = preg_replace($match, $replace, $keywords);

		// now analyse the search query, first split it using the spaces
		$query = explode(' ', $keywords);

		$this->must_contain_ids = array();
		$this->must_not_contain_ids = array();
		$this->must_exclude_one_ids = array();

		$mode = '';
		$ignore_no_id = true;

		foreach ($query as $word)
		{
			if (empty($word))
			{
				continue;
			}

			// words which should not be included
			if ($word[0] == '-')
			{
				$word = substr($word, 1);

				// a group of which at least one may not be in the resulting posts
				// (isset() guards against a lone "-" that leaves nothing behind)
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
					$mode = 'must_exclude_one';
				}
				// one word which should not be in the resulting posts
				else
				{
					$mode = 'must_not_contain';
				}
				$ignore_no_id = true;
			}
			// words which have to be included
			else
			{
				// no prefix is the same as a +prefix
				if ($word[0] == '+')
				{
					$word = substr($word, 1);
				}

				// a group of words of which at least one word should be in every resulting post
				// (isset() guards against a lone "+" that leaves nothing behind)
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
				}
				$ignore_no_id = false;
				$mode = 'must_contain';
			}

			// nothing left after removing the prefix
			if (empty($word))
			{
				continue;
			}

			// if this is an array of words then retrieve an id for each
			if (is_array($word))
			{
				$non_common_words = array();
				$id_words = array();
				foreach ($word as $i => $word_part)
				{
					if (strpos($word_part, '*') !== false)
					{
						// wildcard words are stored as quoted LIKE patterns (strings)
						$id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					}
					else if (isset($words[$word_part]))
					{
						// known words are stored by their integer word id
						$id_words[] = $words[$word_part];
						$non_common_words[] = $word_part;
					}
					else
					{
						$len = utf8_strlen($word_part);
						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
						{
							$this->common_words[] = $word_part;
						}
					}
				}
				if (sizeof($id_words))
				{
					sort($id_words);
					if (sizeof($id_words) > 1)
					{
						$this->{$mode . '_ids'}[] = $id_words;
					}
					else
					{
						// a group with a single usable word behaves like a plain word
						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
						$this->{$mode . '_ids'}[] = $id_words[0];
					}
				}
				// throw an error if we shall not ignore unexistant words
				else if (!$ignore_no_id && sizeof($non_common_words))
				{
					trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
				}
				unset($non_common_words);
			}
			// else we only need one id
			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
			{
				if ($wildcard)
				{
					// the length limits apply to the word without its wildcards
					$len = utf8_strlen(str_replace('*', '', $word));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						$this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
				else
				{
					$this->{$mode . '_ids'}[] = $words[$word];
				}
			}
			else
			{
				// unknown word: only record it as common when its length is out of range
				if (!isset($common_ids[$word]))
				{
					$len = utf8_strlen($word);
					if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
					{
						$this->common_words[] = $word;
					}
				}
			}
		}

		// Return true if there are more query words than collected common words
		if (sizeof($exact_words) - sizeof($this->common_words) > 0)
		{
			return true;
		}
		return false;
	}
 489
	/**
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first
	*
	* The word ids collected by split_keywords() are turned into one joined query on
	* the word match / word list tables. Results are read from and written to the
	* result cache through obtain_ids() / save_ids().
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty
	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			&$start				passed by reference, indicates the first index of the page; moved back to the last page when it lies beyond the result count
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, false if there is nothing to search for or nothing was found
	*/
	public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
	{
		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// we can't search for negatives only
		if (empty($this->must_contain_ids))
		{
			return false;
		}

		// sorted copies, only used to build a stable cache key
		$must_contain_ids = $this->must_contain_ids;
		$must_not_contain_ids = $this->must_not_contain_ids;
		$must_exclude_one_ids = $this->must_exclude_one_ids;

		sort($must_contain_ids);
		sort($must_not_contain_ids);
		sort($must_exclude_one_ids);

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($must_contain_ids),
			serialize($must_not_contain_ids),
			serialize($must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			$post_visibility,
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		// $m_num / $w_num number the aliases of the word match (m0, m1, ...) and word list (w0, w1, ...) tables
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
			),
			'LEFT_JOIN' => array(array(
				'FROM'	=> array(POSTS_TABLE => 'p'),
				'ON'	=> 'm0.post_id = p.post_id',
			)),
		);

		$title_match = '';
		$left_join_topics = false;
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$left_join_topics = true;
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			$left_join_topics = true;
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// Every required word (or group of alternatives) gets its own word match alias.
		// Integer entries are word ids, string entries are quoted LIKE patterns.
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			// all word match aliases have to refer to the same post as m0
			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// Work on a local copy: wildcard entries are replaced by column aliases below
		// and writing those back to the property would corrupt it for any later call.
		$must_not_contain_ids = $this->must_not_contain_ids;

		foreach ($must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		if (sizeof($must_not_contain_ids))
		{
			// left join the excluded words and require that no match was found
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $this->db->sql_in_set("m$m_num.word_id", $must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		// for each group at least one of its words has to be missing from the post
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		$sql_where[] = $post_visibility;

		// Values handed to the event below. $must_not_contain_ids already holds the
		// working copy with wildcard entries replaced by their column aliases.
		$search_query = $this->search_query;
		$must_exclude_one_ids = $this->must_exclude_one_ids;
		$must_contain_ids = $this->must_contain_ids;

		// NOTE(review): result_count, join_topic, sql_sort_table, sql_sort_join, sql_match,
		// sql_match_where and sql_sort are not defined in this method at this point, so
		// compact() has no value to pass for them - confirm the intended event payload.

		/**
		* Allow changing the query used for counting for posts using fulltext_native
		*
		* @event core.search_native_keywords_count_query_before
		* @var	string	search_query			The parsed keywords used for this search
		* @var	array	must_not_contain_ids	Ids that cannot be taken into account for the results
		* @var	array	must_exclude_one_ids	Ids that cannot be on the results
		* @var	array	must_contain_ids		Ids that must be on the results
		* @var	int		result_count			The previous result count for the format of the query
		*										Set to 0 to force a re-count
		* @var	bool	join_topic				Weather or not TOPICS_TABLE should be CROSS JOIN'ED
		* @var	array	author_ary				Array of user_id containing the users to filter the results to
		* @var	string	author_name				An extra username to search on (!empty(author_ary) must be true, to be relevant)
		* @var	array	ex_fid_ary				Which forums not to search on
		* @var	int		topic_id				Limit the search to this topic_id only
		* @var	string	sql_sort_table			Extra tables to include in the SQL query.
		*										Used in conjunction with sql_sort_join
		* @var	string	sql_sort_join			SQL conditions to join all the tables used together.
		*										Used in conjunction with sql_sort_table
		* @var	int		sort_days				Time, in days, of the oldest possible post to list
		* @var	string	sql_where				An array of the current WHERE clause conditions
		* @var	string	sql_match				Which columns to do the search on
		* @var	string	sql_match_where			Extra conditions to use to properly filter the matching process
		* @var	string	group_by				Whether or not the SQL query requires a GROUP BY for the elements in the SELECT clause
		* @var	string	sort_by_sql				The possible predefined sort types
		* @var	string	sort_key				The sort type used from the possible sort types
		* @var	string	sort_dir				"a" for ASC or "d" dor DESC for the sort order used
		* @var	string	sql_sort				The result SQL when processing sort_by_sql + sort_key + sort_dir
		* @var	int		start					How many posts to skip in the search results (used for pagination)
		* @since 3.1.5-RC1
		*/
		$vars = array(
			'search_query',
			'must_not_contain_ids',
			'must_exclude_one_ids',
			'must_contain_ids',
			'result_count',
			'join_topic',
			'author_ary',
			'author_name',
			'ex_fid_ary',
			'topic_id',
			'sql_sort_table',
			'sql_sort_join',
			'sort_days',
			'sql_where',
			'sql_match',
			'sql_match_where',
			'group_by',
			'sort_by_sql',
			'sort_key',
			'sort_dir',
			'sql_sort',
			'start',
		);
		extract($this->phpbb_dispatcher->trigger_event('core.search_native_keywords_count_query_before', compact($vars)));

		if ($topic_id)
		{
			// cast to int: the value ends up in the SQL string (same as author_search())
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			if ($author_name)
			{
				// first one matches post of registered users, second one guests and deleted users
				$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
			}
			else
			{
				$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
			}
			$sql_where[] = $sql_author;
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			if ($left_join_topics)
			{
				$sql_array_count['LEFT_JOIN'][] = array(
					'FROM'	=> array(TOPICS_TABLE => 't'),
					'ON'	=> 'p.topic_id = t.topic_id'
				);
			}

			switch ($this->db->get_sql_layer())
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					// the count is fetched with FOUND_ROWS() after the main query instead
					$is_mysql = true;

				break;

				case 'sqlite':
				case 'sqlite3':
					// count over a DISTINCT sub-select, the query itself is run by the default branch
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					$sql = (!$sql) ? $this->db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $this->db->sql_query($sql);
					$total_results = (int) $this->db->sql_fetchfield('total_results');
					$this->db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort expression is the table alias it refers to
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				$left_join_topics = true;
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		if ($left_join_topics)
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		// if using mysql and the total result count is not calculated yet, get it from the db
		if (!$total_results && $is_mysql)
		{
			// Also count rows for the query as if there was not LIMIT. Add SQL_CALC_FOUND_ROWS to SQL
			$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		$sql = $this->db->sql_build_query('SELECT', $sql_array);
		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$this->db->sql_freeresult($result);

		if (!$total_results && $is_mysql)
		{
			// Get the number of results as calculated by MySQL
			$sql_count = 'SELECT FOUND_ROWS() as total_results';
			$result = $this->db->sql_query($sql_count);
			$total_results = (int) $this->db->sql_fetchfield('total_results');
			$this->db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// the requested page lies beyond the results: fall back to the last page and query again
		if ($start >= $total_results)
		{
			$start = floor(($total_results - 1) / $per_page) * $per_page;

			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
			}
			$this->db->sql_freeresult($result);

		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}
 952
 953	/**
 954	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
 955	*
 956	* @param	string		$type				contains either posts or topics depending on what should be searched for
 957	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
 958	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
 959	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
 960	* @param	string		$sort_dir			is either a or d representing ASC and DESC
 961	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
 962	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
 963	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
 964	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 965	* @param	array		$author_ary			an array of author ids
 966	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
 967	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 968	* @param	int			$start				indicates the first index of the page
 969	* @param	int			$per_page			number of ids each page is supposed to contain
 970	* @return	boolean|int						total number of results
 971	*/
	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
	{
		// No author? No posts
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// Generate a search_key from all the options to identify the results.
		// $sort_dir, $start and $per_page are not part of the key; they are handed
		// to obtain_ids() and save_ids() separately.
		// NOTE(review): the empty entries are presumably placeholders for options
		// that only apply to keyword searches - verify against keyword_search()
		$search_key = md5(implode('#', array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			$post_visibility,
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		// Not served from the cache: start with an empty id list
		$id_ary = array();

		// Create some display specific sql strings
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
		// From here on $post_visibility holds a ready-to-append SQL fragment;
		// the search key above was built from the value as it was passed in
		$post_visibility = ($post_visibility) ? ' AND ' . $post_visibility : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';
		// The first character of the sort expression is treated as the table alias
		// and decides which additional table has to be joined for sorting
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// The topics table is only added here for post searches without
				// $firstpost_only; in the other cases the queries below include it already
				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		/**
		* Allow changing the query used to search for posts by author in fulltext_native
		*
		* @event core.search_native_author_count_query_before
		* @var	int		total_results		The previous result count for the format of the query.
		*									Set to 0 to force a re-count
		* @var	string	select				SQL SELECT clause for what to get
		* @var	string	sql_sort_table		CROSS JOIN'ed table to allow doing the sort chosen
		* @var	string	sql_sort_join		Condition to define how to join the CROSS JOIN'ed table specified in sql_sort_table
		* @var	string	sql_author			SQL WHERE condition for the post author ids
		* @var	int		topic_id			Limit the search to this topic_id only
		* @var	array	sort_by_sql			The possible predefined sort types
		* @var	string	sort_key			The sort type used from the possible sort types
		* @var	string	sort_dir			"a" for ASC or "d" for DESC for the sort order used
		* @var	string	sql_sort			The result SQL when processing sort_by_sql + sort_key + sort_dir
		* @var	string	sort_days			Time, in days, that the oldest post showing can have
		* @var	string	sql_time			The SQL to search on the time specified by sort_days
		* @var	bool	firstpost_only		Whether or not to search only on the first post of the topics
		* @var	array	ex_fid_ary			Forum ids that must not be searched on
		* @var	string	sql_fora			SQL query for ex_fid_ary
		* @var	int		start				How many posts to skip in the search results (used for pagination)
		* @since 3.1.5-RC1
		*/
		$vars = array(
			'total_results',
			'select',
			'sql_sort_table',
			'sql_sort_join',
			'sql_author',
			'topic_id',
			'sort_by_sql',
			'sort_key',
			'sort_dir',
			'sql_sort',
			'sort_days',
			'sql_time',
			'firstpost_only',
			'ex_fid_ary',
			'sql_fora',
			'start',
		);
		// The local variables named in $vars are handed to the event and written
		// back afterwards, so their names must not be changed
		extract($this->phpbb_dispatcher->trigger_event('core.search_native_author_count_query_before', compact($vars)));

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($this->db->get_sql_layer())
			{
				case 'mysql4':
				case 'mysqli':
					// Only set a flag here; for MySQL the count is obtained after the id query further down
//					$select = 'SQL_CALC_FOUND_ROWS ' . $select;
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$post_visibility
								$sql_fora
								$sql_time";
					}
					else
					{
						// The sqlite layers get the distinct topic ids from a subquery,
						// all other layers use COUNT(DISTINCT ...) directly
						if ($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						// The trailing ')' closes the subquery opened for the sqlite layers above
						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$post_visibility
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($this->db->get_sql_layer() == 'sqlite' || $this->db->get_sql_layer() == 'sqlite3') ? ')' : '');
					}
					$result = $this->db->sql_query($sql);

					$total_results = (int) $this->db->sql_fetchfield('total_results');
					$this->db->sql_freeresult($result);

					// Nothing matched, so there is no need to run the id query
					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$post_visibility
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			// Topic search: one row per topic, grouped by topic id and the sort column
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$post_visibility
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$this->db->sql_freeresult($result);

		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			// This runs the id query a second time without a limit; FOUND_ROWS() then reports the total
			$sql_calc = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

			$result = $this->db->sql_query($sql_calc);
			$this->db->sql_freeresult($result);

			$sql_count = 'SELECT FOUND_ROWS() as total_results';
			$result = $this->db->sql_query($sql_count);
			$total_results = (int) $this->db->sql_fetchfield('total_results');
			$this->db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// The requested offset lies beyond the last result: move $start back to
		// the beginning of the last page and fetch the ids again from there
		if ($start >= $total_results)
		{
			$start = floor(($total_results - 1) / $per_page) * $per_page;

			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$id_ary[] = (int) $row[$field];
			}
			$this->db->sql_freeresult($result);
		}

		if (sizeof($id_ary))
		{
			// Store the whole block that was read, then hand back only the ids of one page
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $total_results;
		}
		return false;
	}
1227
1228	/**
1229	* Split a text into words of a given length
1230	*
1231	* The text is converted to UTF-8, cleaned up, and split. Then, words that
1232	* conform to the defined length range are returned in an array.
1233	*
1234	* NOTE: duplicates are NOT removed from the return array
1235	*
1236	* @param	string	$text	Text to split, encoded in UTF-8
1237	* @return	array			Array of UTF-8 words
1238	*/
1239	public function split_message($text)
1240	{
1241		$match = $words = array();
1242
1243		/**
1244		* Taken from the original code
1245		*/
1246		// Do not index code
1247		$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
1248		// BBcode
1249		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';
1250
1251		$min = $this->word_length['min'];
1252		$max = $this->word_length['max'];
1253
1254		$isset_min = $min - 1;
1255
1256		/**
1257		* Clean up the string, remove HTML tags, remove BBCodes
1258		*/
1259		$word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');
1260
1261		while (strlen($word))
1262		{
1263			if (strlen($word) > 255 || strlen($word) <= $isset_min)
1264			{
1265				/**
1266				* Words longer than 255 bytes are ignored. This will have to be
1267				* changed whenever we change the length of search_wordlist.word_text
1268				*
1269				* Words shorter than $isset_min bytes are ignored, too
1270				*/
1271				$word = strtok(' ');
1272				continue;
1273			}
1274
1275			$len = utf8_strlen($word);
1276
1277			/**
1278			* Test whether the word is too short to be indexed.
1279			*
1280			* Note that this limit does NOT apply to CJK and Hangul
1281			*/
1282			if ($len < $min)
1283			{
1284				/**
1285				* Note: this could be optimized. If the codepoint is lower than Hangul's range
1286				* we know that it will also be lower than CJK ranges
1287				*/
1288				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
1289					&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
1290					&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
1291				{
1292					$word = strtok(' ');
1293					continue;
1294				}
1295			}
1296
1297			$words[] = $word;
1298			$word = strtok(' ');
1299		}
1300
1301		return $words;
1302	}
1303
1304	/**
1305	* Updates wordlist and wordmatch tables when a message is posted or changed
1306	*
1307	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
1308	* @param	int		$post_id	The id of the post which is modified/created
1309	* @param	string	&$message	New or updated post content
1310	* @param	string	&$subject	New or updated post subject
1311	* @param	int		$poster_id	Post author's user id
1312	* @param	int		$forum_id	The id of the forum in which the post is located
1313	*/
1314	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1315	{
1316		if (!$this->config['fulltext_native_load_upd'])
1317		{
1318			/**
1319			* The search indexer is disabled, return
1320			*/
1321			return;
1322		}
1323
1324		// Split old and new post/subject to obtain array of 'words'
1325		$split_text = $this->split_message($message);
1326		$split_title = $this->split_message($subject);
1327
1328		$cur_words = array('post' => array(), 'title' => array());
1329
1330		$words = array();
1331		if ($mode == 'edit')
1332		{
1333			$words['add']['post'] = array();
1334			$words['add']['title'] = array();
1335			$words['del']['post'] = array();
1336			$words['del']['title'] = array();
1337
1338			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1339				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1340				WHERE m.post_id = $post_id
1341					AND w.word_id = m.word_id";
1342			$result = $this->db->sql_query($sql);
1343
1344			while ($row = $this->db->sql_fetchrow($result))
1345			{
1346				$which = ($row['title_match']) ? 'title' : 'post';
1347				$cur_words[$which][$row['word_text']] = $row['word_id'];
1348			}
1349			$this->db->sql_freeresult($result);
1350
1351			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1352			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1353			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1354			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1355		}
1356		else
1357		{
1358			$words['add']['post'] = $split_text;
1359			$words['add']['title'] = $split_title;
1360			$words['del']['post'] = array();
1361			$words['del']['title'] = array();
1362		}
1363		unset($split_text);
1364		unset($split_title);
1365
1366		// Get unique words from the above arrays
1367		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1368
1369		// We now have unique arrays of all words to be added and removed and
1370		// individual arrays of added and removed words for text and title. What
1371		// we need to do now is add the new words (if they don't already exist)
1372		// and then add (or remove) matches between the words and this post
1373		if (sizeof($unique_add_words))
1374		{
1375			$sql = 'SELECT word_id, word_text
1376				FROM ' . SEARCH_WORDLIST_TABLE . '
1377				WHERE ' . $this->db->sql_in_set('word_text', $unique_add_words);
1378			$result = $this->db->sql_query($sql);
1379
1380			$word_ids = array();
1381			while ($row = $this->db->sql_fetchrow($result))
1382			{
1383				$word_ids[$row['word_text']] = $row['word_id'];
1384			}
1385			$this->db->sql_freeresult($result);
1386			$new_words = array_diff($unique_add_words, array_keys($word_ids));
1387
1388			$this->db->sql_transaction('begin');
1389			if (sizeof($new_words))
1390			{
1391				$sql_ary = array();
1392
1393				foreach ($new_words as $word)
1394				{
1395					$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
1396				}
1397				$this->db->sql_return_on_error(true);
1398				$this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1399				$this->db->sql_return_on_error(false);
1400			}
1401			unset($new_words, $sql_ary);
1402		}
1403		else
1404		{
1405			$this->db->sql_transaction('begin');
1406		}
1407
1408		// now update the search match table, remove links to removed words and add links to new words
1409		foreach ($words['del'] as $word_in => $word_ary)
1410		{
1411			$title_match = ($word_in == 'title') ? 1 : 0;
1412
1413			if (sizeof($word_ary))
1414			{
1415				$sql_in = array();
1416				foreach ($word_ary as $word)
1417				{
1418					$sql_in[] = $cur_words[$word_in][$word];
1419				}
1420
1421				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1422					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
1423						AND post_id = ' . intval($post_id) . "
1424						AND title_match = $title_match";
1425				$this->db->sql_query($sql);
1426
1427				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1428					SET word_count = word_count - 1
1429					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . '
1430						AND word_count > 0';
1431				$this->db->sql_query($sql);
1432
1433				unset($sql_in);
1434			}
1435		}
1436
1437		$this->db->sql_return_on_error(true);
1438		foreach ($words['add'] as $word_in => $word_ary)
1439		{
1440			$title_match = ($word_in == 'title') ? 1 : 0;
1441
1442			if (sizeof($word_ary))
1443			{
1444				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
1445					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
1446					FROM ' . SEARCH_WORDLIST_TABLE . '
1447					WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
1448				$this->db->sql_query($sql);
1449
1450				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1451					SET word_count = word_count + 1
1452					WHERE ' . $this->db->sql_in_set('word_text', $word_ary);
1453				$this->db->sql_query($sql);
1454			}
1455		}
1456		$this->db->sql_return_on_error(false);
1457
1458		$this->db->sql_transaction('commit');
1459
1460		// destroy cached search results containing any of the words removed or added
1461		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
1462
1463		unset($unique_add_words);
1464		unset($words);
1465		unset($cur_words);
1466	}
1467
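	/**
	* Removes entries from the wordmatch table for the specified post_ids
	* and decrements the word counts of the words these posts were linked to
	*
	* @param	array	$post_ids	Ids of the posts whose index entries are removed
	* @param	array	$author_ids	Author ids handed on to destroy_cache()
	* @param	array	$forum_ids	Not used by this method
	*/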
1471	public function index_remove($post_ids, $author_ids, $forum_ids)
1472	{
1473		if (sizeof($post_ids))
1474		{
1475			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1476				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
1477				WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . '
1478					AND w.word_id = m.word_id';
1479			$result = $this->db->sql_query($sql);
1480
1481			$message_word_ids = $title_word_ids = $word_texts = array();
1482			while ($row = $this->db->sql_fetchrow($result))
1483			{
1484				if ($row['title_match'])
1485				{
1486					$title_word_ids[] = $row['word_id'];
1487				}
1488				else
1489				{
1490					$message_word_ids[] = $row['word_id'];
1491				}
1492				$word_texts[] = $row['word_text'];
1493			}
1494			$this->db->sql_freeresult($result);
1495
1496			if (sizeof($title_word_ids))
1497			{
1498				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1499					SET word_count = word_count - 1
1500					WHERE ' . $this->db->sql_in_set('word_id', $title_word_ids) . '
1501						AND word_count > 0';
1502				$this->db->sql_query($sql);
1503			}
1504
1505			if (sizeof($message_word_ids))
1506			{
1507				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1508					SET word_count = word_count - 1
1509					WHERE ' . $this->db->sql_in_set('word_id', $message_word_ids) . '
1510						AND word_count > 0';
1511				$this->db->sql_query($sql);
1512			}
1513
1514			unset($title_word_ids);
1515			unset($message_word_ids);
1516
1517			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1518				WHERE ' . $this->db->sql_in_set('post_id', $post_ids);
1519			$this->db->sql_query($sql);
1520		}
1521
1522		$this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
1523	}
1524
	/**
	* Tidy up indexes: Flag 'common words' and delete
	* their entries from the match table
	*/
1529	public function tidy()
1530	{
1531		// Is the fulltext indexer disabled? If yes then we need not
1532		// carry on ... it's okay ... I know when I'm not wanted boo hoo
1533		if (!$this->config['fulltext_native_load_upd'])
1534		{
1535			set_config('search_last_gc', time(), true);
1536			return;
1537		}
1538
1539		$destroy_cache_words = array();
1540
1541		// Remove common words
1542		if ($this->config['num_posts'] >= 100 && $this->config['fulltext_native_common_thres'])
1543		{
1544			$common_threshold = ((double) $this->config['fulltext_native_common_thres']) / 100.0;
1545			// First, get the IDs of common words
1546			$sql = 'SELECT word_id, word_text
1547				FROM ' . SEARCH_WORDLIST_TABLE . '
1548				WHERE word_count > ' . floor($this->config['num_posts'] * $common_threshold) . '
1549					OR word_common = 1';
1550			$result = $this->db->sql_query($sql);
1551
1552			$sql_in = array();
1553			while ($row = $this->db->sql_fetchrow($result))
1554			{
1555				$sql_in[] = $row['word_id'];
1556				$destroy_cache_words[] = $row['word_text'];
1557			}
1558			$this->db->sql_freeresult($result);
1559
1560			if (sizeof($sql_in))
1561			{
1562				// Flag the words
1563				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1564					SET word_common = 1
1565					WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
1566				$this->db->sql_query($sql);
1567
1568				// by setting search_last_gc to the new time here we make sure that if a user reloads because the
1569				// following query takes too long, he won't run into it again
1570				set_config('search_last_gc', time(), true);
1571
1572				// Delete the matches
1573				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1574					WHERE ' . $this->db->sql_in_set('word_id', $sql_in);
1575				$this->db->sql_query($sql);
1576			}
1577			unset($sql_in);
1578		}
1579
1580		if (sizeof($destroy_cache_words))
1581		{
1582			// destroy cached search results containing any of the words that are now common or were removed
1583			$this->destroy_cache(array_unique($destroy_cache_words));
1584		}
1585
1586		set_config('search_last_gc', time(), true);
1587	}
1588
1589	/**
1590	* Deletes all words from the index
1591	*/
1592	public function delete_index($acp_module, $u_action)
1593	{
1594		switch ($this->db->get_sql_layer())
1595		{
1596			case 'sqlite':
1597			case 'sqlite3':
1598				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
1599				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
1600				$this->db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
1601			break;
1602
1603			default:
1604				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
1605				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
1606				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
1607			break;
1608		}
1609	}
1610
	/**
	* Returns true if both the word list and the word match table contain entries
	*/
1614	public function index_created()
1615	{
1616		if (!sizeof($this->stats))
1617		{
1618			$this->get_stats();
1619		}
1620
1621		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1622	}
1623
1624	/**
1625	* Returns an associative array containing information about the indexes
1626	*/
1627	public function index_stats()
1628	{
1629		if (!sizeof($this->stats))
1630		{
1631			$this->get_stats();
1632		}
1633
1634		return array(
1635			$this->user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
1636			$this->user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
1637	}
1638
1639	protected function get_stats()
1640	{
1641		$this->stats['total_words']		= $this->db->get_estimated_row_count(SEARCH_WORDLIST_TABLE);
1642		$this->stats['total_matches']	= $this->db->get_estimated_row_count(SEARCH_WORDMATCH_TABLE);
1643	}
1644
1645	/**
1646	* Clean up a text to remove non-alphanumeric characters
1647	*
1648	* This method receives a UTF-8 string, normalizes and validates it, replaces all
1649	* non-alphanumeric characters with strings then returns the result.
1650	*
1651	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1652	*
1653	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
1654	* @param	string	$allowed_chars	String of special chars to allow
1655	* @param	string	$encoding		Text encoding
1656	* @return	string					Cleaned up text, only alphanumeric chars are left
1657	*
1658	* @todo \normalizer::cleanup being able to be used?
1659	*/
1660	protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1661	{
1662		static $conv = array(), $conv_loaded = array();
1663		$words = $allow = array();
1664
1665		// Convert the text to UTF-8
1666		$encoding = strtolower($encoding);
1667		if ($encoding != 'utf-8')
1668		{
1669			$text = utf8_recode($text, $encoding);
1670		}
1671
1672		$utf_len_mask = array(
1673			"\xC0"	=>	2,
1674			"\xD0"	=>	2,
1675			"\xE0"	=>	3,
1676			"\xF0"	=>	4
1677		);
1678
1679		/**
1680		* Replace HTML entities and NCRs
1681		*/
1682		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1683
1684		/**
1685		* Load the UTF-8 normalizer
1686		*
1687		* If we use it more widely, an instance of that class should be held in a
1688		* a global variable instead
1689		*/
1690		\utf_normalizer::nfc($text);
1691
1692		/**
1693		* The first thing we do is:
1694		*
1695		* - convert ASCII-7 letters to lowercase
1696		* - remove the ASCII-7 non-alpha characters
1697		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1698		*   0xC1 and 0xF5-0xFF
1699		*
1700		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1701		*/
1702		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\…

Large files files are truncated, but you can click here to view the full file