PageRenderTime 107ms CodeModel.GetById 22ms app.highlight 53ms RepoModel.GetById 15ms app.codeStats 1ms

/forum/includes/search/fulltext_native.php

https://github.com/GreyTeardrop/socionicasys-forum
PHP | 1776 lines | 1322 code | 198 blank | 256 comment | 174 complexity | e65b395162b1ee26c9b82d6008e70449 MD5 | raw file
   1<?php
   2/**
   3*
   4* @package search
   5* @version $Id$
   6* @copyright (c) 2005 phpBB Group
   7* @license http://opensource.org/licenses/gpl-license.php GNU Public License
   8*
   9*/
  10
  11/**
  12* @ignore
  13*/
  14if (!defined('IN_PHPBB'))
  15{
  16	exit;
  17}
  18
  19/**
  20* @ignore
  21*/
  22include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
  23
  24/**
  25* fulltext_native
  26* phpBB's own db driven fulltext search, version 2
  27* @package search
  28*/
  29class fulltext_native extends search_backend
  30{
  31	var $stats = array();
  32	var $word_length = array();
  33	var $search_query;
  34	var $common_words = array();
  35
  36	var $must_contain_ids = array();
  37	var $must_not_contain_ids = array();
  38	var $must_exclude_one_ids = array();
  39
  40	/**
  41	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
  42	*
  43	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure.
  44	*
  45	* @access	public
  46	*/
  47	function fulltext_native(&$error)
  48	{
  49		global $phpbb_root_path, $phpEx, $config;
  50
  51		$this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
  52
  53		/**
  54		* Load the UTF tools
  55		*/
  56		if (!class_exists('utf_normalizer'))
  57		{
  58			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
  59		}
  60
  61
  62		$error = false;
  63	}
  64
  65	/**
  66	* This function fills $this->search_query with the cleaned user search query.
  67	*
  68	* If $terms is 'any' then the words will be extracted from the search query
  69	* and combined with | inside brackets. They will afterwards be treated like
  70	* a standard search query.
  71	*
  72	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
  73	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
  74	*
  75	* @param	string	$keywords	contains the search query string as entered by the user
  76	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
  77	* 	or 'any' (find all posts containing at least one of the given words)
  78	* @return	boolean				false if no valid keywords were found and otherwise true
  79	*
  80	* @access	public
  81	*/
  82	function split_keywords($keywords, $terms)
  83	{
  84		global $db, $user, $config;
  85
  86		$tokens = '+-|()*';
  87
  88		$keywords = trim($this->cleanup($keywords, $tokens));
  89
  90		// allow word|word|word without brackets
  91		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
  92		{
  93			$keywords = '(' . $keywords . ')';
  94		}
  95
  96		$open_bracket = $space = false;
  97		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
  98		{
  99			if ($open_bracket !== false)
 100			{
 101				switch ($keywords[$i])
 102				{
 103					case ')':
 104						if ($open_bracket + 1 == $i)
 105						{
 106							$keywords[$i - 1] = '|';
 107							$keywords[$i] = '|';
 108						}
 109						$open_bracket = false;
 110					break;
 111					case '(':
 112						$keywords[$i] = '|';
 113					break;
 114					case '+':
 115					case '-':
 116					case ' ':
 117						$keywords[$i] = '|';
 118					break;
 119					case '*':
 120						if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
 121						{
 122							if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
 123							{
 124								$keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);
 125							}
 126						}
 127					break;
 128				}
 129			}
 130			else
 131			{
 132				switch ($keywords[$i])
 133				{
 134					case ')':
 135						$keywords[$i] = ' ';
 136					break;
 137					case '(':
 138						$open_bracket = $i;
 139						$space = false;
 140					break;
 141					case '|':
 142						$keywords[$i] = ' ';
 143					break;
 144					case '-':
 145					case '+':
 146						$space = $keywords[$i];
 147					break;
 148					case ' ':
 149						if ($space !== false)
 150						{
 151							$keywords[$i] = $space;
 152						}
 153					break;
 154					default:
 155						$space = false;
 156				}
 157			}
 158		}
 159
 160		if ($open_bracket)
 161		{
 162			$keywords .= ')';
 163		}
 164
 165		$match = array(
 166			'#  +#',
 167			'#\|\|+#',
 168			'#(\+|\-)(?:\+|\-)+#',
 169			'#\(\|#',
 170			'#\|\)#',
 171		);
 172		$replace = array(
 173			' ',
 174			'|',
 175			'$1',
 176			'(',
 177			')',
 178		);
 179
 180		$keywords = preg_replace($match, $replace, $keywords);
 181		$num_keywords = sizeof(explode(' ', $keywords));
 182
 183		// We limit the number of allowed keywords to minimize load on the database
 184		if ($config['max_num_search_keywords'] && $num_keywords > $config['max_num_search_keywords'])
 185		{
 186			trigger_error($user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', $config['max_num_search_keywords'], $num_keywords));
 187		}
 188
 189		// $keywords input format: each word separated by a space, words in a bracket are not separated
 190
 191		// the user wants to search for any word, convert the search query
 192		if ($terms == 'any')
 193		{
 194			$words = array();
 195
 196			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
 197			if (sizeof($words[1]))
 198			{
 199				$keywords = '(' . implode('|', $words[1]) . ')';
 200			}
 201		}
 202
 203		// set the search_query which is shown to the user
 204		$this->search_query = $keywords;
 205
 206		$exact_words = array();
 207		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
 208		$exact_words = $exact_words[1];
 209
 210		$common_ids = $words = array();
 211
 212		if (sizeof($exact_words))
 213		{
 214			$sql = 'SELECT word_id, word_text, word_common
 215				FROM ' . SEARCH_WORDLIST_TABLE . '
 216				WHERE ' . $db->sql_in_set('word_text', $exact_words) . '
 217				ORDER BY word_count ASC';
 218			$result = $db->sql_query($sql);
 219
 220			// store an array of words and ids, remove common words
 221			while ($row = $db->sql_fetchrow($result))
 222			{
 223				if ($row['word_common'])
 224				{
 225					$this->common_words[] = $row['word_text'];
 226					$common_ids[$row['word_text']] = (int) $row['word_id'];
 227					continue;
 228				}
 229
 230				$words[$row['word_text']] = (int) $row['word_id'];
 231			}
 232			$db->sql_freeresult($result);
 233		}
 234		unset($exact_words);
 235
 236		// now analyse the search query, first split it using the spaces
 237		$query = explode(' ', $keywords);
 238
 239		$this->must_contain_ids = array();
 240		$this->must_not_contain_ids = array();
 241		$this->must_exclude_one_ids = array();
 242
 243		$mode = '';
 244		$ignore_no_id = true;
 245
 246		foreach ($query as $word)
 247		{
 248			if (empty($word))
 249			{
 250				continue;
 251			}
 252
 253			// words which should not be included
 254			if ($word[0] == '-')
 255			{
 256				$word = substr($word, 1);
 257
 258				// a group of which at least one may not be in the resulting posts
 259				if ($word[0] == '(')
 260				{
 261					$word = array_unique(explode('|', substr($word, 1, -1)));
 262					$mode = 'must_exclude_one';
 263				}
 264				// one word which should not be in the resulting posts
 265				else
 266				{
 267					$mode = 'must_not_contain';
 268				}
 269				$ignore_no_id = true;
 270			}
 271			// words which have to be included
 272			else
 273			{
 274				// no prefix is the same as a +prefix
 275				if ($word[0] == '+')
 276				{
 277					$word = substr($word, 1);
 278				}
 279
 280				// a group of words of which at least one word should be in every resulting post
 281				if ($word[0] == '(')
 282				{
 283					$word = array_unique(explode('|', substr($word, 1, -1)));
 284				}
 285				$ignore_no_id = false;
 286				$mode = 'must_contain';
 287			}
 288
 289			if (empty($word))
 290			{
 291				continue;
 292			}
 293
 294			// if this is an array of words then retrieve an id for each
 295			if (is_array($word))
 296			{
 297				$non_common_words = array();
 298				$id_words = array();
 299				foreach ($word as $i => $word_part)
 300				{
 301					if (strpos($word_part, '*') !== false)
 302					{
 303						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
 304						$non_common_words[] = $word_part;
 305					}
 306					else if (isset($words[$word_part]))
 307					{
 308						$id_words[] = $words[$word_part];
 309						$non_common_words[] = $word_part;
 310					}
 311					else
 312					{
 313						$len = utf8_strlen($word_part);
 314						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 315						{
 316							$this->common_words[] = $word_part;
 317						}
 318					}
 319				}
 320				if (sizeof($id_words))
 321				{
 322					sort($id_words);
 323					if (sizeof($id_words) > 1)
 324					{
 325						$this->{$mode . '_ids'}[] = $id_words;
 326					}
 327					else
 328					{
 329						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
 330						$this->{$mode . '_ids'}[] = $id_words[0];
 331					}
 332				}
 333				// throw an error if we shall not ignore unexistant words
 334				else if (!$ignore_no_id && sizeof($non_common_words))
 335				{
 336					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
 337				}
 338				unset($non_common_words);
 339			}
 340			// else we only need one id
 341			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
 342			{
 343				if ($wildcard)
 344				{
 345					$len = utf8_strlen(str_replace('*', '', $word));
 346					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 347					{
 348						$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
 349					}
 350					else
 351					{
 352						$this->common_words[] = $word;
 353					}
 354				}
 355				else
 356				{
 357					$this->{$mode . '_ids'}[] = $words[$word];
 358				}
 359			}
 360			// throw an error if we shall not ignore unexistant words
 361			else if (!$ignore_no_id)
 362			{
 363				if (!isset($common_ids[$word]))
 364				{
 365					$len = utf8_strlen($word);
 366					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 367					{
 368						trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
 369					}
 370					else
 371					{
 372						$this->common_words[] = $word;
 373					}
 374				}
 375			}
 376			else
 377			{
 378				$len = utf8_strlen($word);
 379				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 380				{
 381					$this->common_words[] = $word;
 382				}
 383			}
 384		}
 385
 386		// we can't search for negatives only
 387		if (!sizeof($this->must_contain_ids))
 388		{
 389			return false;
 390		}
 391
 392		if (!empty($this->search_query))
 393		{
 394			return true;
 395		}
 396		return false;
 397	}
 398
 399	/**
 400	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
 401	*
 402	* @param	string		$type				contains either posts or topics depending on what should be searched for
 403	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
 404	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
 405	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
 406	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
 407	* @param	string		$sort_dir			is either a or d representing ASC and DESC
 408	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
 409	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
 410	* @param	array		$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 411	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 412	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty
 413	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
 414	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 415	* @param	int			$start				indicates the first index of the page
 416	* @param	int			$per_page			number of ids each page is supposed to contain
 417	* @return	boolean|int						total number of results
 418	*
 419	* @access	public
 420	*/
	function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// Sorted copies are used for the cache key only, so that the key does
		// not depend on the order in which the words were entered
		$must_contain_ids = $this->must_contain_ids;
		$must_not_contain_ids = $this->must_not_contain_ids;
		$must_exclude_one_ids = $this->must_exclude_one_ids;

		sort($must_contain_ids);
		sort($must_not_contain_ids);
		sort($must_exclude_one_ids);

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($must_contain_ids),
			serialize($must_not_contain_ids),
			serialize($must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		$group_by = false;
		// running counters for the wordmatch (mN) and wordlist (wN) table aliases
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
			),
			'LEFT_JOIN' => array(array(
				'FROM'	=> array(POSTS_TABLE => 'p'),
				'ON'	=> 'm0.post_id = p.post_id',
			)),
		);

		$title_match = '';
		$left_join_topics = false;
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$left_join_topics = true;
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			$left_join_topics = true;
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// Every required word or group gets its own wordmatch alias which is
		// tied to m0 through the post id. Integer entries are word ids, string
		// entries are quoted LIKE patterns produced for wildcard words.
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// Wildcard entries are replaced by a reference to the joined wordlist
		// alias. This is done on a local copy so that the object still holds the
		// original patterns if this method is called again.
		$must_not_contain = $this->must_not_contain_ids;
		foreach ($must_not_contain as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$must_not_contain[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		// A post is excluded if the left join finds any of the unwanted words
		if (sizeof($must_not_contain))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $must_not_contain) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}
		unset($must_not_contain);

		// For each group at least one of its words has to be missing from the post
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		// array(-1) adds no approval restriction at all
		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast to int before it goes into the SQL string, as author_search() does
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			if ($author_name)
			{
				// first one matches post of registered users, second one guests and deleted users
				$sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
			}
			else
			{
				$sql_author = $db->sql_in_set('p.poster_id', $author_ary);
			}
			$sql_where[] = $sql_author;
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			if ($left_join_topics)
			{
				$sql_array_count['LEFT_JOIN'][] = array(
					'FROM'	=> array(TOPICS_TABLE => 't'),
					'ON'	=> 'p.topic_id = t.topic_id'
				);
			}

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					// the count is fetched after the id query further down
					$is_mysql = true;

				break;

				case 'sqlite':
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					// keep the sqlite statement if it was built above
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort expression is the table alias it refers to
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				$left_join_topics = true;
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		if ($left_join_topics)
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		// rebuilt because the sort switch may have added join conditions
		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			$sql_array_copy = $sql_array;
			$sql_array_copy['SELECT'] = 'SQL_CALC_FOUND_ROWS p.post_id ';

			$sql = $db->sql_build_query('SELECT', $sql_array_copy);
			unset($sql_array_copy);

			// keep the handle so that this result set is the one being freed
			$result = $db->sql_query($sql);
			$db->sql_freeresult($result);

			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}
 797
 798	/**
 799	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
 800	*
 801	* @param	string		$type				contains either posts or topics depending on what should be searched for
 802	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
 803	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
 804	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
 805	* @param	string		$sort_dir			is either a or d representing ASC and DESC
 806	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
 807	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
 808	* @param	array		$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 809	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 810	* @param	array		$author_ary			an array of author ids
 811	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
 812	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 813	* @param	int			$start				indicates the first index of the page
 814	* @param	int			$per_page			number of ids each page is supposed to contain
 815	* @return	boolean|int						total number of results
 816	*
 817	* @access	public
 818	*/
	function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No author? No posts.
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// Create some display specific sql strings
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';
		// the first character of the sort expression is the table alias it refers to
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// the topics table is already part of the query for topic and first post searches
				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		// array(-1) adds no approval restriction at all
		if (!sizeof($m_approve_fid_ary))
		{
			$m_approve_fid_sql = ' AND p.post_approved = 1';
		}
		else if ($m_approve_fid_ary == array(-1))
		{
			$m_approve_fid_sql = '';
		}
		else
		{
			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
//					$select = 'SQL_CALC_FOUND_ROWS ' . $select;
					// the count is fetched after the id query further down
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								$sql_time";
					}
					else
					{
						// sqlite counts the distinct topic ids through a sub-select
						if ($db->sql_layer == 'sqlite')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
					}
					$result = $db->sql_query($sql);

					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$db->sql_freeresult($result);

		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			$sql = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

			// keep the handle so that this result set is the one being freed
			$result = $db->sql_query($sql);
			$db->sql_freeresult($result);

			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (sizeof($id_ary))
		{
			// cache the whole block, then hand back only the ids for the current page
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $total_results;
		}
		return false;
	}
1032
1033	/**
1034	* Split a text into words of a given length
1035	*
1036	* The text is converted to UTF-8, cleaned up, and split. Then, words that
1037	* conform to the defined length range are returned in an array.
1038	*
1039	* NOTE: duplicates are NOT removed from the return array
1040	*
1041	* @param	string	$text	Text to split, encoded in UTF-8
1042	* @return	array			Array of UTF-8 words
1043	*
1044	* @access	private
1045	*/
1046	function split_message($text)
1047	{
1048		global $phpbb_root_path, $phpEx, $user;
1049
1050		$match = $words = array();
1051
1052		/**
1053		* Taken from the original code
1054		*/
1055		// Do not index code
1056		$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
1057		// BBcode
1058		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';
1059
1060		$min = $this->word_length['min'];
1061		$max = $this->word_length['max'];
1062
1063		$isset_min = $min - 1;
1064
1065		/**
1066		* Clean up the string, remove HTML tags, remove BBCodes
1067		*/
1068		$word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');
1069
1070		while (strlen($word))
1071		{
1072			if (strlen($word) > 255 || strlen($word) <= $isset_min)
1073			{
1074				/**
1075				* Words longer than 255 bytes are ignored. This will have to be
1076				* changed whenever we change the length of search_wordlist.word_text
1077				*
1078				* Words shorter than $isset_min bytes are ignored, too
1079				*/
1080				$word = strtok(' ');
1081				continue;
1082			}
1083
1084			$len = utf8_strlen($word);
1085
1086			/**
1087			* Test whether the word is too short to be indexed.
1088			*
1089			* Note that this limit does NOT apply to CJK and Hangul
1090			*/
1091			if ($len < $min)
1092			{
1093				/**
1094				* Note: this could be optimized. If the codepoint is lower than Hangul's range
1095				* we know that it will also be lower than CJK ranges
1096				*/
1097				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
1098				 && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
1099				 && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
1100				{
1101					$word = strtok(' ');
1102					continue;
1103				}
1104			}
1105
1106			$words[] = $word;
1107			$word = strtok(' ');
1108		}
1109
1110		return $words;
1111	}
1112
1113	/**
1114	* Updates wordlist and wordmatch tables when a message is posted or changed
1115	*
1116	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
1117	* @param	int		$post_id	The id of the post which is modified/created
1118	* @param	string	&$message	New or updated post content
1119	* @param	string	&$subject	New or updated post subject
1120	* @param	int		$poster_id	Post author's user id
1121	* @param	int		$forum_id	The id of the forum in which the post is located
1122	*
1123	* @access	public
1124	*/
1125	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1126	{
1127		global $config, $db, $user;
1128
1129		if (!$config['fulltext_native_load_upd'])
1130		{
1131			/**
1132			* The search indexer is disabled, return
1133			*/
1134			return;
1135		}
1136
1137		// Split old and new post/subject to obtain array of 'words'
1138		$split_text = $this->split_message($message);
1139		$split_title = $this->split_message($subject);
1140		// www.phpBB-SEO.com SEO TOOLKIT BEGIN - Enable search_ignore_words
1141		$this->filter_nums($split_text);
1142		$this->filter_nums($split_title);
1143		$this->get_ignore_words();
1144		$split_text = array_diff($split_text, $this->ignore_words);
1145		$split_title = array_diff($split_title, $this->ignore_words);
1146		// www.phpBB-SEO.com SEO TOOLKIT END - Enable search_ignore_words
1147		$cur_words = array('post' => array(), 'title' => array());
1148
1149		$words = array();
1150		if ($mode == 'edit')
1151		{
1152			$words['add']['post'] = array();
1153			$words['add']['title'] = array();
1154			$words['del']['post'] = array();
1155			$words['del']['title'] = array();
1156
1157			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1158				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1159				WHERE m.post_id = $post_id
1160					AND w.word_id = m.word_id";
1161			$result = $db->sql_query($sql);
1162
1163			while ($row = $db->sql_fetchrow($result))
1164			{
1165				$which = ($row['title_match']) ? 'title' : 'post';
1166				$cur_words[$which][$row['word_text']] = $row['word_id'];
1167			}
1168			$db->sql_freeresult($result);
1169
1170			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1171			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1172			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1173			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1174		}
1175		else
1176		{
1177			$words['add']['post'] = $split_text;
1178			$words['add']['title'] = $split_title;
1179			$words['del']['post'] = array();
1180			$words['del']['title'] = array();
1181		}
1182		unset($split_text);
1183		unset($split_title);
1184
1185		// Get unique words from the above arrays
1186		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1187
1188		// We now have unique arrays of all words to be added and removed and
1189		// individual arrays of added and removed words for text and title. What
1190		// we need to do now is add the new words (if they don't already exist)
1191		// and then add (or remove) matches between the words and this post
1192		if (sizeof($unique_add_words))
1193		{
1194			$sql = 'SELECT word_id, word_text
1195				FROM ' . SEARCH_WORDLIST_TABLE . '
1196				WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
1197			$result = $db->sql_query($sql);
1198
1199			$word_ids = array();
1200			while ($row = $db->sql_fetchrow($result))
1201			{
1202				$word_ids[$row['word_text']] = $row['word_id'];
1203			}
1204			$db->sql_freeresult($result);
1205			$new_words = array_diff($unique_add_words, array_keys($word_ids));
1206
1207			$db->sql_transaction('begin');
1208			if (sizeof($new_words))
1209			{
1210				$sql_ary = array();
1211
1212				foreach ($new_words as $word)
1213				{
1214					$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
1215				}
1216				$db->sql_return_on_error(true);
1217				$db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1218				$db->sql_return_on_error(false);
1219			}
1220			unset($new_words, $sql_ary);
1221		}
1222		else
1223		{
1224			$db->sql_transaction('begin');
1225		}
1226
1227		// now update the search match table, remove links to removed words and add links to new words
1228		foreach ($words['del'] as $word_in => $word_ary)
1229		{
1230			$title_match = ($word_in == 'title') ? 1 : 0;
1231
1232			if (sizeof($word_ary))
1233			{
1234				$sql_in = array();
1235				foreach ($word_ary as $word)
1236				{
1237					$sql_in[] = $cur_words[$word_in][$word];
1238				}
1239
1240				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1241					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1242						AND post_id = ' . intval($post_id) . "
1243						AND title_match = $title_match";
1244				$db->sql_query($sql);
1245
1246				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1247					SET word_count = word_count - 1
1248					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1249						AND word_count > 0';
1250				$db->sql_query($sql);
1251
1252				unset($sql_in);
1253			}
1254		}
1255
1256		$db->sql_return_on_error(true);
1257		foreach ($words['add'] as $word_in => $word_ary)
1258		{
1259			$title_match = ($word_in == 'title') ? 1 : 0;
1260
1261			if (sizeof($word_ary))
1262			{
1263				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
1264					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
1265					FROM ' . SEARCH_WORDLIST_TABLE . '
1266					WHERE ' . $db->sql_in_set('word_text', $word_ary);
1267				$db->sql_query($sql);
1268
1269				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1270					SET word_count = word_count + 1
1271					WHERE ' . $db->sql_in_set('word_text', $word_ary);
1272				$db->sql_query($sql);
1273			}
1274		}
1275		$db->sql_return_on_error(false);
1276
1277		$db->sql_transaction('commit');
1278
1279		// destroy cached search results containing any of the words removed or added
1280		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
1281
1282		unset($unique_add_words);
1283		unset($words);
1284		unset($cur_words);
1285	}
1286
1287	/**
1288	* Removes entries from the wordmatch table for the specified post_ids
1289	*/
1290	function index_remove($post_ids, $author_ids, $forum_ids)
1291	{
1292		global $db;
1293
1294		if (sizeof($post_ids))
1295		{
1296			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1297				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
1298				WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
1299					AND w.word_id = m.word_id';
1300			$result = $db->sql_query($sql);
1301
1302			$message_word_ids = $title_word_ids = $word_texts = array();
1303			while ($row = $db->sql_fetchrow($result))
1304			{
1305				if ($row['title_match'])
1306				{
1307					$title_word_ids[] = $row['word_id'];
1308				}
1309				else
1310				{
1311					$message_word_ids[] = $row['word_id'];
1312				}
1313				$word_texts[] = $row['word_text'];
1314			}
1315			$db->sql_freeresult($result);
1316
1317			if (sizeof($title_word_ids))
1318			{
1319				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1320					SET word_count = word_count - 1
1321					WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
1322						AND word_count > 0';
1323				$db->sql_query($sql);
1324			}
1325
1326			if (sizeof($message_word_ids))
1327			{
1328				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1329					SET word_count = word_count - 1
1330					WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
1331						AND word_count > 0';
1332				$db->sql_query($sql);
1333			}
1334
1335			unset($title_word_ids);
1336			unset($message_word_ids);
1337
1338			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1339				WHERE ' . $db->sql_in_set('post_id', $post_ids);
1340			$db->sql_query($sql);
1341		}
1342
1343		$this->destroy_cache(array_unique($word_texts), $author_ids);
1344	}
1345
1346	/**
1347	* Tidy up indexes: Tag 'common words' and remove
1348	* words no longer referenced in the match table
1349	*/
1350	function tidy()
1351	{
1352		global $db, $config;
1353
1354		// Is the fulltext indexer disabled? If yes then we need not
1355		// carry on ... it's okay ... I know when I'm not wanted boo hoo
1356		if (!$config['fulltext_native_load_upd'])
1357		{
1358			set_config('search_last_gc', time(), true);
1359			return;
1360		}
1361
1362		$destroy_cache_words = array();
1363
1364		// Remove common words
1365		if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
1366		{
1367			$common_threshold = ((double) $config['fulltext_native_common_thres']) / 100.0;
1368			// First, get the IDs of common words
1369			$sql = 'SELECT word_id, word_text
1370				FROM ' . SEARCH_WORDLIST_TABLE . '
1371				WHERE word_count > ' . floor($config['num_posts'] * $common_threshold) . '
1372					OR word_common = 1';
1373			$result = $db->sql_query($sql);
1374
1375			$sql_in = array();
1376			while ($row = $db->sql_fetchrow($result))
1377			{
1378				$sql_in[] = $row['word_id'];
1379				$destroy_cache_words[] = $row['word_text'];
1380			}
1381			$db->sql_freeresult($result);
1382
1383			if (sizeof($sql_in))
1384			{
1385				// Flag the words
1386				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1387					SET word_common = 1
1388					WHERE ' . $db->sql_in_set('word_id', $sql_in);
1389				$db->sql_query($sql);
1390
1391				// by setting search_last_gc to the new time here we make sure that if a user reloads because the
1392				// following query takes too long, he won't run into it again
1393				set_config('search_last_gc', time(), true);
1394
1395				// Delete the matches
1396				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1397					WHERE ' . $db->sql_in_set('word_id', $sql_in);
1398				$db->sql_query($sql);
1399			}
1400			unset($sql_in);
1401		}
1402
1403		if (sizeof($destroy_cache_words))
1404		{
1405			// destroy cached search results containing any of the words that are now common or were removed
1406			$this->destroy_cache(array_unique($destroy_cache_words));
1407		}
1408
1409		set_config('search_last_gc', time(), true);
1410	}
1411
1412	/**
1413	* Deletes all words from the index
1414	*/
1415	function delete_index($acp_module, $u_action)
1416	{
1417		global $db;
1418
1419		switch ($db->sql_layer)
1420		{
1421			case 'sqlite':
1422			case 'firebird':
1423				$db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
1424				$db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
1425				$db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
1426			break;
1427
1428			default:
1429				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
1430				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
1431				$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
1432			break;
1433		}
1434	}
1435
1436	/**
 1437	* Returns true if the index holds data, i.e. both the word list and the word match table contain rows
1438	*/
1439	function index_created()
1440	{
1441		if (!sizeof($this->stats))
1442		{
1443			$this->get_stats();
1444		}
1445
1446		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1447	}
1448
1449	/**
1450	* Returns an associative array containing information about the indexes
1451	*/
1452	function index_stats()
1453	{
1454		global $user;
1455
1456		if (!sizeof($this->stats))
1457		{
1458			$this->get_stats();
1459		}
1460
1461		return array(
1462			$user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
1463			$user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
1464	}
1465
1466	function get_stats()
1467	{
1468		global $db;
1469
1470		$sql = 'SELECT COUNT(*) as total_words
1471			FROM ' . SEARCH_WORDLIST_TABLE;
1472		$result = $db->sql_query($sql);
1473		$this->stats['total_words'] = (int) $db->sql_fetchfield('total_words');
1474		$db->sql_freeresult($result);
1475
1476		$sql = 'SELECT COUNT(*) as total_matches
1477			FROM ' . SEARCH_WORDMATCH_TABLE;
1478		$result = $db->sql_query($sql);
1479		$this->stats['total_matches'] = (int) $db->sql_fetchfield('total_matches');
1480		$db->sql_freeresult($result);
1481	}
1482
1483	/**
1484	* Clean up a text to remove non-alphanumeric characters
1485	*
1486	* This method receives a UTF-8 string, normalizes and validates it, replaces all
 1487	* non-alphanumeric characters with spaces, then returns the result.
1488	*
1489	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1490	*
1491	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
1492	* @param	string	$allowed_chars	String of special chars to allow
1493	* @param	string	$encoding		Text encoding
1494	* @return	string					Cleaned up text, only alphanumeric chars are left
1495	*
1496	* @todo normalizer::cleanup being able to be used?
1497	*/
1498	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1499	{
1500		global $phpbb_root_path, $phpEx;
1501		static $conv = array(), $conv_loaded = array();
1502		$words = $allow = array();
1503
1504		// Convert the text to UTF-8
1505		$encoding = strtolower($encoding);
1506		if ($encoding != 'utf-8')
1507		{
1508			$text = utf8_recode($text, $encoding);
1509		}
1510
1511		$utf_len_mask = array(
1512			"\xC0"	=>	2,
1513			"\xD0"	=>	2,
1514			"\xE0"	=>	3,
1515			"\xF0"	=>	4
1516		);
1517
1518		/**
1519		* Replace HTML entities and NCRs
1520		*/
1521		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1522
1523		/**
1524		* Load the UTF-8 normalizer
1525		*
1526		* If we use it more widely, an instance of that class should be held in a
1527		* a global variable instead
1528		*/
1529		utf_normalizer::nfc($text);
1530
1531		/**
1532		* The first thing we do is:
1533		*
1534		* - convert ASCII-7 letters to lowercase
1535		* - remove the ASCII-7 non-alpha characters
1536		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1537		*   0xC1 and 0xF5-0xFF
1538		*
1539		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1540		*/
1541		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
1542		$sb_replace	= 'istcpamelrdojbnhfgvwuqkyxz                                                                              ';
1543
1544		/**
1545		* This is the list of legal ASCII chars, it is automatically extended
1546		* with ASCII chars from $allowed_chars
1547		*/
1548		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
1549
1550		/**
1551		* Prepare an array containing the extra chars to allow
1552		*/
1553		if (isset($allowed_chars[0]))
1554		{
1555			$pos = 0;
1556			$len = strlen($allowed_chars);
1557			do
1558			{
1559				$c = $allowed_chars[$pos];
1560
1561				if ($c < "\x80")
1562				{
1563					/**
1564					* ASCII char
1565					*/
1566					$sb_pos = strpos($sb_match, $c);
1567					if (is_int($sb_pos))
1568					{
1569						/**
1570						* Remove the char from $sb_match and its corresponding
1571						* replacement in $sb_replace
1572						*/
1573						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
1574						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
1575						$legal_ascii .= $c;
1576					}
1577
1578					++$pos;
1579				}
1580				else
1581				{
1582					/**
1583					* UTF-8 char
1584					*/
1585					$utf_len = $utf_len_mask[$c & "\xF0"];
1586					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
1587					$pos += $utf_len;
1588				}
1589			}
1590			while ($pos < $len);
1591		}
1592
1593		$text = strtr($text, $sb_match, $sb_replace);
1594		$ret = '';
1595
1596		$pos = 0;
1597		$len = strlen($text);
1598
1599		do
1600		{
1601			/**
1602			* Do all consecutive ASCII chars at once
1603			*/
1604			if ($spn = strspn($text, $legal_ascii, $pos))
1605			{
1606				$ret .= substr($text, $pos, $spn);
1607				$pos += $spn;
1608			}
1609
1610			if ($pos >= $len)
1611			{
1612				return $ret;
1613			}
1614
1615			/**
1616			* Capture the UTF char
1617			*/
1618			$utf_len = $utf_len_mask[$text[$pos] & "\xF0"];
1619			$utf_char = substr($text, $pos, $utf_len);
1620			$pos += $utf_len;
1621
1622			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
1623			 || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
1624			 || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
1625			{
1626				/**
1627				* All characters within these ranges are valid
1628				*
1629				* We separate them with a space in order to index each character
1630				* individually
1631				*/
1632				$ret .= ' ' . $utf_char . ' ';
1633				continue;
1634			}
1635
1636			if (isset($allow[$utf_char]))
1637			{
1638				/**
1639				* The char is explicitly allowed
1640				*/
1641				$ret .= $utf_char;
1642				continue;
1643			}
1644
1645			if (isset($conv[$utf_char]))
1646			{
1647				/**
1648				* The char is mapped to something, maybe to itself actually
1649				*/
1650				$ret .= $conv[$utf_char];
1651				continue;
1652			}
1653
1654			/**
1655			* The char isn't mapped, but did we load its conversion table?
1656			*
1657			* The search indexer table is split into blocks. The block number of
1658			* each char is equal to its codepoint right-shifted for 11 bits. It
1659			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
1660			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
1661			* all UTF chars encoded in 2 bytes are in the same first block.
1662			*/
1663			if (isset($utf_char[2]))
1664			{
1665				if (isset($utf_char[3]))
1666				{
1667					/**
1668					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
1669					* 0000 0111 0011 1111 0010 0000
1670					*/
1671					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
1672				}
1673				else
1674				{
1675					/**
1676					* 1110 nnnn 10nx xxxx 10xx xxxx
1677					* 0000 0111 0010 0000
1678					*/
1679					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
1680				}
1681			}
1682			else
1683			{
1684				/**
1685				* 110x xxxx 10xx xxxx
1686				* 0000 0000 0000 0000
1687				*/
1688				$idx = 0;
1689			}
1690
1691			/**
1692			* Check if the required conv table has been loaded already
1693			*/
1694			if (!isset($conv_loaded[$idx]))
1695			{
1696				$conv_loaded[$idx] = 1;
1697				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
1698
1699				if (file_exists($file))
1700				{
1701					$conv += include($file);
1702				}
1703			}
1704
1705			if (isset($conv[$utf_char]))
1706			{
1707				$ret .= $conv[$utf_char];
1708			}
1709			else
1710			{
1711				/**
1712				* We add an entry to the conversion table so that we
1713				* don't have to convert to codepoint and perform the checks
1714				* that are above this block
1715				*/
1716				$conv[$utf_char] = ' ';
1717				$ret .= ' ';
1718			}
1719		}
1720		while (1);
1721
1722		return $ret;
1723	}
1724
1725	/**
1726	* Returns a list of options for the ACP to display
1727	*/
1728	function acp()
1729	{
1730		global $user, $config;
1731
1732
1733		/**
1734		* if we need any options, copied from fulltext_native for now, will have to be adjusted or removed
1735		*/
1736
1737		$tpl = '
1738		<dl>
1739			<dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
1740			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label></dd>
1741		</dl>
1742		<dl>
1743			<dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1744			<dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
1745		</dl>
1746		<dl>
1747			<dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1748			<dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
1749		</dl>
1750		<dl>
1751			<dt><label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
1752			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %</dd>
1753		</dl>
1754		';
1755
1756		// These are fields required in the config table
1757		return array(
1758			'tpl'		=> $tpl,
1759			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
1760		);
1761	}
1762	// www.phpBB-SEO.com SEO TOOLKIT BEGIN - Enable search_ignore_words
1763	/**
 1764	* Removes purely numeric words (digits only) from the $input array
1765	*/
1766	function filter_nums(&$input) {
1767		foreach ($input as $key => $word) {
1768			if (preg_match('`^[0-9]+$`', $word)) {
1769				unset($input[$key]);
1770			}
1771		}
1772	}
1773	// www.phpBB-SEO.com SEO TOOLKIT END - Enable search_ignore_words
1774}
1775
1776?>