PageRenderTime 279ms CodeModel.GetById 101ms app.highlight 108ms RepoModel.GetById 53ms app.codeStats 0ms

/phpBB3/includes/search/fulltext_native.php

http://pbb-png1.googlecode.com/
PHP | 1690 lines | 1255 code | 190 blank | 245 comment | 163 complexity | c086b6b939c8a46e8e419c0f83550108 MD5 | raw file
   1<?php
   2/**
   3*
   4* @package search
   5* @version $Id: fulltext_native.php 9173 2008-12-04 17:01:39Z naderman $
   6* @copyright (c) 2005 phpBB Group
   7* @license http://opensource.org/licenses/gpl-license.php GNU Public License
   8*
   9*/
  10
  11/**
  12* @ignore
  13*/
  14if (!defined('IN_PHPBB'))
  15{
  16	exit;
  17}
  18
  19/**
  20* @ignore
  21*/
  22include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
  23
  24/**
  25* fulltext_native
  26* phpBB's own db driven fulltext search, version 2
  27* @package search
  28*/
  29class fulltext_native extends search_backend
  30{
	// Not written or read by any method in the visible part of this file.
	// NOTE(review): presumably holds index statistics - confirm against the rest of the class.
	var $stats = array();
	// array('min' => ..., 'max' => ...) taken from the fulltext_native_min_chars / fulltext_native_max_chars config values by the constructor
	var $word_length = array();
	// The cleaned search query produced by split_keywords(); this is the version shown to the user
	var $search_query;
	// Words of the current query which were not searched for: flagged as common in the word list or outside the word length limits
	var $common_words = array();

	// The three arrays below are filled by split_keywords() and consumed by keyword_search().
	// Entries are word ids (int), quoted LIKE patterns (string, for words containing *) or arrays of these for bracketed groups.
	var $must_contain_ids = array();
	var $must_not_contain_ids = array();
	var $must_exclude_one_ids = array();
  39
  40	/**
  41	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
  42	*
  43	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure.
  44	*
  45	* @access	public
  46	*/
  47	function fulltext_native(&$error)
  48	{
  49		global $phpbb_root_path, $phpEx, $config;
  50
  51		$this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
  52
  53		/**
  54		* Load the UTF tools
  55		*/
  56		if (!class_exists('utf_normalizer'))
  57		{
  58			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
  59		}
  60
  61
  62		$error = false;
  63	}
  64
  65	/**
  66	* This function fills $this->search_query with the cleaned user search query.
  67	*
  68	* If $terms is 'any' then the words will be extracted from the search query
  69	* and combined with | inside brackets. They will afterwards be treated like
  70	* an standard search query.
  71	*
  72	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
  73	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
  74	*
  75	* @param	string	$keywords	contains the search query string as entered by the user
  76	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post')
  77	* 	or 'any' (find all posts containing at least one of the given words)
  78	* @return	boolean				false if no valid keywords were found and otherwise true
  79	*
  80	* @access	public
  81	*/
  82	function split_keywords($keywords, $terms)
  83	{
  84		global $db, $user;
  85
  86		$keywords = trim($this->cleanup($keywords, '+-|()*'));
  87
  88		// allow word|word|word without brackets
  89		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
  90		{
  91			$keywords = '(' . $keywords . ')';
  92		}
  93
  94		$open_bracket = $space = false;
  95		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
  96		{
  97			if ($open_bracket !== false)
  98			{
  99				switch ($keywords[$i])
 100				{
 101					case ')':
 102						if ($open_bracket + 1 == $i)
 103						{
 104							$keywords[$i - 1] = '|';
 105							$keywords[$i] = '|';
 106						}
 107						$open_bracket = false;
 108					break;
 109					case '(':
 110						$keywords[$i] = '|';
 111					break;
 112					case '+':
 113					case '-':
 114					case ' ':
 115						$keywords[$i] = '|';
 116					break;
 117				}
 118			}
 119			else
 120			{
 121				switch ($keywords[$i])
 122				{
 123					case ')':
 124						$keywords[$i] = ' ';
 125					break;
 126					case '(':
 127						$open_bracket = $i;
 128						$space = false;
 129					break;
 130					case '|':
 131						$keywords[$i] = ' ';
 132					break;
 133					case '-':
 134					case '+':
 135						$space = $keywords[$i];
 136					break;
 137					case ' ':
 138						if ($space !== false)
 139						{
 140							$keywords[$i] = $space;
 141						}
 142					break;
 143					default:
 144						$space = false;
 145				}
 146			}
 147		}
 148
 149		if ($open_bracket)
 150		{
 151			$keywords .= ')';
 152		}
 153
 154		$match = array(
 155			'#  +#',
 156			'#\|\|+#',
 157			'#(\+|\-)(?:\+|\-)+#',
 158			'#\(\|#',
 159			'#\|\)#',
 160		);
 161		$replace = array(
 162			' ',
 163			'|',
 164			'$1',
 165			'(',
 166			')',
 167		);
 168
 169		$keywords = preg_replace($match, $replace, $keywords);
 170
 171		// $keywords input format: each word separated by a space, words in a bracket are not separated
 172
 173		// the user wants to search for any word, convert the search query
 174		if ($terms == 'any')
 175		{
 176			$words = array();
 177
 178			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
 179			if (sizeof($words[1]))
 180			{
 181				$keywords = '(' . implode('|', $words[1]) . ')';
 182			}
 183		}
 184
 185		// set the search_query which is shown to the user
 186		$this->search_query = $keywords;
 187
 188		$exact_words = array();
 189		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
 190		$exact_words = $exact_words[1];
 191
 192		$common_ids = $words = array();
 193
 194		if (sizeof($exact_words))
 195		{
 196			$sql = 'SELECT word_id, word_text, word_common
 197				FROM ' . SEARCH_WORDLIST_TABLE . '
 198				WHERE ' . $db->sql_in_set('word_text', $exact_words);
 199			$result = $db->sql_query($sql);
 200
 201			// store an array of words and ids, remove common words
 202			while ($row = $db->sql_fetchrow($result))
 203			{
 204				if ($row['word_common'])
 205				{
 206					$this->common_words[] = $row['word_text'];
 207					$common_ids[$row['word_text']] = (int) $row['word_id'];
 208					continue;
 209				}
 210
 211				$words[$row['word_text']] = (int) $row['word_id'];
 212			}
 213			$db->sql_freeresult($result);
 214		}
 215		unset($exact_words);
 216
 217		// now analyse the search query, first split it using the spaces
 218		$query = explode(' ', $keywords);
 219
 220		$this->must_contain_ids = array();
 221		$this->must_not_contain_ids = array();
 222		$this->must_exclude_one_ids = array();
 223
 224		$mode = '';
 225		$ignore_no_id = true;
 226
 227		foreach ($query as $word)
 228		{
 229			if (empty($word))
 230			{
 231				continue;
 232			}
 233
 234			// words which should not be included
 235			if ($word[0] == '-')
 236			{
 237				$word = substr($word, 1);
 238
 239				// a group of which at least one may not be in the resulting posts
 240				if ($word[0] == '(')
 241				{
 242					$word = array_unique(explode('|', substr($word, 1, -1)));
 243					$mode = 'must_exclude_one';
 244				}
 245				// one word which should not be in the resulting posts
 246				else
 247				{
 248					$mode = 'must_not_contain';
 249				}
 250				$ignore_no_id = true;
 251			}
 252			// words which have to be included
 253			else
 254			{
 255				// no prefix is the same as a +prefix
 256				if ($word[0] == '+')
 257				{
 258					$word = substr($word, 1);
 259				}
 260
 261				// a group of words of which at least one word should be in every resulting post
 262				if ($word[0] == '(')
 263				{
 264					$word = array_unique(explode('|', substr($word, 1, -1)));
 265				}
 266				$ignore_no_id = false;
 267				$mode = 'must_contain';
 268			}
 269
 270			if (empty($word))
 271			{
 272				continue;
 273			}
 274
 275			// if this is an array of words then retrieve an id for each
 276			if (is_array($word))
 277			{
 278				$non_common_words = array();
 279				$id_words = array();
 280				foreach ($word as $i => $word_part)
 281				{
 282					if (strpos($word_part, '*') !== false)
 283					{
 284						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
 285						$non_common_words[] = $word_part;
 286					}
 287					else if (isset($words[$word_part]))
 288					{
 289						$id_words[] = $words[$word_part];
 290						$non_common_words[] = $word_part;
 291					}
 292					else
 293					{
 294						$len = utf8_strlen($word_part);
 295						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 296						{
 297							$this->common_words[] = $word_part;
 298						}
 299					}
 300				}
 301				if (sizeof($id_words))
 302				{
 303					sort($id_words);
 304					if (sizeof($id_words) > 1)
 305					{
 306						$this->{$mode . '_ids'}[] = $id_words;
 307					}
 308					else
 309					{
 310						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
 311						$this->{$mode . '_ids'}[] = $id_words[0];
 312					}
 313				}
 314				// throw an error if we shall not ignore unexistant words
 315				else if (!$ignore_no_id && sizeof($non_common_words))
 316				{
 317					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
 318				}
 319				unset($non_common_words);
 320			}
 321			// else we only need one id
 322			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
 323			{
 324				if ($wildcard)
 325				{
 326					$len = utf8_strlen(str_replace('*', '', $word));
 327					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 328					{
 329						$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
 330					}
 331					else
 332					{
 333						$this->common_words[] = $word;
 334					}
 335				}
 336				else
 337				{
 338					$this->{$mode . '_ids'}[] = $words[$word];
 339				}
 340			}
 341			// throw an error if we shall not ignore unexistant words
 342			else if (!$ignore_no_id)
 343			{
 344				if (!isset($common_ids[$word]))
 345				{
 346					$len = utf8_strlen($word);
 347					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 348					{
 349						trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
 350					}
 351					else
 352					{
 353						$this->common_words[] = $word;
 354					}
 355				}
 356			}
 357			else
 358			{
 359				$len = utf8_strlen($word);
 360				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 361				{
 362					$this->common_words[] = $word;
 363				}
 364			}
 365		}
 366
 367		// we can't search for negatives only
 368		if (!sizeof($this->must_contain_ids))
 369		{
 370			return false;
 371		}
 372
 373		sort($this->must_contain_ids);
 374		sort($this->must_not_contain_ids);
 375		sort($this->must_exclude_one_ids);
 376
 377		if (!empty($this->search_query))
 378		{
 379			return true;
 380		}
 381		return false;
 382	}
 383
	/**
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
	*
	* The result ids are looked up in the search cache first. On a cache miss one block of
	* $config['search_block_size'] ids is read from the database starting at $start, stored in the
	* cache, and the first $per_page ids of it are handed back through $id_ary.
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	string		&$fields			contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
	* @param	string		&$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		&$author_ary		an array of author ids if the author should be ignored during the search the array is empty
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			$start				indicates the first index of the page
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, false if there is no query or nothing was found
	*
	* @access	public
	*/
	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($this->must_contain_ids),
			serialize($this->must_not_contain_ids),
			serialize($this->must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		// counters for the aliases of the word match (m0, m1, ...) and word list (w0, w1, ...) tables
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
			),
			'LEFT_JOIN' => array(array(
				'FROM'	=> array(POSTS_TABLE => 'p'),
				'ON'	=> 'm0.post_id = p.post_id',
			)),
		);

		$title_match = '';
		$left_join_topics = false;
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$left_join_topics = true;
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			$left_join_topics = true;
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// every required word or group gets its own word match alias, all tied to the post of m0
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					// strings are LIKE patterns of wildcard words, they have to be resolved through the word list
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		// replace the LIKE patterns of excluded wildcard words by the word id column of a joined word list alias
		foreach ($this->must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				$this->must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		// a post passes if the left join finds none of the excluded words for it
		if (sizeof($this->must_not_contain_ids))
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		// for every group at least one of its words has to be missing from the post
		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast like author_search() does, the value is put into the query unquoted
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			$sql_where[] = $db->sql_in_set('p.poster_id', $author_ary);
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			// The WHERE clause may already reference the topics table (alias t, first post restriction),
			// so the count query needs the join as well. The main query receives it further down.
			if ($left_join_topics)
			{
				$sql_array_count['LEFT_JOIN'][] = array(
					'FROM'	=> array(TOPICS_TABLE => 't'),
					'ON'	=> 'p.topic_id = t.topic_id'
				);
			}

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// the count is fetched with FOUND_ROWS() after the main query
					$sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					$is_mysql = true;

				break;

				case 'sqlite':
					// counts the distinct ids through a subquery instead of COUNT(DISTINCT ...)
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					// keep the query built in the sqlite case if there is one
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort column is its table alias
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				$left_join_topics = true;
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		if ($left_join_topics)
		{
			// Append the join. Using the boolean flag as array key would address index 1
			// and replace a join which was added for a wildcard or an excluded word.
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		// only read one block of ids from the db, it is cached below
		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}
 745
	/**
	* Finds the posts or topics of the given authors, the message contents are not looked at.
	*
	* The search cache is consulted first. On a miss one block of $config['search_block_size']
	* ids is read from the database starting at $start and stored in the cache, the first
	* $per_page ids of that block are handed back through $id_ary.
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
	* @param	array		&$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		&$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		&$sort_dir			is either a or d representing ASC and DESC
	* @param	string		&$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		&$ex_fid_ary		specifies an array of forum ids which should not be searched
	* @param	array		&$m_approve_fid_ary	specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param	int			&$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		&$author_ary		an array of author ids
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			$start				indicates the first index of the page
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, 0 without authors and false if nothing was found
	*
	* @access	public
	*/
	function author_search($type, $firstpost_only, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// Nothing to look up without at least one author
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// Every option which influences the result set becomes part of the cache key
		$key_parts = array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary)
		);
		$search_key = md5(implode('#', $key_parts));

		// A complete cache hit ends the search right here
		$total_results = 0;
		$cache_state = $this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir);
		if ($cache_state == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// WHERE fragments, every optional one carries its own leading AND
		$where_author = $db->sql_in_set('p.poster_id', $author_ary);

		$and_forums = '';
		if (sizeof($ex_fid_ary))
		{
			$and_forums = ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		$and_time = '';
		if ($sort_days)
		{
			$and_time = ' AND p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$and_topic = '';
		if ($topic_id)
		{
			$and_topic = ' AND p.topic_id = ' . (int) $topic_id;
		}

		$and_firstpost = '';
		if ($firstpost_only)
		{
			$and_firstpost = ' AND p.post_id = t.topic_first_post_id';
		}

		// Sorting: the first character of the sort column is the alias of the table which has to be available
		$order_by = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sort_table = '';
		$sort_join = '';
		$sort_alias = $order_by[0];

		if ($sort_alias == 'u')
		{
			$sort_table = USERS_TABLE . ' u, ';
			$sort_join = ' AND u.user_id = p.poster_id ';
		}
		else if ($sort_alias == 't')
		{
			// topic searches and first post searches bring the topics table along themselves
			if ($type == 'posts' && !$firstpost_only)
			{
				$sort_table = TOPICS_TABLE . ' t, ';
				$sort_join = ' AND t.topic_id = p.topic_id ';
			}
		}
		else if ($sort_alias == 'f')
		{
			$sort_table = FORUMS_TABLE . ' f, ';
			$sort_join = ' AND f.forum_id = p.forum_id ';
		}

		// Approval restriction: none at all for array(-1), approved posts only for an empty array
		$and_approved = ' AND p.post_approved = 1';
		if (sizeof($m_approve_fid_ary))
		{
			if ($m_approve_fid_ary == array(-1))
			{
				$and_approved = '';
			}
			else
			{
				$and_approved = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
			}
		}

		$id_column = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$use_found_rows = false;

		// The cache did not know the total, so it has to be determined
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
					// MySQL delivers the total together with the main query, see FOUND_ROWS() below
					$id_column = 'SQL_CALC_FOUND_ROWS ' . $id_column;
					$use_found_rows = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $where_author
								$and_topic
								$and_firstpost
								$and_approved
								$and_forums
								$and_time";
					}
					else
					{
						// sqlite counts the distinct topics through a subquery
						if ($db->sql_layer == 'sqlite')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $where_author
								$and_topic
								$and_firstpost
								$and_approved
								$and_forums
								AND t.topic_id = p.topic_id
								$and_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
					}
					$result = $db->sql_query($sql);

					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// The query which actually selects the ids
		$field = ($type == 'posts') ? 'post_id' : 'topic_id';
		if ($type == 'posts')
		{
			$sql = "SELECT $id_column
				FROM " . $sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $where_author
					$and_topic
					$and_firstpost
					$and_approved
					$and_forums
					$sort_join
					$and_time
				ORDER BY $order_by";
		}
		else
		{
			$sql = "SELECT $id_column
				FROM " . $sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $where_author
					$and_topic
					$and_firstpost
					$and_approved
					$and_forums
					AND t.topic_id = p.topic_id
					$sort_join
					$and_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $order_by;
		}

		// Read a single block of ids, it is written to the cache below
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = $row[$field];
		}
		$db->sql_freeresult($result);

		// MySQL: ask for the total which was calculated along with the query above
		if (!$total_results && $use_found_rows)
		{
			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (!sizeof($id_ary))
		{
			return false;
		}

		// Cache the whole block, the caller only gets the ids of the current page
		$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, $per_page);

		return $total_results;
	}
 964
	/**
	* Breaks a text into the words which are eligible for the search index.
	*
	* HTML tags are stripped, [code] blocks and BBCode tags are replaced by a space,
	* the result is passed through cleanup() and then split at single spaces.
	* A word is dropped if it is longer than 255 bytes, has fewer bytes than the
	* configured minimum, or has fewer characters than the configured minimum
	* without starting in the Hangul or CJK ranges.
	*
	* NOTE: duplicates are NOT removed from the return array
	*
	* @param	string	$text	Text to split, encoded in UTF-8
	* @return	array			Array of UTF-8 words
	*
	* @access	private
	*/
	function split_message($text)
	{
		global $phpbb_root_path, $phpEx, $user;

		/**
		* Taken from the original code
		*/
		$strip_patterns = array(
			// Do not index code
			'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
			// BBcode
			'#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#',
		);

		$min_chars = $this->word_length['min'];
		// words with this many bytes or fewer can never reach the minimum length
		$max_ignored_bytes = $min_chars - 1;

		/**
		* Clean up the string, remove HTML tags, remove BBCodes
		*/
		$clean_text = $this->cleanup(preg_replace($strip_patterns, ' ', strip_tags($text)), -1);

		$indexable = array();

		// strtok() returns false once the text is exhausted, which ends the loop through strlen()
		for ($token = strtok($clean_text, ' '); strlen($token); $token = strtok(' '))
		{
			$bytes = strlen($token);

			/**
			* Words longer than 255 bytes are ignored. This will have to be
			* changed whenever we change the length of search_wordlist.word_text
			*
			* Words which do not even have the minimum number of bytes are ignored, too
			*/
			if ($bytes > 255 || $bytes <= $max_ignored_bytes)
			{
				continue;
			}

			/**
			* Test whether the word is too short to be indexed.
			*
			* Note that this limit does NOT apply to CJK and Hangul
			*/
			if (utf8_strlen($token) < $min_chars)
			{
				/**
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
				* we know that it will also be lower than CJK ranges
				*/
				$is_cjk_or_hangul = (strncmp($token, UTF8_HANGUL_FIRST, 3) >= 0 && strncmp($token, UTF8_HANGUL_LAST, 3) <= 0)
					|| (strncmp($token, UTF8_CJK_FIRST, 3) >= 0 && strncmp($token, UTF8_CJK_LAST, 3) <= 0)
					|| (strncmp($token, UTF8_CJK_B_FIRST, 4) >= 0 && strncmp($token, UTF8_CJK_B_LAST, 4) <= 0);

				if (!$is_cjk_or_hangul)
				{
					continue;
				}
			}

			$indexable[] = $token;
		}

		return $indexable;
	}
1044
1045	/**
1046	* Updates wordlist and wordmatch tables when a message is posted or changed
1047	*
1048	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
1049	* @param	int		$post_id	The id of the post which is modified/created
1050	* @param	string	&$message	New or updated post content
1051	* @param	string	&$subject	New or updated post subject
1052	* @param	int		$poster_id	Post author's user id
1053	* @param	int		$forum_id	The id of the forum in which the post is located
1054	*
1055	* @access	public
1056	*/
1057	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1058	{
1059		global $config, $db, $user;
1060
1061		if (!$config['fulltext_native_load_upd'])
1062		{
1063			/**
1064			* The search indexer is disabled, return
1065			*/
1066			return;
1067		}
1068
1069		// Split old and new post/subject to obtain array of 'words'
1070		$split_text = $this->split_message($message);
1071		$split_title = $this->split_message($subject);
1072
1073		$cur_words = array('post' => array(), 'title' => array());
1074
1075		$words = array();
1076		if ($mode == 'edit')
1077		{
1078			$words['add']['post'] = array();
1079			$words['add']['title'] = array();
1080			$words['del']['post'] = array();
1081			$words['del']['title'] = array();
1082
1083			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1084				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1085				WHERE m.post_id = $post_id
1086					AND w.word_id = m.word_id";
1087			$result = $db->sql_query($sql);
1088
1089			while ($row = $db->sql_fetchrow($result))
1090			{
1091				$which = ($row['title_match']) ? 'title' : 'post';
1092				$cur_words[$which][$row['word_text']] = $row['word_id'];
1093			}
1094			$db->sql_freeresult($result);
1095
1096			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1097			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1098			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1099			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1100		}
1101		else
1102		{
1103			$words['add']['post'] = $split_text;
1104			$words['add']['title'] = $split_title;
1105			$words['del']['post'] = array();
1106			$words['del']['title'] = array();
1107		}
1108		unset($split_text);
1109		unset($split_title);
1110
1111		// Get unique words from the above arrays
1112		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1113		
1114		// We now have unique arrays of all words to be added and removed and
1115		// individual arrays of added and removed words for text and title. What
1116		// we need to do now is add the new words (if they don't already exist)
1117		// and then add (or remove) matches between the words and this post
1118		if (sizeof($unique_add_words))
1119		{
1120			$sql = 'SELECT word_id, word_text
1121				FROM ' . SEARCH_WORDLIST_TABLE . '
1122				WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
1123			$result = $db->sql_query($sql);
1124
1125			$word_ids = array();
1126			while ($row = $db->sql_fetchrow($result))
1127			{
1128				$word_ids[$row['word_text']] = $row['word_id'];
1129			}
1130			$db->sql_freeresult($result);
1131			$new_words = array_diff($unique_add_words, array_keys($word_ids));
1132
1133			$db->sql_transaction('begin');
1134			if (sizeof($new_words))
1135			{
1136				$sql_ary = array();
1137
1138				foreach ($new_words as $word)
1139				{
1140					$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
1141				}
1142				$db->sql_return_on_error(true);
1143				$db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1144				$db->sql_return_on_error(false);
1145			}
1146			unset($new_words, $sql_ary);
1147		}
1148		else
1149		{
1150			$db->sql_transaction('begin');
1151		}
1152
1153		// now update the search match table, remove links to removed words and add links to new words
1154		foreach ($words['del'] as $word_in => $word_ary)
1155		{
1156			$title_match = ($word_in == 'title') ? 1 : 0;
1157
1158			if (sizeof($word_ary))
1159			{
1160				$sql_in = array();
1161				foreach ($word_ary as $word)
1162				{
1163					$sql_in[] = $cur_words[$word_in][$word];
1164				}
1165
1166				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1167					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1168						AND post_id = ' . intval($post_id) . "
1169						AND title_match = $title_match";
1170				$db->sql_query($sql);
1171
1172				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1173					SET word_count = word_count - 1
1174					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1175						AND word_count > 0';
1176				$db->sql_query($sql);
1177
1178				unset($sql_in);
1179			}
1180		}
1181
1182		$db->sql_return_on_error(true);
1183		foreach ($words['add'] as $word_in => $word_ary)
1184		{
1185			$title_match = ($word_in == 'title') ? 1 : 0;
1186
1187			if (sizeof($word_ary))
1188			{
1189				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
1190					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
1191					FROM ' . SEARCH_WORDLIST_TABLE . '
1192					WHERE ' . $db->sql_in_set('word_text', $word_ary);
1193				$db->sql_query($sql);
1194
1195				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1196					SET word_count = word_count + 1
1197					WHERE ' . $db->sql_in_set('word_text', $word_ary);
1198				$db->sql_query($sql);
1199			}
1200		}
1201		$db->sql_return_on_error(false);
1202
1203		$db->sql_transaction('commit');
1204
1205		// destroy cached search results containing any of the words removed or added
1206		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
1207
1208		unset($unique_add_words);
1209		unset($words);
1210		unset($cur_words);
1211	}
1212
1213	/**
1214	* Removes entries from the wordmatch table for the specified post_ids
1215	*/
1216	function index_remove($post_ids, $author_ids, $forum_ids)
1217	{
1218		global $db;
1219
1220		if (sizeof($post_ids))
1221		{
1222			$sql = 'SELECT w.word_id, w.word_text, m.title_match
1223				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
1224				WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
1225					AND w.word_id = m.word_id';
1226			$result = $db->sql_query($sql);
1227
1228			$message_word_ids = $title_word_ids = $word_texts = array();
1229			while ($row = $db->sql_fetchrow($result))
1230			{
1231				if ($row['title_match'])
1232				{
1233					$title_word_ids[] = $row['word_id'];
1234				}
1235				else
1236				{
1237					$message_word_ids[] = $row['word_id'];
1238				}
1239				$word_texts[] = $row['word_text'];
1240			}
1241			$db->sql_freeresult($result);
1242
1243			if (sizeof($title_word_ids))
1244			{
1245				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1246					SET word_count = word_count - 1
1247					WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
1248						AND word_count > 0';
1249				$db->sql_query($sql);
1250			}
1251
1252			if (sizeof($message_word_ids))
1253			{
1254				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1255					SET word_count = word_count - 1
1256					WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
1257						AND word_count > 0';
1258				$db->sql_query($sql);
1259			}
1260
1261			unset($title_word_ids);
1262			unset($message_word_ids);
1263
1264			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1265				WHERE ' . $db->sql_in_set('post_id', $post_ids);
1266			$db->sql_query($sql);
1267		}
1268
1269		$this->destroy_cache(array_unique($word_texts), $author_ids);
1270	}
1271
1272	/**
1273	* Tidy up indexes: Tag 'common words' and remove
1274	* words no longer referenced in the match table
1275	*/
1276	function tidy()
1277	{
1278		global $db, $config;
1279
1280		// Is the fulltext indexer disabled? If yes then we need not
1281		// carry on ... it's okay ... I know when I'm not wanted boo hoo
1282		if (!$config['fulltext_native_load_upd'])
1283		{
1284			set_config('search_last_gc', time(), true);
1285			return;
1286		}
1287
1288		$destroy_cache_words = array();
1289
1290		// Remove common words
1291		if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
1292		{
1293			$common_threshold = ((double) $config['fulltext_native_common_thres']) / 100.0;
1294			// First, get the IDs of common words
1295			$sql = 'SELECT word_id, word_text
1296				FROM ' . SEARCH_WORDLIST_TABLE . '
1297				WHERE word_count > ' . floor($config['num_posts'] * $common_threshold) . '
1298					OR word_common = 1';
1299			$result = $db->sql_query($sql);
1300
1301			$sql_in = array();
1302			while ($row = $db->sql_fetchrow($result))
1303			{
1304				$sql_in[] = $row['word_id'];
1305				$destroy_cache_words[] = $row['word_text'];
1306			}
1307			$db->sql_freeresult($result);
1308
1309			if (sizeof($sql_in))
1310			{
1311				// Flag the words
1312				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1313					SET word_common = 1
1314					WHERE ' . $db->sql_in_set('word_id', $sql_in);
1315				$db->sql_query($sql);
1316
1317				// by setting search_last_gc to the new time here we make sure that if a user reloads because the
1318				// following query takes too long, he won't run into it again
1319				set_config('search_last_gc', time(), true);
1320
1321				// Delete the matches
1322				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1323					WHERE ' . $db->sql_in_set('word_id', $sql_in);
1324				$db->sql_query($sql);
1325			}
1326			unset($sql_in);
1327		}
1328
1329		if (sizeof($destroy_cache_words))
1330		{
1331			// destroy cached search results containing any of the words that are now common or were removed
1332			$this->destroy_cache(array_unique($destroy_cache_words));
1333		}
1334
1335		set_config('search_last_gc', time(), true);
1336	}
1337
1338	/**
1339	* Deletes all words from the index
1340	*/
1341	function delete_index($acp_module, $u_action)
1342	{
1343		global $db;
1344
1345		switch ($db->sql_layer)
1346		{
1347			case 'sqlite':
1348			case 'firebird':
1349				$db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
1350				$db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
1351				$db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
1352			break;
1353
1354			default:
1355				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
1356				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
1357				$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
1358			break;
1359		}
1360	}
1361
1362	/**
1363	* Returns true if both FULLTEXT indexes exist
1364	*/
1365	function index_created()
1366	{
1367		if (!sizeof($this->stats))
1368		{
1369			$this->get_stats();
1370		}
1371
1372		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1373	}
1374
1375	/**
1376	* Returns an associative array containing information about the indexes
1377	*/
1378	function index_stats()
1379	{
1380		global $user;
1381
1382		if (!sizeof($this->stats))
1383		{
1384			$this->get_stats();
1385		}
1386
1387		return array(
1388			$user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
1389			$user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
1390	}
1391
1392	function get_stats()
1393	{
1394		global $db;
1395
1396		$sql = 'SELECT COUNT(*) as total_words
1397			FROM ' . SEARCH_WORDLIST_TABLE;
1398		$result = $db->sql_query($sql);
1399		$this->stats['total_words'] = (int) $db->sql_fetchfield('total_words');
1400		$db->sql_freeresult($result);
1401
1402		$sql = 'SELECT COUNT(*) as total_matches
1403			FROM ' . SEARCH_WORDMATCH_TABLE;
1404		$result = $db->sql_query($sql);
1405		$this->stats['total_matches'] = (int) $db->sql_fetchfield('total_matches');
1406		$db->sql_freeresult($result);
1407	}
1408
1409	/**
1410	* Clean up a text to remove non-alphanumeric characters
1411	*
1412	* This method receives a UTF-8 string, normalizes and validates it, replaces all
1413	* non-alphanumeric characters with strings then returns the result.
1414	*
1415	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1416	*
1417	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
1418	* @param	string	$allowed_chars	String of special chars to allow
1419	* @param	string	$encoding		Text encoding
1420	* @return	string					Cleaned up text, only alphanumeric chars are left
1421	*
1422	* @todo normalizer::cleanup being able to be used?
1423	*/
1424	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1425	{
1426		global $phpbb_root_path, $phpEx;
1427		static $conv = array(), $conv_loaded = array();
1428		$words = $allow = array();
1429
1430		// Convert the text to UTF-8
1431		$encoding = strtolower($encoding);
1432		if ($encoding != 'utf-8')
1433		{
1434			$text = utf8_recode($text, $encoding);
1435		}
1436
1437		$utf_len_mask = array(
1438			"\xC0"	=>	2,
1439			"\xD0"	=>	2,
1440			"\xE0"	=>	3,
1441			"\xF0"	=>	4
1442		);
1443
1444		/**
1445		* Replace HTML entities and NCRs
1446		*/
1447		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1448
1449		/**
1450		* Load the UTF-8 normalizer
1451		*
1452		* If we use it more widely, an instance of that class should be held in a
1453		* a global variable instead
1454		*/
1455		utf_normalizer::nfc($text);
1456
1457		/**
1458		* The first thing we do is:
1459		*
1460		* - convert ASCII-7 letters to lowercase
1461		* - remove the ASCII-7 non-alpha characters
1462		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1463		*   0xC1 and 0xF5-0xFF
1464		*
1465		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1466		*/
1467		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
1468		$sb_replace	= 'istcpamelrdojbnhfgvwuqkyxz                                                                              ';
1469
1470		/**
1471		* This is the list of legal ASCII chars, it is automatically extended
1472		* with ASCII chars from $allowed_chars
1473		*/
1474		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
1475
1476		/**
1477		* Prepare an array containing the extra chars to allow
1478		*/
1479		if (isset($allowed_chars[0]))
1480		{
1481			$pos = 0;
1482			$len = strlen($allowed_chars);
1483			do
1484			{
1485				$c = $allowed_chars[$pos];
1486
1487				if ($c < "\x80")
1488				{
1489					/**
1490					* ASCII char
1491					*/
1492					$sb_pos = strpos($sb_match, $c);
1493					if (is_int($sb_pos))
1494					{
1495						/**
1496						* Remove the char from $sb_match and its corresponding
1497						* replacement in $sb_replace
1498						*/
1499						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
1500						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
1501						$legal_ascii .= $c;
1502					}
1503
1504					++$pos;
1505				}
1506				else
1507				{
1508					/**
1509					* UTF-8 char
1510					*/
1511					$utf_len = $utf_len_mask[$c & "\xF0"];
1512					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
1513					$pos += $utf_len;
1514				}
1515			}
1516			while ($pos < $len);
1517		}
1518
1519		$text = strtr($text, $sb_match, $sb_replace);
1520		$ret = '';
1521
1522		$pos = 0;
1523		$len = strlen($text);
1524
1525		do
1526		{
1527			/**
1528			* Do all consecutive ASCII chars at once
1529			*/
1530			if ($spn = strspn($text, $legal_ascii, $pos))
1531			{
1532				$ret .= substr($text, $pos, $spn);
1533				$pos += $spn;
1534			}
1535
1536			if ($pos >= $len)
1537			{
1538				return $ret;
1539			}
1540
1541			/**
1542			* Capture the UTF char
1543			*/
1544			$utf_len = $utf_len_mask[$text[$pos] & "\xF0"];
1545			$utf_char = substr($text, $pos, $utf_len);
1546			$pos += $utf_len;
1547
1548			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
1549			 || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
1550			 || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
1551			{
1552				/**
1553				* All characters within these ranges are valid
1554				*
1555				* We separate them with a space in order to index each character
1556				* individually
1557				*/
1558				$ret .= ' ' . $utf_char . ' ';
1559				continue;
1560			}
1561
1562			if (isset($allow[$utf_char]))
1563			{
1564				/**
1565				* The char is explicitly allowed
1566				*/
1567				$ret .= $utf_char;
1568				continue;
1569			}
1570
1571			if (isset($conv[$utf_char]))
1572			{
1573				/**
1574				* The char is mapped to something, maybe to itself actually
1575				*/
1576				$ret .= $conv[$utf_char];
1577				continue;
1578			}
1579
1580			/**
1581			* The char isn't mapped, but did we load its conversion table?
1582			*
1583			* The search indexer table is split into blocks. The block number of
1584			* each char is equal to its codepoint right-shifted for 11 bits. It
1585			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
1586			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
1587			* all UTF chars encoded in 2 bytes are in the same first block.
1588			*/
1589			if (isset($utf_char[2]))
1590			{
1591				if (isset($utf_char[3]))
1592				{
1593					/**
1594					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
1595					* 0000 0111 0011 1111 0010 0000
1596					*/
1597					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
1598				}
1599				else
1600				{
1601					/**
1602					* 1110 nnnn 10nx xxxx 10xx xxxx
1603					* 0000 0111 0010 0000
1604					*/
1605					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
1606				}
1607			}
1608			else
1609			{
1610				/**
1611				* 110x xxxx 10xx xxxx
1612				* 0000 0000 0000 0000
1613				*/
1614				$idx = 0;
1615			}
1616
1617			/**
1618			* Check if the required conv table has been loaded already
1619			*/
1620			if (!isset($conv_loaded[$idx]))
1621			{
1622				$conv_loaded[$idx] = 1;
1623				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
1624
1625				if (file_exists($file))
1626				{
1627					$conv += include($file);
1628				}
1629			}
1630
1631			if (isset($conv[$utf_char]))
1632			{
1633				$ret .= $conv[$utf_char];
1634			}
1635			else
1636			{
1637				/**
1638				* We add an entry to the conversion table so that we
1639				* don't have to convert to codepoint and perform the checks
1640				* that are above this block
1641				*/
1642				$conv[$utf_char] = ' ';
1643				$ret .= ' ';
1644			}
1645		}
1646		while (1);
1647
1648		return $ret;
1649	}
1650
1651	/**
1652	* Returns a list of options for the ACP to display
1653	*/
1654	function acp()
1655	{
1656		global $user, $config;
1657
1658
1659		/**
1660		* if we need any options, copied from fulltext_native for now, will have to be adjusted or removed
1661		*/
1662
1663		$tpl = '
1664		<dl>
1665			<dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
1666			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label></dd>
1667		</dl>
1668		<dl>
1669			<dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1670			<dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
1671		</dl>
1672		<dl>
1673			<dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1674			<dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
1675		</dl>
1676		<dl>
1677			<dt><label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
1678			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %</dd>
1679		</dl>
1680		';
1681
1682		// These are fields required in the config table
1683		return array(
1684			'tpl'		=> $tpl,
1685			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
1686		);
1687	}
1688}
1689
1690?>