PageRenderTime 56ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/phpBB/phpbb/search/fulltext_sphinx.php

http://github.com/phpbb/phpbb
PHP | 1058 lines | 651 code | 117 blank | 290 comment | 67 complexity | fed641d7e1b8758934ca5bbd89d1d33a MD5 | raw file
Possible License(s): GPL-3.0, AGPL-1.0
  1. <?php
  2. /**
  3. *
  4. * This file is part of the phpBB Forum Software package.
  5. *
  6. * @copyright (c) phpBB Limited <https://www.phpbb.com>
  7. * @license GNU General Public License, version 2 (GPL-2.0)
  8. *
  9. * For full copyright and license information, please see
  10. * the docs/CREDITS.txt file.
  11. *
  12. */
  13. namespace phpbb\search;
  // Baseline for the max_matches argument handed to SphinxClient::SetLimits()
  \define('SPHINX_MAX_MATCHES', 20000);
  // How many times a refused connection to searchd is retried
  \define('SPHINX_CONNECT_RETRIES', 3);
  // Pause between two connection attempts; the value is passed to usleep()
  \define('SPHINX_CONNECT_WAIT_TIME', 300);
  17. /**
  18. * Fulltext search based on the sphinx search daemon
  19. */
  20. class fulltext_sphinx
  21. {
  22. /**
  23. * Associative array holding index stats
  24. * @var array
  25. */
  26. protected $stats = array();
  27. /**
  28. * Holds the words entered by user, obtained by splitting the entered query on whitespace
  29. * @var array
  30. */
  31. protected $split_words = array();
  32. /**
  33. * Holds unique sphinx id
  34. * @var string
  35. */
  36. protected $id;
  37. /**
  38. * Stores the names of both main and delta sphinx indexes
  39. * separated by a semicolon
  40. * @var string
  41. */
  42. protected $indexes;
  43. /**
  44. * Sphinx searchd client object
  45. * @var SphinxClient
  46. */
  47. protected $sphinx;
  48. /**
  49. * Relative path to board root
  50. * @var string
  51. */
  52. protected $phpbb_root_path;
  53. /**
  54. * PHP Extension
  55. * @var string
  56. */
  57. protected $php_ext;
  58. /**
  59. * Auth object
  60. * @var \phpbb\auth\auth
  61. */
  62. protected $auth;
  63. /**
  64. * Config object
  65. * @var \phpbb\config\config
  66. */
  67. protected $config;
  68. /**
  69. * Database connection
  70. * @var \phpbb\db\driver\driver_interface
  71. */
  72. protected $db;
  73. /**
  74. * Database Tools object
  75. * @var \phpbb\db\tools\tools_interface
  76. */
  77. protected $db_tools;
  78. /**
  79. * Stores the database type if supported by sphinx
  80. * @var string
  81. */
  82. protected $dbtype;
  83. /**
  84. * phpBB event dispatcher object
  85. * @var \phpbb\event\dispatcher_interface
  86. */
  87. protected $phpbb_dispatcher;
  88. /**
  89. * User object
  90. * @var \phpbb\user
  91. */
  92. protected $user;
  93. /**
  94. * Stores the generated content of the sphinx config file
  95. * @var string
  96. */
  97. protected $config_file_data = '';
  98. /**
  99. * Contains tidied search query.
  100. * Operators are prefixed in search query and common words excluded
  101. * @var string
  102. */
  103. protected $search_query;
  104. /**
  105. * Constructor
  106. * Creates a new \phpbb\search\fulltext_sphinx, which is used as a search backend
  107. *
  108. * @param string|bool $error Any error that occurs is passed on through this reference variable otherwise false
  109. * @param string $phpbb_root_path Relative path to phpBB root
  110. * @param string $phpEx PHP file extension
  111. * @param \phpbb\auth\auth $auth Auth object
  112. * @param \phpbb\config\config $config Config object
  113. * @param \phpbb\db\driver\driver_interface $db Database object
  114. * @param \phpbb\user $user User object
  115. * @param \phpbb\event\dispatcher_interface $phpbb_dispatcher Event dispatcher object
  116. */
  public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user, $phpbb_dispatcher)
  {
      // Keep hold of the injected services and path settings
      $this->auth = $auth;
      $this->config = $config;
      $this->db = $db;
      $this->user = $user;
      $this->phpbb_dispatcher = $phpbb_dispatcher;
      $this->phpbb_root_path = $phpbb_root_path;
      $this->php_ext = $phpEx;

      // The database tools service is not injected, it is fetched from the global container
      global $phpbb_container; // TODO inject into object
      $this->db_tools = $phpbb_container->get('dbal.tools');

      // Create and store the board's sphinx id the first time it is needed
      if (!$this->config['fulltext_sphinx_id'])
      {
          $this->config->set('fulltext_sphinx_id', unique_id());
      }
      $this->id = $this->config['fulltext_sphinx_id'];

      // Delta index first, main index second, separated by a semicolon
      $this->indexes = "index_phpbb_{$this->id}_delta;index_phpbb_{$this->id}_main";

      // Load the bundled sphinx API unless a SphinxClient class is already available
      if (!class_exists('SphinxClient'))
      {
          require($this->phpbb_root_path . 'includes/sphinxapi.' . $this->php_ext);
      }

      // Fall back to localhost:9312 when host and/or port are not configured
      $searchd_host = $this->config['fulltext_sphinx_host'] ?: 'localhost';
      $searchd_port = $this->config['fulltext_sphinx_port'] ? (int) $this->config['fulltext_sphinx_port'] : 9312;

      // Initialize sphinx client
      $this->sphinx = new \SphinxClient();
      $this->sphinx->SetServer($searchd_host, $searchd_port);

      $error = false;
  }
  144. /**
  145. * Returns the name of this search backend to be displayed to administrators
  146. *
  147. * @return string Name
  148. */
  public function get_name()
  {
      // Fixed, untranslated label for this backend
      $backend_name = 'Sphinx Fulltext';

      return $backend_name;
  }
  153. /**
  154. * Returns the search_query
  155. *
  156. * @return string search query
  157. */
  public function get_search_query()
  {
      // The tidied query is stored by split_keywords() (and reset by author_search())
      $tidied_query = $this->search_query;

      return $tidied_query;
  }
  162. /**
  163. * Returns false as there is no word_len array
  164. *
  165. * @return false
  166. */
  public function get_word_length()
  {
      // This backend keeps no word length array, so the answer is always false
      $word_length = false;

      return $word_length;
  }
  171. /**
  172. * Returns an empty array as there are no common_words
  173. *
  174. * @return array common words that are ignored by search backend
  175. */
  public function get_common_words()
  {
      // This backend keeps no list of common words, so the list is always empty
      $common_words = array();

      return $common_words;
  }
  180. /**
  181. * Checks permissions and paths, if everything is correct it generates the config file
  182. *
  183. * @return string|bool Language key of the error/incompatibility encountered, or false if successful
  184. */
  public function init()
  {
      // Only the mysqli and postgres SQL layers are accepted; the layer is queried once
      if (!in_array($this->db->get_sql_layer(), array('mysqli', 'postgres'), true))
      {
          // Use the lang() accessor, as config_generate() does for the same key
          return $this->user->lang('FULLTEXT_SPHINX_WRONG_DATABASE');
      }

      // Move delta to main index each hour
      $this->config->set('search_gc', 3600);

      // false signals "no error" to the caller
      return false;
  }
  195. /**
  196. * Generates content of sphinx.conf
  197. *
  198. * @return bool True if sphinx.conf content is correctly generated, false otherwise
  199. */
  protected function config_generate()
  {
      // Translate the phpBB SQL layer into the source type written to sphinx.conf
      switch ($this->db->get_sql_layer())
      {
          case 'mysqli':
              $this->dbtype = 'mysql';
          break;

          case 'postgres':
              $this->dbtype = 'pgsql';
          break;

          default:
              $this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_WRONG_DATABASE');
              return false;
      }

      // Without a data path no usable configuration can be written
      if (!$this->config['fulltext_sphinx_data_path'])
      {
          $this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_NO_CONFIG_DATA');
          return false;
      }

      // Brings $dbhost, $dbname and $dbport into the local scope
      include($this->phpbb_root_path . 'config.' . $this->php_ext);

      /* Now that we're sure everything was entered correctly,
      generate a config for the index. We use a config value
      fulltext_sphinx_id for this, as it should be unique. */
      $config_object = new \phpbb\search\sphinx\config($this->config_file_data);

      // Section and path fragments that are used more than once below
      $sphinx_data_path = $this->config['fulltext_sphinx_data_path'];
      $source_main = 'source_phpbb_' . $this->id . '_main';
      $source_delta = 'source_phpbb_' . $this->id . '_delta';
      $index_main = 'index_phpbb_' . $this->id . '_main';
      $index_delta = 'index_phpbb_' . $this->id . '_delta';

      // Main and delta sources select the same columns and only differ in the lower post_id bound
      $post_select = 'SELECT
						p.post_id AS id,
						p.forum_id,
						p.topic_id,
						p.poster_id,
						p.post_visibility,
						CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post,
						p.post_time,
						p.post_subject,
						p.post_subject as title,
						p.post_text as data,
						t.topic_last_post_time,
						0 as deleted
					FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t
					WHERE
						p.topic_id = t.topic_id
						AND p.post_id >= ';

      $config_data = [
          'source ' . $source_main => [
              ['type', $this->dbtype . ' # mysql or pgsql'],
              // This config value sql_host needs to be changed in case sphinx and sql are on different servers
              ['sql_host', $dbhost . ' # SQL server host sphinx connects to'],
              ['sql_user', '[dbuser]'],
              ['sql_pass', '[dbpassword]'],
              ['sql_db', $dbname],
              ['sql_port', $dbport . ' # optional, default is 3306 for mysql and 5432 for pgsql'],
              ['sql_query_pre', 'SET NAMES \'utf8\''],
              ['sql_query_pre', 'UPDATE ' . SPHINX_TABLE . ' SET max_doc_id = (SELECT MAX(post_id) FROM ' . POSTS_TABLE . ') WHERE counter_id = 1'],
              ['sql_query_range', 'SELECT MIN(post_id), MAX(post_id) FROM ' . POSTS_TABLE . ''],
              ['sql_range_step', '5000'],
              ['sql_query', $post_select . '$start AND p.post_id <= $end'],
              ['sql_query_post', ''],
              ['sql_query_post_index', 'UPDATE ' . SPHINX_TABLE . ' SET max_doc_id = $maxid WHERE counter_id = 1'],
              ['sql_attr_uint', 'forum_id'],
              ['sql_attr_uint', 'topic_id'],
              ['sql_attr_uint', 'poster_id'],
              ['sql_attr_uint', 'post_visibility'],
              ['sql_attr_bool', 'topic_first_post'],
              ['sql_attr_bool', 'deleted'],
              ['sql_attr_timestamp', 'post_time'],
              ['sql_attr_timestamp', 'topic_last_post_time'],
              ['sql_attr_string', 'post_subject'],
          ],
          'source ' . $source_delta . ' : ' . $source_main => [
              ['sql_query_pre', 'SET NAMES \'utf8\''],
              ['sql_query_range', ''],
              ['sql_range_step', ''],
              ['sql_query', $post_select . '( SELECT max_doc_id FROM ' . SPHINX_TABLE . ' WHERE counter_id=1 )'],
              ['sql_query_post_index', ''],
          ],
          'index ' . $index_main => [
              ['path', $sphinx_data_path . $index_main],
              ['source', $source_main],
              ['docinfo', 'extern'],
              ['morphology', 'none'],
              ['stopwords', ''],
              ['wordforms', ' # optional, specify path to wordforms file. See ./docs/sphinx_wordforms.txt for example'],
              ['exceptions', ' # optional, specify path to exceptions file. See ./docs/sphinx_exceptions.txt for example'],
              ['min_word_len', '2'],
              ['charset_table', 'U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF'],
              ['ignore_chars', 'U+0027, U+002C'],
              ['min_prefix_len', '3 # Minimum number of characters for wildcard searches by prefix (min 1). Default is 3. If specified, set min_infix_len to 0'],
              ['min_infix_len', '0 # Minimum number of characters for wildcard searches by infix (min 2). If specified, set min_prefix_len to 0'],
              ['html_strip', '1'],
              ['index_exact_words', '0 # Set to 1 to enable exact search operator. Requires wordforms or morphology'],
              ['blend_chars', 'U+23, U+24, U+25, U+26, U+40'],
          ],
          'index ' . $index_delta . ' : ' . $index_main => [
              ['path', $sphinx_data_path . $index_delta],
              ['source', $source_delta],
          ],
          'indexer' => [
              ['mem_limit', $this->config['fulltext_sphinx_indexer_mem_limit'] . 'M'],
          ],
          'searchd' => [
              ['listen' , ($this->config['fulltext_sphinx_host'] ?: 'localhost') . ':' . ($this->config['fulltext_sphinx_port'] ?: '9312')],
              ['log', $sphinx_data_path . 'log/searchd.log'],
              ['query_log', $sphinx_data_path . 'log/sphinx-query.log'],
              ['read_timeout', '5'],
              ['max_children', '30'],
              ['pid_file', $sphinx_data_path . 'searchd.pid'],
              ['binlog_path', $sphinx_data_path],
          ],
      ];

      // Variables that may legitimately appear several times within one section
      $non_unique = ['sql_query_pre' => true, 'sql_attr_uint' => true, 'sql_attr_timestamp' => true, 'sql_attr_str2ordinal' => true, 'sql_attr_bool' => true];
      // Variables that are always removed from a section
      $delete = ['sql_group_column' => true, 'sql_date_column' => true, 'sql_str2ordinal_column' => true];

      /**
      * Allow adding/changing the Sphinx configuration data
      *
      * @event core.search_sphinx_modify_config_data
      * @var	array	config_data	Array with the Sphinx configuration data
      * @var	array	non_unique	Array with the Sphinx non-unique variables to delete
      * @var	array	delete		Array with the Sphinx variables to delete
      * @since 3.1.7-RC1
      */
      $vars = array(
          'config_data',
          'non_unique',
          'delete',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_sphinx_modify_config_data', compact($vars)));

      foreach ($config_data as $section_name => $section_data)
      {
          // Reuse the section when it already exists in the parsed config, otherwise add it
          $section = $config_object->get_section_by_name($section_name);
          if (!$section)
          {
              $section = $config_object->add_section($section_name);
          }

          // Drop obsolete variables first, then every repeatable one so they can be re-added below
          foreach (array_keys($delete) as $obsolete_name)
          {
              $section->delete_variables_by_name($obsolete_name);
          }
          foreach (array_keys($non_unique) as $repeatable_name)
          {
              $section->delete_variables_by_name($repeatable_name);
          }

          foreach ($section_data as $entry)
          {
              $key = $entry[0];
              $value = $entry[1];

              // Repeatable variables are always appended as a new entry
              if (isset($non_unique[$key]))
              {
                  $section->create_variable($key, $value);
                  continue;
              }

              // Unique variables are overwritten in place when present
              $variable = $section->get_variable_by_name($key);
              if ($variable)
              {
                  $variable->set_value($value);
              }
              else
              {
                  $section->create_variable($key, $value);
              }
          }
      }

      $this->config_file_data = $config_object->get_data();

      return true;
  }
  383. /**
  384. * Splits keywords entered by a user into an array of words stored in $this->split_words
  385. * Stores the tidied search query in $this->search_query
  386. *
  387. * @param string $keywords Contains the keyword as entered by the user
  388. * @param string $terms is either 'all' or 'any'
  389. * @return false if no valid keywords were found and otherwise true
  390. */
  public function split_keywords(&$keywords, $terms)
  {
      // Keep quotes and new lines
      $keywords = trim($keywords);
      $keywords = str_replace(['&quot;', "\n"], ['"', ' '], $keywords);

      if ($terms == 'all')
      {
          // Replaces verbal operators OR and NOT with special characters | and -, unless appearing within quotation marks
          $verbal_operators = [
              '#\sor\s(?=([^"]*"[^"]*")*[^"]*$)#i',
              '#\snot\s(?=([^"]*"[^"]*")*[^"]*$)#i',
          ];
          $keywords = preg_replace($verbal_operators, [' | ', ' -'], $keywords);

          $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED);
      }
      else
      {
          // In "any" mode operator characters and encoded &, <, > are blanked out
          $stripped = ['\\', '(',')', '|', '!', '@', '~', '/', '^', '$', '=', '&amp;', '&lt;', '&gt;'];
          $keywords = str_replace($stripped, ' ', $keywords);

          $this->sphinx->SetMatchMode(SPH_MATCH_ANY);
      }

      // Nothing left to search for
      if (!(strlen($keywords) > 0))
      {
          return false;
      }

      // The stored query carries its quotes as &quot; entities
      $this->search_query = str_replace('"', '&quot;', $keywords);

      return true;
  }
  416. /**
  417. * Cleans search query passed into Sphinx search engine, as follows:
  418. * 1. Hyphenated words are replaced with keyword search for either the exact phrase with spaces
  419. * or as a single word without spaces eg search for "know-it-all" becomes ("know it all"|"knowitall*")
  420. * 2. Words with apostrophes are contracted eg "it's" becomes "its"
  421. * 3. <, >, " and & are decoded from HTML entities.
  422. * 4. Following special characters used as search operators in Sphinx are preserved when used with correct syntax:
  423. * (a) quorum matching: "the world is a wonderful place"/3
  424. * Finds 3 of the words within the phrase. Number must be between 1 and 9.
  425. * (b) proximity search: "hello world"~10
  426. * Finds hello and world within 10 words of each other. Number can be between 1 and 99.
  427. * (c) strict word order: aaa << bbb << ccc
  428. * Finds "aaa" only where it appears before "bbb" and only where "bbb" appears before "ccc".
  429. * (d) exact match operator: if lemmatizer or stemming enabled,
  430. * search will find exact match only and ignore other grammatical forms of the same word stem.
  431. * eg. raining =cats and =dogs
  432. * will not return "raining cat and dog"
  433. * eg. ="search this exact phrase"
  434. * will not return "searched this exact phrase", "searching these exact phrases".
  435. * 5. Special characters /, ~, << and = not complying with the correct syntax
  436. * and other reserved operators are escaped and searched literally.
  437. * Special characters not explicitly listed in charset_table or blend_chars in sphinx.conf
  438. * will not be indexed and keywords containing them will be ignored by Sphinx.
  439. * By default, only $, %, & and @ characters are indexed and searchable.
  440. * String transformation is in backend only and not visible to the end user
  441. * nor reflected in the results page URL or keyword highlighting.
  442. *
  443. * @param string $search_string
  444. * @return string
  445. */
  public function sphinx_clean_search_string($search_string)
  {
      // Escape reserved characters, decode HTML entities and drop apostrophes.
      // The pairs are applied one after another in the order listed here.
      $literal_map = [
          '@'         => '\@',
          '^'         => '\^',
          '$'         => '\$',
          '!'         => '\!',
          '&lt;'      => '<',
          '&gt;'      => '>',
          '&quot;'    => '"',
          '&amp;'     => '&',
          '\''        => '',
      ];
      $search_string = str_replace(array_keys($literal_map), array_values($literal_map), $search_string);

      // The string is reversed so that "preceded by a quoted phrase" can be written as a lookahead:
      // every / or ~ that does not directly follow a quoted phrase gets a backslash
      $reversed = preg_replace(
          ['#\/(?!"[^"]+")#', '#~(?!"[^"]+")#'],
          ['/\\', '~\\'],
          strrev($search_string)
      );
      $search_string = strrev($reversed);

      // Operator clean-up, applied in this order
      $operator_rules = [
          // quorum operator not followed by a single digit 1-9
          '#(/|\\\\/)(?![1-9](\s|$))#' => '\/',
          // proximity operator not followed by a one or two digit number
          '#(~|\\\\~)(?!\d{1,2}(\s|$))#' => '\~',
          // hyphenated words (two to four parts) become phrase-or-joined-prefix
          '#((?:\p{L}|\p{N})+)-((?:\p{L}|\p{N})+)(?:-((?:\p{L}|\p{N})+))?(?:-((?:\p{L}|\p{N})+))?#i' => '("$1 $2 $3 $4"|$1$2$3$4*)',
          // strict order operator at the very end of the query
          '#<<\s*$#' => '\<\<',
          // = after a non-space character, before whitespace or at the end of the query
          '#(\S\K=|=(?=\s)|=$)#' => '\=',
      ];
      $search_string = preg_replace(array_keys($operator_rules), array_values($operator_rules), $search_string);

      // Unused hyphen groups leave whitespace in front of the closing quote; remove it
      $search_string = preg_replace('#\s+"\|#', '"|', $search_string);

      /**
      * OPTIONAL: Thousands separator stripped from numbers, eg search for '90,000' is queried as '90000'.
      * By default commas are stripped from search index so that '90,000' is indexed as '90000'
      */
      // $search_string = preg_replace('#[0-9]{1,3}\K,(?=[0-9]{3})#', '', $search_string);

      return $search_string;
  }
  465. /**
  466. * Performs a search on keywords depending on display specific params. You have to run split_keywords() first
  467. *
  468. * @param string $type contains either posts or topics depending on what should be searched for
  469. * @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
  470. * @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
  471. * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
  472. * @param string $sort_key is the key of $sort_by_sql for the selected sorting
  473. * @param string $sort_dir is either a or d representing ASC and DESC
  474. * @param string $sort_days specifies the maximum amount of days a post may be old
  475. * @param array $ex_fid_ary specifies an array of forum ids which should not be searched
  476. * @param string $post_visibility specifies which types of posts the user can view in which forums
  477. * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
  478. * @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty
  479. * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
  480. * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
  481. * @param int $start indicates the first index of the page
  482. * @param int $per_page number of ids each page is supposed to contain
  483. * @return boolean|int total number of results
  484. */
  public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
  {
      global $user, $phpbb_log;

      // No keywords? No posts.
      if (!strlen($this->search_query) && !count($author_ary))
      {
          return false;
      }

      $id_ary = array();

      // Sorting: pick the attribute first, then apply it as group order (topics) or sort mode (posts)
      $sort_ascending = ($sort_dir == 'a');
      switch ($sort_key)
      {
          case 'a':
              $sort_attribute = 'poster_id';
          break;

          case 'f':
              $sort_attribute = 'forum_id';
          break;

          case 'i':
          case 's':
              $sort_attribute = 'post_subject';
          break;

          case 't':
          default:
              $sort_attribute = ($type == 'topics') ? 'topic_last_post_time' : 'post_time';
          break;
      }

      if ($type == 'topics')
      {
          $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, $sort_attribute . ' ' . ($sort_ascending ? 'ASC' : 'DESC'));
      }
      else
      {
          $this->sphinx->SetSortMode($sort_ascending ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, $sort_attribute);
      }

      // Most narrow filters first
      if ($topic_id)
      {
          $this->sphinx->SetFilter('topic_id', array($topic_id));
      }

      /**
      * Allow modifying the Sphinx search options
      *
      * @event core.search_sphinx_keywords_modify_options
      * @var	string	type				Searching type ('posts', 'topics')
      * @var	string	fields				Searching fields ('titleonly', 'msgonly', 'firstpost', 'all')
      * @var	string	terms				Searching terms ('all', 'any')
      * @var	int		sort_days			Time, in days, of the oldest possible post to list
      * @var	string	sort_key			The sort type used from the possible sort types
      * @var	int		topic_id			Limit the search to this topic_id only
      * @var	array	ex_fid_ary			Which forums not to search on
      * @var	string	post_visibility		Post visibility data
      * @var	array	author_ary			Array of user_id containing the users to filter the results to
      * @var	string	author_name			The username to search on
      * @var	object	sphinx				The Sphinx searchd client object
      * @since 3.1.7-RC1
      */
      $sphinx = $this->sphinx;
      $vars = array(
          'type',
          'fields',
          'terms',
          'sort_days',
          'sort_key',
          'topic_id',
          'ex_fid_ary',
          'post_visibility',
          'author_ary',
          'author_name',
          'sphinx',
      );
      extract($this->phpbb_dispatcher->trigger_event('core.search_sphinx_keywords_modify_options', compact($vars)));
      $this->sphinx = $sphinx;
      unset($sphinx);

      $search_query_prefix = '';

      switch ($fields)
      {
          case 'titleonly':
              // Only search the title
              if ($terms == 'all')
              {
                  $search_query_prefix = '@title ';
              }
              // Weight for the title
              $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1));
              // 1 is first_post, 0 is not first post
              $this->sphinx->SetFilter('topic_first_post', array(1));
          break;

          case 'msgonly':
              // Only search the body
              if ($terms == 'all')
              {
                  $search_query_prefix = '@data ';
              }
              // Weight for the body
              $this->sphinx->SetFieldWeights(array("title" => 1, "data" => 5));
          break;

          case 'firstpost':
              // More relative weight for the title, also search the body
              $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1));
              // 1 is first_post, 0 is not first post
              $this->sphinx->SetFilter('topic_first_post', array(1));
          break;

          default:
              // More relative weight for the title, also search the body
              $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1));
          break;
      }

      if (count($author_ary))
      {
          $this->sphinx->SetFilter('poster_id', $author_ary);
      }

      // As this is not simply possible at the moment, we limit the result to approved posts.
      // This will make it impossible for moderators to search unapproved and softdeleted posts,
      // but at least it will also cause the same for normal users.
      $this->sphinx->SetFilter('post_visibility', array(ITEM_APPROVED));

      if (count($ex_fid_ary))
      {
          // All forums that a user is allowed to access
          $fid_ary = array_unique(array_intersect(array_keys($this->auth->acl_getf('f_read', true)), array_keys($this->auth->acl_getf('f_search', true))));
          // All forums that the user wants to and can search in
          $search_forums = array_diff($fid_ary, $ex_fid_ary);

          if (count($search_forums))
          {
              $this->sphinx->SetFilter('forum_id', $search_forums);
          }
      }

      $this->sphinx->SetFilter('deleted', array(0));

      $this->sphinx->SetLimits((int) $start, (int) $per_page, max(SPHINX_MAX_MATCHES, (int) $start + $per_page));

      // The query is cleaned by this class, not by the SphinxClient object, which has no
      // sphinx_clean_search_string() method. The cleaned string is identical for every
      // attempt, so it is built once.
      $query = $search_query_prefix . $this->sphinx_clean_search_string(str_replace('&quot;', '"', $this->search_query));

      $result = $this->sphinx_query_with_retries($query);

      if ($this->sphinx->GetLastError())
      {
          $phpbb_log->add('critical', $user->data['user_id'], $user->ip, 'LOG_SPHINX_ERROR', false, array($this->sphinx->GetLastError()));

          // Only administrators get to see the raw searchd error message
          if ($this->auth->acl_get('a_'))
          {
              trigger_error($this->user->lang('SPHINX_SEARCH_FAILED', $this->sphinx->GetLastError()));
          }
          else
          {
              trigger_error($this->user->lang('SPHINX_SEARCH_FAILED_LOG'));
          }
      }

      $result_count = $result['total_found'];

      // The requested offset lies beyond the last result: move back to the last page and query again
      if ($result_count && $start >= $result_count)
      {
          $start = floor(($result_count - 1) / $per_page) * $per_page;

          $this->sphinx->SetLimits((int) $start, (int) $per_page, max(SPHINX_MAX_MATCHES, (int) $start + $per_page));
          $result = $this->sphinx_query_with_retries($query);
      }

      $id_ary = array();
      if (!isset($result['matches']))
      {
          return false;
      }

      if ($type == 'posts')
      {
          // Matches are keyed by document id, which is the post id
          $id_ary = array_keys($result['matches']);
      }
      else
      {
          foreach ($result['matches'] as $match)
          {
              $id_ary[] = $match['attrs']['topic_id'];
          }
      }

      $id_ary = array_slice($id_ary, 0, (int) $per_page);

      return $result_count;
  }

  /**
  * Sends a query to searchd and repeats it while the connection is refused
  *
  * Could be connection to localhost:9312 failed (errno=111,
  * msg=Connection refused) during rotate, retry if so. At most
  * SPHINX_CONNECT_RETRIES additional attempts are made, each after
  * a usleep() of SPHINX_CONNECT_WAIT_TIME.
  *
  * @param	string	$query	Cleaned query string, including any field prefix
  * @return	array|bool	Value returned by the last SphinxClient::Query() call
  */
  protected function sphinx_query_with_retries($query)
  {
      $result = $this->sphinx->Query($query, $this->indexes);

      $retries = SPHINX_CONNECT_RETRIES;
      while (!$result && (strpos($this->sphinx->GetLastError(), "errno=111,") !== false) && $retries--)
      {
          usleep(SPHINX_CONNECT_WAIT_TIME);
          $result = $this->sphinx->Query($query, $this->indexes);
      }

      return $result;
  }
  687. /**
  688. * Performs a search on an author's posts without caring about message contents. Depends on display specific params
  689. *
  690. * @param string $type contains either posts or topics depending on what should be searched for
  691. * @param boolean $firstpost_only if true, only topic starting posts will be considered
  692. * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
  693. * @param string $sort_key is the key of $sort_by_sql for the selected sorting
  694. * @param string $sort_dir is either a or d representing ASC and DESC
  695. * @param string $sort_days specifies the maximum amount of days a post may be old
  696. * @param array $ex_fid_ary specifies an array of forum ids which should not be searched
  697. * @param string $post_visibility specifies which types of posts the user can view in which forums
  698. * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
  699. * @param array $author_ary an array of author ids
  700. * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
  701. * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
  702. * @param int $start indicates the first index of the page
  703. * @param int $per_page number of ids each page is supposed to contain
  704. * @return boolean|int total number of results
  705. */
  public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
  {
      // No keywords are involved in an author search: blank the stored query
      // and switch the Sphinx client to full-scan matching
      $this->search_query = '';
      $this->sphinx->SetMatchMode(SPH_MATCH_FULLSCAN);

      // Restrict to topic starters only when explicitly requested
      if ($firstpost_only)
      {
          $search_fields = 'firstpost';
      }
      else
      {
          $search_fields = 'all';
      }
      $match_terms = 'all';

      // The actual lookup is delegated to keyword_search(), which receives
      // the sorting, visibility and author arguments unchanged
      return $this->keyword_search($type, $search_fields, $match_terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, $id_ary, $start, $per_page);
  }
  714. /**
  715. * Updates Sphinx index attributes when a message is posted or changed
  716. *
  717. * @param string $mode Contains the post mode: edit, post, reply, quote
  718. * @param int $post_id The id of the post which is modified/created
  719. * @param string &$message New or updated post content
  720. * @param string &$subject New or updated post subject
  721. * @param int $poster_id Post author's user id
  722. * @param int $forum_id The id of the forum in which the post is located
  723. */
  public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
  {
      /**
      * Event to modify method arguments before the Sphinx search index is updated
      *
      * @event core.search_sphinx_index_before
      * @var string mode Contains the post mode: edit, post, reply, quote
      * @var int post_id The id of the post which is modified/created
      * @var string message New or updated post content
      * @var string subject New or updated post subject
      * @var int poster_id Post author's user id
      * @var int forum_id The id of the forum in which the post is located
      * @since 3.2.3-RC1
      */
      $vars = array(
          'mode',
          'post_id',
          'message',
          'subject',
          'poster_id',
          'forum_id',
      );
      // The names listed in $vars must match the local variables: compact() reads
      // them and extract() writes the (possibly modified) values back
      extract($this->phpbb_dispatcher->trigger_event('core.search_sphinx_index_before', compact($vars)));

      if ($mode == 'edit')
      {
          // Edit: push the post's forum and poster ids to the Sphinx attributes
          $attribute_names = array('forum_id', 'poster_id');
          $attribute_values = array(
              (int) $post_id => array((int) $forum_id, (int) $poster_id),
          );
          $this->sphinx->UpdateAttributes($this->indexes, $attribute_names, $attribute_values);
          return;
      }

      // A brand new topic ("post") or a missing post id needs no attribute update
      if ($mode == 'post' || !$post_id)
      {
          return;
      }

      // Update topic_last_post_time for full topic:
      // select every post sharing a topic with the given post
      $sql_array = array(
          'SELECT' => 'p1.post_id',
          'FROM' => array(
              POSTS_TABLE => 'p1',
          ),
          'LEFT_JOIN' => array(array(
              'FROM' => array(
                  POSTS_TABLE => 'p2'
              ),
              'ON' => 'p1.topic_id = p2.topic_id',
          )),
          'WHERE' => 'p2.post_id = ' . ((int) $post_id),
      );
      $query_result = $this->db->sql_query($this->db->sql_build_query('SELECT', $sql_array));

      // All posts of the topic receive the same timestamp
      $topic_post_times = array();
      $now = time();
      while ($post_row = $this->db->sql_fetchrow($query_result))
      {
          $topic_post_times[(int) $post_row['post_id']] = array($now);
      }
      $this->db->sql_freeresult($query_result);

      if (!empty($topic_post_times))
      {
          $this->sphinx->UpdateAttributes($this->indexes, array('topic_last_post_time'), $topic_post_times);
      }
  }
  782. /**
  783. * Marks posts as deleted in the index after they were deleted
  784. */
  public function index_remove($post_ids, $author_ids, $forum_ids)
  {
      // Build the per-document attribute map: every removed post gets its
      // "deleted" attribute set to 1. Ids are cast to int, matching the way
      // index() keys its attribute updates.
      $values = array();
      foreach ($post_ids as $post_id)
      {
          $values[(int) $post_id] = array(1);
      }

      // Nothing to flag: do not issue an attribute update at all
      // (same guard index() applies before calling UpdateAttributes)
      if (!count($values))
      {
          return;
      }

      $this->sphinx->UpdateAttributes($this->indexes, array('deleted'), $values);
  }
  794. /**
  795. * Nothing needs to be destroyed, only the search_last_gc config value is updated
  796. */
  public function tidy($create = false)
  {
      // Only bookkeeping happens here: store the current time as search_last_gc
      // ($create is accepted but not used by this backend)
      $current_time = time();
      $this->config->set('search_last_gc', $current_time, false);
  }
  801. /**
  802. * Create sphinx table
  803. *
  804. * @return string|bool error string is returned in case of errors, otherwise false
  805. */
  public function create_index($acp_module, $u_action)
  {
      // The counter table already exists: nothing to set up
      if ($this->index_created())
      {
          return false;
      }

      // Two unsigned columns, keyed by counter_id
      $counter_columns = array(
          'counter_id' => array('UINT', 0),
          'max_doc_id' => array('UINT', 0),
      );
      $this->db_tools->sql_create_table(SPHINX_TABLE, array(
          'COLUMNS' => $counter_columns,
          'PRIMARY_KEY' => 'counter_id',
      ));

      // Start from an empty table, then seed the single counter row
      $this->db->sql_query('TRUNCATE TABLE ' . SPHINX_TABLE);

      $seed_row = array(
          'counter_id' => '1',
          'max_doc_id' => '0',
      );
      $this->db->sql_query('INSERT INTO ' . SPHINX_TABLE . ' ' . $this->db->sql_build_array('INSERT', $seed_row));

      // false signals "no error" to the caller
      return false;
  }
  829. /**
  830. * Drop sphinx table
  831. *
  832. * @return string|bool error string is returned in case of errors, otherwise false
  833. */
  public function delete_index($acp_module, $u_action)
  {
      // Drop the counter table only when it is actually there
      if ($this->index_created())
      {
          $this->db_tools->sql_table_drop(SPHINX_TABLE);
      }

      // false is returned in every case, i.e. no error is ever reported from here
      return false;
  }
  843. /**
  844. * Returns true if the sphinx table was created
  845. *
  846. * @return bool true if sphinx table was created
  847. */
  public function index_created($allow_new_files = true)
  {
      // The index counts as created exactly when the Sphinx counter table exists;
      // the result is normalised to a strict boolean
      return ($this->db_tools->sql_table_exists(SPHINX_TABLE)) ? true : false;
  }
  857. /**
  858. * Returns an associative array containing information about the indexes
  859. *
  860. * @return array Associative array mapping language strings to post counts
  861. */
  public function index_stats()
  {
      // Collect the numbers on first use only
      if (empty($this->stats))
      {
          $this->get_stats();
      }

      // Check for the Sphinx table once instead of once per array entry;
      // without it every count is reported as 0
      $created = $this->index_created();
      $main_posts = ($created) ? $this->stats['main_posts'] : 0;
      $total_posts = ($created) ? $this->stats['total_posts'] : 0;

      // Delta posts are those counted in total but not in the main index
      return array(
          $this->user->lang['FULLTEXT_SPHINX_MAIN_POSTS'] => $main_posts,
          $this->user->lang['FULLTEXT_SPHINX_DELTA_POSTS'] => $total_posts - $main_posts,
          $this->user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => $total_posts,
      );
  }
  874. /**
  875. * Collects stats that can be displayed on the index maintenance page
  876. */
  protected function get_stats()
  {
      // Without the Sphinx counter table there is nothing to count against
      if (!$this->index_created())
      {
          return;
      }

      // Total number of posts on the board
      $total_sql = 'SELECT COUNT(post_id) as total_posts
          FROM ' . POSTS_TABLE;
      $total_result = $this->db->sql_query($total_sql);
      $this->stats['total_posts'] = (int) $this->db->sql_fetchfield('total_posts');
      $this->db->sql_freeresult($total_result);

      // Posts whose id does not exceed max_doc_id of counter row 1
      $main_sql = 'SELECT COUNT(p.post_id) as main_posts
          FROM ' . POSTS_TABLE . ' p, ' . SPHINX_TABLE . ' m
          WHERE p.post_id <= m.max_doc_id
              AND m.counter_id = 1';
      $main_result = $this->db->sql_query($main_sql);
      $this->stats['main_posts'] = (int) $this->db->sql_fetchfield('main_posts');
      $this->db->sql_freeresult($main_result);
  }
  895. /**
  896. * Returns a list of options for the ACP to display
  897. *
  898. * @return array Associative array containing template and config variables
  899. */
  public function acp()
  {
      // Config keys handled by the ACP form, mapped to their value types
      $config_vars = array(
          'fulltext_sphinx_data_path' => 'string',
          'fulltext_sphinx_host' => 'string',
          'fulltext_sphinx_port' => 'string',
          'fulltext_sphinx_indexer_mem_limit' => 'int',
      );

      // One definition list per setting. The last list shows the generated
      // Sphinx config: config_generate() is evaluated first and, on success,
      // config_file_data is escaped and shown in a read-only textarea;
      // otherwise config_file_data is emitted as-is (presumably an error
      // message - confirm against config_generate()).
      // The final list is now properly terminated with </dl>; it was
      // previously left unclosed by a stray opening <dl> tag.
      $tpl = '
      <span class="error">' . $this->user->lang['FULLTEXT_SPHINX_CONFIGURE']. '</span>
      <dl>
          <dt><label for="fulltext_sphinx_data_path">' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH_EXPLAIN'] . '</span></dt>
          <dd><input id="fulltext_sphinx_data_path" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_data_path]" value="' . $this->config['fulltext_sphinx_data_path'] . '" /></dd>
      </dl>
      <dl>
          <dt><label for="fulltext_sphinx_host">' . $this->user->lang['FULLTEXT_SPHINX_HOST'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_HOST_EXPLAIN'] . '</span></dt>
          <dd><input id="fulltext_sphinx_host" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_host]" value="' . $this->config['fulltext_sphinx_host'] . '" /></dd>
      </dl>
      <dl>
          <dt><label for="fulltext_sphinx_port">' . $this->user->lang['FULLTEXT_SPHINX_PORT'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_PORT_EXPLAIN'] . '</span></dt>
          <dd><input id="fulltext_sphinx_port" type="number" min="0" max="9999999999" name="config[fulltext_sphinx_port]" value="' . $this->config['fulltext_sphinx_port'] . '" /></dd>
      </dl>
      <dl>
          <dt><label for="fulltext_sphinx_indexer_mem_limit">' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT_EXPLAIN'] . '</span></dt>
          <dd><input id="fulltext_sphinx_indexer_mem_limit" type="number" min="0" max="9999999999" name="config[fulltext_sphinx_indexer_mem_limit]" value="' . $this->config['fulltext_sphinx_indexer_mem_limit'] . '" /> ' . $this->user->lang['MIB'] . '</dd>
      </dl>
      <dl>
          <dt><label for="fulltext_sphinx_config_file">' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE_EXPLAIN'] . '</span></dt>
          <dd>' . (($this->config_generate()) ? '<textarea readonly="readonly" rows="6" id="sphinx_config_data">' . htmlspecialchars($this->config_file_data) . '</textarea>' : $this->config_file_data) . '</dd>
      </dl>
      ';

      // These are fields required in the config table
      return array(
          'tpl' => $tpl,
          'config' => $config_vars
      );
  }
  937. }