PageRenderTime 29ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/frontend/modules/search/engine/model.php

http://github.com/forkcms/forkcms
PHP | 437 lines | 227 code | 61 blank | 149 comment | 20 complexity | 6ca05a2c9374d05b2794dc36264dfda2 MD5 | raw file
Possible License(s): MPL-2.0-no-copyleft-exception, MIT, AGPL-3.0, LGPL-2.1, BSD-3-Clause
  1. <?php
  2. /*
  3. * This file is part of Fork CMS.
  4. *
  5. * For the full copyright and license information, please view the license
  6. * file that was distributed with this source code.
  7. */
  8. /**
  9. * In this file we store all generic functions that we will be using in the search module
  10. *
  11. * @author Matthias Mullie <matthias@mullie.eu>
  12. */
  13. class FrontendSearchModel
  14. {
  15. /**
  16. * Build the search term
  17. *
  18. * @param string $terms The string to build.
  19. * @return string
  20. */
  21. public static function buildTerm($terms)
  22. {
  23. // loop all items
  24. foreach($terms as $i => $term)
  25. {
  26. // trim terms
  27. $term = trim($term);
  28. // last word may be incomplete (still typing)
  29. $split = explode(' ', $term);
  30. $last = (string) array_pop($split);
  31. $terms[$i] = ($split ? '+' . implode(' +', $split) . ' ' : '') . '(>+' . $last . ' <+' . $last . '*)';
  32. // current string encountered
  33. $terms[$i] = '>' . $terms[$i];
  34. if(strpos($terms[$i], ' ') !== false)
  35. {
  36. // part of words encountered
  37. $terms[$i] .= ' <(' . implode(' ', $split) . ' ' . trim($last) . '*)';
  38. }
  39. }
  40. return $terms;
  41. }
  42. /**
  43. * Execute actual search
  44. *
  45. * This function can be called with either a string as parameter (simple search) or an array (advanced search)
  46. * Simple search: all search index fields will be searched for the given term
  47. * Advanced search: only the given fields (keys in the array) will be matched to the corresponding values (correspinding values in the array)
  48. *
  49. * @param mixed $term The searchterm (simple search) or the fields to search for (advanced search - please note that the field names may not be consistent throughout several modules).
  50. * @param int[optional] $limit The number of articles to get.
  51. * @param int[optional] $offset The offset.
  52. * @return array
  53. */
  54. public static function execSearch($term, $limit = 20, $offset = 0)
  55. {
  56. $limit = (int) $limit;
  57. $offset = (int) $offset;
  58. // advanced search
  59. if(is_array($term))
  60. {
  61. // init vars
  62. $where = array();
  63. $order = array();
  64. $join = array();
  65. $params1 = array();
  66. $params2 = array();
  67. // loop all searches
  68. foreach($term as $field => $value)
  69. {
  70. // get all terms to search for (including synonyms)
  71. $terms = self::getSynonyms((string) $value);
  72. // build search terms
  73. $terms = self::buildTerm($terms);
  74. $queryNr = count($where);
  75. // add query
  76. $where[$queryNr] = '(' . substr(str_repeat('MATCH (i' . $queryNr . '.value) AGAINST (? IN BOOLEAN MODE) OR ', count($terms)), 0, -4) . ') AND i' . $queryNr . '.field = ? AND i' . $queryNr . '.language = ? AND i' . $queryNr . '.active = ? AND m' . $queryNr . '.searchable = ?';
  77. $order[$queryNr] = '(' . substr(str_repeat('MATCH (i' . $queryNr . '.value) AGAINST (? IN BOOLEAN MODE) + ', count($terms)), 0, -3) . ') * m' . $queryNr . '.weight';
  78. $join[$queryNr] = 'search_index AS i' . $queryNr . ($join ? ' ON i' . $queryNr . '.module = i0.module AND i' . $queryNr . '.other_id = i0.other_id' : '') . ' INNER JOIN search_modules AS m' . $queryNr . ' ON m' . $queryNr . '.module = i' . $queryNr . '.module';
  79. // add params
  80. $params1 = array_merge($params1, $terms);
  81. $params2 = array_merge($params2, $terms, array((string) $field, FRONTEND_LANGUAGE, 'Y', 'Y'));
  82. }
  83. // prepare query and params
  84. $query =
  85. 'SELECT i0.module, i0.other_id, ' . implode(' + ', $order) . ' AS score
  86. FROM ' . implode(' INNER JOIN ', $join) . '
  87. WHERE ' . implode(' AND ', $where) . '
  88. ORDER BY score DESC
  89. LIMIT ?, ?';
  90. $params = array_merge($params1, $params2, array($offset, $limit));
  91. }
  92. // simple search
  93. else
  94. {
  95. // get all terms to search for (including synonyms)
  96. $terms = self::getSynonyms((string) $term);
  97. // build search terms
  98. $terms = self::buildTerm($terms);
  99. // prepare query and params
  100. $query =
  101. 'SELECT i.module, i.other_id, SUM(' . substr(str_repeat('MATCH (i.value) AGAINST (? IN BOOLEAN MODE) + ', count($terms)), 0, -3) . ') * m.weight AS score
  102. FROM search_index AS i
  103. INNER JOIN search_modules AS m ON i.module = m.module
  104. WHERE (' . substr(str_repeat('MATCH (i.value) AGAINST (? IN BOOLEAN MODE) OR ', count($terms)), 0, -4) . ') AND i.language = ? AND i.active = ? AND m.searchable = ?
  105. GROUP BY module, other_id
  106. ORDER BY score DESC
  107. LIMIT ?, ?';
  108. $params = array_merge($terms, $terms, array(FRONTEND_LANGUAGE, 'Y', 'Y', $offset, $limit));
  109. }
  110. return (array) FrontendModel::getDB()->getRecords($query, $params);
  111. }
  112. /**
  113. * Get preview searches that start with ...
  114. *
  115. * @param string $term The first letters of the term we're looking for.
  116. * @param string[optional] $language The language to search in.
  117. * @param int[optional] $limit Limit resultset.
  118. * @return array
  119. */
  120. public static function getStartsWith($term, $language = '', $limit = 10)
  121. {
  122. // language given
  123. if($language)
  124. {
  125. return (array) FrontendModel::getDB()->getRecords(
  126. 'SELECT s1.term, s1.num_results
  127. FROM search_statistics AS s1
  128. INNER JOIN
  129. (
  130. SELECT term, MAX(id) AS id, language
  131. FROM search_statistics
  132. WHERE term LIKE ? AND num_results IS NOT NULL AND language = ?
  133. GROUP BY term
  134. ) AS s2 ON s1.term = s2.term AND s1.id = s2.id AND s1.language = s2.language AND s1.num_results > 0
  135. ORDER BY s1.num_results ASC
  136. LIMIT ?',
  137. array((string) $term . '%', $language, $limit)
  138. );
  139. }
  140. // no language given
  141. else
  142. {
  143. return (array) FrontendModel::getDB()->getRecords(
  144. 'SELECT s1.term, s1.num_results
  145. FROM search_statistics AS s1
  146. INNER JOIN
  147. (
  148. SELECT term, MAX(id) AS id, language
  149. FROM search_statistics
  150. WHERE term LIKE ? AND num_results IS NOT NULL
  151. GROUP BY term
  152. ) AS s2 ON s1.term = s2.term AND s1.id = s2.id AND s1.language = s2.language AND s1.num_results > 0
  153. ORDER BY s1.num_results ASC
  154. LIMIT ?',
  155. array((string) $term . '%', $limit)
  156. );
  157. }
  158. }
  159. /**
  160. * Get synonyms
  161. *
  162. * @param string $term The term to get synonyms for.
  163. * @return array
  164. */
  165. public static function getSynonyms($term)
  166. {
  167. // query db for synonyms
  168. $synonyms = FrontendModel::getDB()->getVar(
  169. 'SELECT synonym
  170. FROM search_synonyms
  171. WHERE term = ?',
  172. array((string) $term)
  173. );
  174. // found any? merge with original term
  175. if($synonyms) return array_unique(array_merge(array($term), explode(',', $synonyms)));
  176. // only original term
  177. return array($term);
  178. }
  179. /**
  180. * Get total results
  181. *
  182. * Note: please be aware that this is an approximate amount. It IS possible that this is not the exact amount of search results,
  183. * since search results may vary in time (entries may not yet/no longer be shown) and we will not rebuild the entire search index
  184. * on every search (would be a great performance killer and huge scalibility loss)
  185. *
  186. * This function can be called with either a string as parameter (simple search) or an array (advanced search)
  187. * Simple search: all search index fields will be searched for the given term
  188. * Advanced search: only the given fields (keys in the array) will be matched to the corresponding values (correspinding values in the array)
  189. *
  190. * @param mixed $term The searchterm (simple search) or the fields to search for (advanced search - please note that the field names may not be consistent throughout several modules).
  191. * @return int
  192. */
  193. public static function getTotal($term)
  194. {
  195. // advanced search
  196. if(is_array($term))
  197. {
  198. // init vars
  199. $where = array();
  200. $join = array();
  201. $params = array();
  202. // loop all searches
  203. foreach($term as $field => $value)
  204. {
  205. // get all terms to search for (including synonyms)
  206. $terms = self::getSynonyms((string) $value);
  207. // build search terms
  208. $terms = self::buildTerm($terms);
  209. $queryNr = count($where);
  210. // add query
  211. $where[$queryNr] = '(' . substr(str_repeat('MATCH (i' . $queryNr . '.value) AGAINST (? IN BOOLEAN MODE) OR ', count($terms)), 0, -4) . ') AND i' . $queryNr . '.field = ? AND i' . $queryNr . '.language = ? AND i' . $queryNr . '.active = ? AND m' . $queryNr . '.searchable = ?';
  212. $join[$queryNr] = 'search_index AS i' . $queryNr . ($join ? ' ON i' . $queryNr . '.module = i0.module AND i' . $queryNr . '.other_id = i0.other_id' : '') . ' INNER JOIN search_modules AS m' . $queryNr . ' ON m' . $queryNr . '.module = i' . $queryNr . '.module';
  213. // add params
  214. $params = array_merge($params, $terms, array((string) $field, FRONTEND_LANGUAGE, 'Y', 'Y'));
  215. }
  216. // prepare query and params
  217. $query =
  218. 'SELECT COUNT(module)
  219. FROM
  220. (
  221. SELECT i0.module, i0.other_id
  222. FROM ' . implode(' INNER JOIN ', $join) . '
  223. WHERE ' . implode(' AND ', $where) . '
  224. ) AS results';
  225. }
  226. // simple search
  227. else
  228. {
  229. // get all terms to search for (including synonyms)
  230. $terms = self::getSynonyms((string) $term);
  231. // build search terms
  232. $terms = self::buildTerm($terms);
  233. // prepare query and params
  234. $query =
  235. 'SELECT COUNT(module)
  236. FROM
  237. (
  238. SELECT i.module
  239. FROM search_index AS i
  240. INNER JOIN search_modules AS m ON i.module = m.module
  241. WHERE (' . substr(str_repeat('MATCH (i.value) AGAINST (? IN BOOLEAN MODE) OR ', count($terms)), 0, -4) . ') AND i.language = ? AND i.active = ? AND m.searchable = ?
  242. GROUP BY i.module, i.other_id
  243. ) AS results';
  244. $params = array_merge($terms, array(FRONTEND_LANGUAGE, 'Y', 'Y'));
  245. }
  246. // get the search results
  247. return (int) FrontendModel::getDB()->getVar($query, $params);
  248. }
  249. /**
  250. * Save a search
  251. *
  252. * @param array $item The data to store.
  253. */
  254. public static function save($item)
  255. {
  256. FrontendModel::getDB(true)->insert('search_statistics', $item);
  257. }
  258. /**
  259. * Search
  260. * The actual search will be performed by the execSearch() method.
  261. * It will then pass on the results to the specific modules, which are then responsible for returning the required data and filtering out unwanted results (entries that should not be shown)
  262. * The activation/deactivation of search indices will automatically be handled by this function to keep the search index up to date, based on the module's returned results
  263. *
  264. * This function can be called with either a string as parameter (simple search) or an array (advanced search)
  265. * Simple search: all search index fields will be searched for the given term
  266. * Advanced search: only the given fields (keys in the array) will be matched to the corresponding values (correspinding values in the array)
  267. *
  268. * @param mixed $term The searchterm (simple search) or the fields to search for (advanced search - please note that the field names may not be consistent throughout several modules).
  269. * @param int[optional] $limit The number of articles to get.
  270. * @param int[optional] $offset The offset.
  271. * @return array
  272. */
  273. public static function search($term, $limit = 20, $offset = 0)
  274. {
  275. // revalidate searches
  276. if(FrontendModel::getModuleSetting('search', 'validate_search', true) == true) self::validateSearch(); // @note: on heavy sites with a lot of inactive search indices better use a cronjob (which will automatically set this module setting to N)
  277. // execute the actual search
  278. $searchResults = self::execSearch($term, $limit, $offset);
  279. // get the total amount of results (we'll get back to this later ;) )
  280. $total = count($searchResults);
  281. // none found? return empty :(
  282. if(!$searchResults) return array();
  283. // prepare to send to modules
  284. $moduleResults = array();
  285. // loop the resultset
  286. foreach($searchResults as $searchResult) $moduleResults[$searchResult['module']][] = $searchResult['other_id'];
  287. // pass the results to the modules
  288. foreach($moduleResults as $module => $otherIds)
  289. {
  290. // check if this module actually is prepared to handle searches (well it should, because else there shouldn't be any search indices)
  291. if(is_callable(array('Frontend' . SpoonFilter::toCamelCase($module) . 'Model', 'search')))
  292. {
  293. // get the required info from our module
  294. $moduleResults[$module] = call_user_func(array('Frontend' . SpoonFilter::toCamelCase($module) . 'Model', 'search'), $otherIds);
  295. }
  296. // does not exist, let's get this module out of here
  297. else unset($moduleResults[$module]);
  298. }
  299. // now place the prepared data back in our original resultset, which has our results in correct order
  300. foreach($searchResults as $i => $result)
  301. {
  302. // loop parsed results for this specific module to find the one we want here
  303. foreach($moduleResults[$result['module']] as $otherId => $moduleResult)
  304. {
  305. // that's the one..
  306. if($otherId == $result['other_id'])
  307. {
  308. $searchResults[$i] = array_merge(array('module' => $result['module']), $moduleResult);
  309. continue 2;
  310. }
  311. }
  312. // if we made it here, we obviously did not get this result parsed by the module, so remove it!
  313. unset($searchResults[$i]);
  314. self::statusIndex($result['module'], (array) $result['other_id'], false);
  315. }
  316. // results got removed by the module? oh noes :o have another run, because now we've deactivated those responsible for the holes :)
  317. if(count($searchResults) < $total && $total == $limit) $searchResults = self::search($term, $limit, $offset);
  318. // return results
  319. return $searchResults;
  320. }
  321. /**
  322. * Deactivate an index (no longer has to be searched)
  323. *
  324. * @param string $module The module we're deleting an item from.
  325. * @param array $otherIds An array of other_id's for this module.
  326. * @param bool[optional] $active Set the index to active?
  327. */
  328. public static function statusIndex($module, array $otherIds, $active = true)
  329. {
  330. // redefine
  331. $active = ($active && $active !== 'N') ? 'Y' : 'N';
  332. // deactivate!
  333. if($otherIds) FrontendModel::getDB(true)->update('search_index', array('active' => $active), 'module = ? AND other_id IN (' . implode(',', $otherIds) . ')', array((string) $module));
  334. }
  335. /**
  336. * Validate searches: check everything that has been marked as 'inactive', if should still be inactive
  337. */
  338. public static function validateSearch()
  339. {
  340. // we'll iterate through the inactive search indices in little batches
  341. $offset = 0;
  342. $limit = 50;
  343. while(1)
  344. {
  345. // get the inactive indices
  346. $searchResults = (array) FrontendModel::getDB()->getRecords(
  347. 'SELECT module, other_id
  348. FROM search_index
  349. WHERE language = ? AND active = ?
  350. GROUP BY module, other_id
  351. LIMIT ?, ?',
  352. array(FRONTEND_LANGUAGE, 'N', $offset, $limit)
  353. );
  354. // none found? good news!
  355. if(!$searchResults) return;
  356. // prepare to send to modules
  357. $moduleResults = array();
  358. // loop the resultset
  359. foreach($searchResults as $searchResult) $moduleResults[$searchResult['module']][] = $searchResult['other_id'];
  360. // pass the results to the modules
  361. foreach($moduleResults as $module => $otherIds)
  362. {
  363. // check if this module actually is prepared to handle searches (well it should, because else there shouldn't be any search indices)
  364. if(is_callable(array('Frontend' . SpoonFilter::ucfirst($module) . 'Model', 'search')))
  365. {
  366. $moduleResults[$module] = call_user_func(array('Frontend' . SpoonFilter::ucfirst($module) . 'Model', 'search'), $otherIds);
  367. // update the ones that are allowed to be searched through
  368. self::statusIndex($module, array_keys($moduleResults[$module]), true);
  369. }
  370. }
  371. // didn't even get the amount of result we asked for? no need to ask again!
  372. if(count($searchResults) < $offset) return;
  373. $offset += $limit;
  374. }
  375. }
  376. }