PageRenderTime 56ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/mod/search/start.php

https://github.com/fragilbert/Elgg
PHP | 502 lines | 273 code | 85 blank | 144 comment | 51 complexity | a4a1ad986fdc02768e4bfaece3f068da MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, LGPL-2.1, GPL-2.0
  1. <?php
  /**
   * Elgg search plugin
   *
   * Plugin setup is deferred to search_init(), which is registered below as
   * the handler for the 'init', 'system' event.
   */
  elgg_register_event_handler('init','system','search_init');
  /**
   * Initialize the search plugin.
   *
   * Loads the hook implementations, registers the 'search' page handler and the
   * default search hooks, stores the full-text word length limits in
   * $CONFIG->search_info and extends the core CSS and header views.
   *
   * @return void
   */
  function search_init() {
      global $CONFIG;

      require_once 'search_hooks.php';

      // page handler for search actions and results
      elgg_register_page_handler('search', 'search_page_handler');

      // Default search hooks, registered in this order. Tags and comments are
      // custom search types, so each one also gets a 'search_types' hook.
      $hook_handlers = array(
          array('search', 'object', 'search_objects_hook'),
          array('search', 'user', 'search_users_hook'),
          array('search', 'group', 'search_groups_hook'),
          array('search_types', 'get_types', 'search_custom_types_tags_hook'),
          array('search', 'tags', 'search_tags_hook'),
          array('search_types', 'get_types', 'search_custom_types_comments_hook'),
          array('search', 'comments', 'search_comments_hook'),
      );
      foreach ($hook_handlers as $handler) {
          elgg_register_plugin_hook_handler($handler[0], $handler[1], $handler[2]);
      }

      // Fallback limits, used when the server does not report its own.
      $word_limits = array(
          'min_chars' => 4,
          'max_chars' => 90,
      );

      // Ask the server for the min and max word lengths used by full-text search.
      // get_data() is not used here because some servers don't have these
      // globals set, which throws a db exception.
      // NOTE(review): the mysql_* functions were removed in PHP 7; this only
      // works while get_db_link() returns a legacy mysql link.
      $read_link = get_db_link('read');
      $result = mysql_query('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max', $read_link);
      if ($result) {
          $row = mysql_fetch_assoc($result);
          if ($row) {
              $word_limits['min_chars'] = $row['min'];
              $word_limits['max_chars'] = $row['max'];
          }
      }
      $CONFIG->search_info = $word_limits;

      // add in CSS for search elements
      elgg_extend_view('css/elgg', 'search/css');

      // extend view for elgg topbar search box
      elgg_extend_view('page/elements/header', 'search/header');
  }
  /**
   * Page handler for search.
   *
   * When neither a 'q' nor a 'tag' input is present, the first URL segment (if
   * any) is used as the query so that legacy style URLs keep working. The
   * search index page is then included.
   *
   * @param array $page Page elements from core page handler
   * @return bool Always true
   */
  function search_page_handler($page) {
      // if there is no q set, we're being called from a legacy installation.
      // maintain backward compatibility by taking the query from the URL.
      if (!get_input('q', get_input('tag', NULL))) {
          // Guard the lookup: a request without any URL segment has no
          // $page[0], and reading it unguarded raises an undefined offset error.
          $legacy_query = isset($page[0]) ? $page[0] : NULL;
          set_input('q', $legacy_query);
          //set_input('search_type', 'tags');
      }

      $base_dir = elgg_get_plugins_path() . 'search/pages/search';
      include_once("$base_dir/index.php");

      return true;
  }
  /**
   * Return a string with highlighted matched queries and relevant context.
   *
   * Tags are stripped from $haystack first. If what is left is no longer than
   * $max_length it is returned whole (highlighted). Otherwise every occurrence
   * of every query word is located, surrounded with $min_match_context
   * characters of context on each side, overlapping excerpts are merged, and
   * the excerpts that fit into $max_length are joined with '...'.
   *
   * If no excerpt can be built (no words, or no word occurs in the haystack)
   * the beginning of the haystack is returned, truncated to $max_length.
   *
   * @param string $haystack          Text to excerpt
   * @param string $query             Search query
   * @param int    $min_match_context Context kept on each side of a match (default 30)
   * @param int    $max_length        Upper bound for the excerpted text (default 300)
   * @param bool   $tag_match         Search is for tags; no words are extracted
   *                                  from $query, so nothing is highlighted
   * @return string
   */
  function search_get_highlighted_relevant_substrings($haystack, $query, $min_match_context = 30, $max_length = 300, $tag_match = false) {
      $haystack = strip_tags($haystack);
      $haystack_length = elgg_strlen($haystack);
      $haystack_lc = elgg_strtolower($haystack);

      if (!$tag_match) {
          $words = search_remove_ignored_words($query, 'array');
      } else {
          $words = array();
      }

      // short haystacks are returned whole, with formatting
      if ($haystack_length <= $max_length) {
          return search_highlight_words($words, $haystack);
      }

      // get the starting positions and lengths for all matching words
      $haystack_lc_length = elgg_strlen($haystack_lc);
      $starts = array();
      $lengths = array();
      foreach ($words as $word) {
          $word = elgg_strtolower($word);
          $word_len = elgg_strlen($word);
          if ($word_len < 1) {
              // an empty needle cannot be searched for
              continue;
          }

          // One loop handles zero, one or many occurrences. A word that does
          // not occur contributes no excerpt at all.
          $offset = 0;
          while ($offset < $haystack_lc_length
                  && FALSE !== ($pos = elgg_strpos($haystack_lc, $word, $offset))) {
              $start = max(0, $pos - $min_match_context);
              $stop = $pos + $word_len + $min_match_context;
              $starts[] = $start;
              $lengths[] = $stop - $start;

              // Continue right after this match. (Assigning, not adding: the
              // position is already absolute.)
              $offset = $pos + $word_len;
          }
      }

      $offsets = search_consolidate_substrings($starts, $lengths);

      // Nothing to centre an excerpt on: fall back to the start of the text.
      // The haystack is known to be longer than $max_length at this point.
      if (!$offsets) {
          $lead = trim(elgg_substr($haystack, 0, $max_length)) . '...';
          return search_highlight_words($words, $lead);
      }

      // figure out if we can adjust the offsets and lengths
      // in order to return more context
      $total_length = array_sum($offsets);
      if ($total_length < $max_length) {
          // spare characters per excerpt, split evenly between both sides
          $add_length = (int) floor((($max_length - $total_length) / count($offsets)) / 2);

          $starts = array();
          $lengths = array();
          foreach ($offsets as $offset => $length) {
              $start = max(0, $offset - $add_length);
              $stop = $offset + $length + $add_length;
              $starts[] = $start;
              $lengths[] = $stop - $start;
          }

          $offsets = search_consolidate_substrings($starts, $lengths);
      }

      // sort by order of string size descending (which is roughly
      // the proximity of matched terms) so we can keep the
      // substrings with terms closest together and discard
      // the others as needed to fit within $max_length.
      arsort($offsets);

      $return_strs = array();
      $total_length = 0;
      foreach ($offsets as $start => $length) {
          // continue past if adding this substring exceeds max length
          if ($total_length + $length > $max_length) {
              continue;
          }

          $total_length += $length;
          $return_strs[$start] = trim(elgg_substr($haystack, $start, $length));
      }

      // Every excerpt was too long on its own (one big cluster of matches):
      // keep the largest one, cut down to $max_length.
      if (!$return_strs) {
          reset($offsets);
          $start = key($offsets);
          $return_strs[$start] = trim(elgg_substr($haystack, $start, $max_length));
      }

      // put the strings in order of occurence
      ksort($return_strs);

      // add ...s where needed
      $return = implode('...', $return_strs);
      if (!array_key_exists(0, $return_strs)) {
          $return = "...$return";
      }

      // add to end of string if last substring doesn't hit the end.
      $kept_starts = array_keys($return_strs);
      $last_pos = $kept_starts[count($kept_starts) - 1];
      if ($last_pos + elgg_strlen($return_strs[$last_pos]) < $haystack_length) {
          $return .= '...';
      }

      return search_highlight_words($words, $return);
  }
  /**
   * Takes an array of offsets and lengths and consolidates any
   * overlapping entries, returning an array of new offsets and lengths.
   *
   * Offsets and lengths are specified in separate arrays because of possible
   * index collisions with the offsets: $lengths[$k] belongs to $offsets[$k].
   *
   * @param array $offsets Start positions
   * @param array $lengths Length of the span starting at the same index in $offsets
   * @return array Map of start offset => length of the merged, non-overlapping
   *               spans, in ascending order of offset
   */
  function search_consolidate_substrings($offsets, $lengths) {
      // Sort by occurrence. asort() keeps the original keys, which is what
      // lets every offset be paired with its own length below.
      asort($offsets, SORT_NUMERIC);

      // Re-index both lists together so that position $i in one still
      // corresponds to position $i in the other.
      $sorted_offsets = array();
      $sorted_lengths = array();
      foreach ($offsets as $key => $offset) {
          $sorted_offsets[] = $offset;
          $sorted_lengths[] = $lengths[$key];
      }

      $return = array();
      $count = count($sorted_offsets);
      for ($i = 0; $i < $count; $i++) {
          $offset = $sorted_offsets[$i];
          $end_pos = $offset + $sorted_lengths[$i];

          // Absorb every following span that starts before the current end.
          // The end only ever grows: a span lying entirely inside the current
          // one must not shorten it.
          while ($i + 1 < $count && $end_pos > $sorted_offsets[$i + 1]) {
              $i++;
              $end_pos = max($end_pos, $sorted_offsets[$i] + $sorted_lengths[$i]);
          }

          // will never have a colliding offset, so can return as a single array
          $return[$offset] = $end_pos - $offset;
      }

      return $return;
  }
  /**
   * Safely highlights the words in $words found in $string avoiding recursion.
   *
   * Every match is wrapped in
   * <strong class="search-highlight search-highlight-colorN">, where N is the
   * 1-based position of the word in $words. All words are matched in a single
   * pass over the original text, so the markup inserted for one word can never
   * be matched by another word. When two words match at the same position the
   * one listed first in $words wins.
   *
   * Matching is case-insensitive. The UTF-8 modifier is added when both the
   * words and the text are valid UTF-8.
   *
   * @param array  $words  Words to highlight; boolean mode operators are removed,
   *                       empty words are ignored
   * @param string $string Text to highlight in
   * @return string
   */
  function search_highlight_words($words, $string) {
      $string = (string) $string;

      // One capture group per usable word. The colour index is the position of
      // the word in $words, so it keeps counting across skipped (empty) words.
      $alternatives = array();
      $color_for_group = array();
      $color = 1;
      $group = 1;
      foreach ($words as $word) {
          // remove any boolean mode operators
          $word = (string) preg_replace("/([\-\+~])([\w]+)/i", '$2', $word);

          // an empty word would produce a pattern that matches at every position
          if ($word !== '') {
              // escape the delimiter and any other regexp special chars
              $alternatives[] = '(' . preg_quote($word, '/') . ')';
              $color_for_group[$group] = $color;
              $group++;
          }
          $color++;
      }

      if (!$alternatives) {
          return $string;
      }

      $pattern_body = implode('|', $alternatives);

      // Use UTF-8 mode only when it cannot fail: matching an empty pattern
      // with /u succeeds only if the subject is valid UTF-8.
      $modifiers = 'i';
      if (preg_match('//u', $pattern_body) === 1 && preg_match('//u', $string) === 1) {
          $modifiers .= 'u';
      }

      $found = preg_match_all(
          "/$pattern_body/$modifiers",
          $string,
          $matches,
          PREG_SET_ORDER | PREG_OFFSET_CAPTURE
      );
      if (!$found) {
          // no match, or a regexp error: leave the text untouched
          return $string;
      }

      // Rebuild the text, copying the parts between matches verbatim.
      // Offsets reported by PCRE are byte offsets, hence substr()/strlen().
      $result = '';
      $cursor = 0;
      foreach ($matches as $match) {
          list($text, $pos) = $match[0];

          // Groups that did not take part in the match report offset -1;
          // the first one with a real offset identifies the matched word.
          $match_color = 0;
          $group_count = count($match);
          for ($g = 1; $g < $group_count; $g++) {
              if ($match[$g][1] >= 0) {
                  $match_color = $color_for_group[$g];
                  break;
              }
          }

          $result .= substr($string, $cursor, $pos - $cursor);
          $result .= "<strong class=\"search-highlight search-highlight-color{$match_color}\">$text</strong>";
          $cursor = $pos + strlen($text);
      }
      $result .= substr($string, $cursor);

      return $result;
  }
  /**
   * Returns a query with too short words removed.
   *
   * The query is stripped of tags and slashes and split on single spaces.
   * Words shorter than $CONFIG->search_info['min_chars'] are dropped, unless
   * the entire query is shorter than that limit, in which case it is taken
   * literally. Empty words (produced by consecutive, leading or trailing
   * spaces) are always dropped.
   *
   * @param string $query  Search query
   * @param string $format 'string' to get the words joined by single spaces,
   *                       anything else to get an array
   * @return mixed Array of words (original indexes are kept, so there may be
   *               gaps) or a string, depending on $format
   */
  function search_remove_ignored_words($query, $format = 'array') {
      global $CONFIG;

      // don't worry about "s or boolean operators
      $query = stripslashes(strip_tags($query));
      $words = explode(' ', $query);

      $min_chars = $CONFIG->search_info['min_chars'];

      // a query shorter than the limit as a whole is used literally
      $literal_mode = (elgg_strlen($query) < $min_chars);

      foreach ($words as $i => $word) {
          // Empty words carry no information, and downstream they turn into
          // an empty needle or an empty regexp group.
          if ($word === '') {
              unset($words[$i]);
              continue;
          }

          // clean out any words that are ignored by mysql
          if (!$literal_mode && elgg_strlen($word) < $min_chars) {
              unset($words[$i]);
          }
      }

      if ($format == 'string') {
          return implode(' ', $words);
      }

      return $words;
  }
  /**
   * Finds the most specific existing view for displaying search results.
   *
   * Candidates are tried in this order and the first one that exists wins:
   *  - search/<type>/<subtype>/<view_type>
   *  - search/<type>/<view_type>
   *  - search/<search_type>/<view_type>
   *  - search/<view_type>
   *
   * @param array  $params    Search params; 'type', 'subtype' and 'search_type'
   *                          are used when set and non-empty
   * @param string $view_type list, entity or layout
   * @return mixed View name, or FALSE for an unknown view type or when no
   *               candidate view exists
   */
  function search_get_search_view($params, $view_type) {
      $known_view_types = array('list', 'entity', 'layout');
      if (!in_array($view_type, $known_view_types)) {
          return FALSE;
      }

      $has_type = !empty($params['type']);
      $has_subtype = !empty($params['subtype']);
      $has_search_type = !empty($params['search_type']);

      $candidates = array();

      // a special view for this type:subtype
      if ($has_type && $has_subtype) {
          $candidates[] = "search/{$params['type']}/{$params['subtype']}/$view_type";
      }

      // the default view for the type
      if ($has_type) {
          $candidates[] = "search/{$params['type']}/$view_type";
      }

      // a view for the search type
      if ($has_search_type) {
          $candidates[] = "search/{$params['search_type']}/$view_type";
      }

      // finally the generic default
      $candidates[] = "search/$view_type";

      foreach ($candidates as $candidate) {
          if (elgg_view_exists($candidate)) {
              return $candidate;
          }
      }

      return FALSE;
  }
  /**
   * Returns a where clause for a search query.
   *
   * Queries shorter than $CONFIG->search_info['min_chars'] are matched
   * literally with LIKE against every field. Longer queries use
   * MATCH ... AGAINST, in boolean mode when full text is disabled, when the
   * query contains boolean operators or at least two double quotes, or when
   * $params['advanced_search'] is set.
   *
   * @param str   $table        Prefix for table to search on
   * @param array $fields       Fields to match against
   * @param array $params       Original search params; 'query' is required
   * @param bool  $use_fulltext When FALSE every word is prefixed with '+' and
   *                            boolean mode is forced
   * @return str
   */
  function search_get_where_sql($table, $fields, $params, $use_fulltext = TRUE) {
      global $CONFIG;

      $search_text = $params['query'];

      // add the table prefix to the fields
      if ($table) {
          foreach ($fields as $key => $column) {
              $fields[$key] = "$table.$column";
          }
      }

      // if query is shorter than the min for fts words
      // it's likely a single acronym or similar
      // switch to literal mode
      if (elgg_strlen($search_text) < $CONFIG->search_info['min_chars']) {
          $escaped = sanitise_string($search_text);

          $conditions = array();
          foreach ($fields as $column) {
              $conditions[] = "$column LIKE '%$escaped%'";
          }

          return '(' . implode(' OR ', $conditions) . ')';
      }

      // if we're not using full text, rewrite the query for bool mode.
      // exploiting a feature(ish) of bool mode where +-word is the same as -word
      if (!$use_fulltext) {
          $search_text = '+' . str_replace(' ', ' +', $search_text);
      }

      // if using advanced, boolean operators, or paired "s, switch into boolean mode
      $has_operators = preg_match("/([\-\+~])([\w]+)/i", $search_text);
      $is_advanced = !empty($params['advanced_search']);
      $has_quote_pair = (elgg_substr_count($search_text, '"') >= 2);

      // natural language mode is the default; its explicit keyword
      // isn't supported in < 5.1, so the option is simply left empty.
      $mode = '';
      if (!$use_fulltext || $has_operators || $is_advanced || $has_quote_pair) {
          $mode = 'IN BOOLEAN MODE';
      }

      // @todo query expansion for short queries doesn't seem to be working well.

      $escaped = sanitise_string($search_text);
      $columns = implode(',', $fields);

      return "(MATCH ($columns) AGAINST ('$escaped' $mode))";
  }
  /**
   * Returns ORDER BY sql for insertion into elgg_get_entities().
   *
   * Only 'created' and 'updated' produce a clause. 'relevance', 'action_on',
   * 'alpha' and any unknown sort yield an empty string.
   *
   * @param str $entities_table Prefix for entities table.
   * @param str $type_table     Prefix for the type table (currently unused).
   * @param str $sort           ORDER BY part
   * @param str $order          ASC or DESC (any case); anything else becomes DESC
   * @return str
   */
  function search_get_order_by_sql($entities_table, $type_table, $sort, $order) {
      // 'relevance' is tested first so that it wins any loose comparison,
      // exactly as the first case of a switch would.
      $column = '';
      if ($sort == 'relevance') {
          // default is relevance descending.
          // ascending relevancy is silly and complicated.
          $column = '';
      } elseif ($sort == 'created') {
          $column = "$entities_table.time_created";
      } elseif ($sort == 'updated') {
          $column = "$entities_table.time_updated";
      }
      // 'action_on' is not supported in core yet, and 'alpha' is not supported
      // because title and name columns are both used depending on the entity.

      $direction = strtolower($order);
      if (!($direction == 'asc' || $direction == 'desc')) {
          $direction = 'DESC';
      }

      return $column ? "$column $direction" : '';
  }
  417. }