PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/social/mod/search/start.php

https://github.com/melvincarvalho/foafme
PHP | 480 lines | 261 code | 77 blank | 142 comment | 46 complexity | ee08a54aca12003e9ebc2d921700d7eb MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, AGPL-3.0, Apache-2.0, LGPL-2.1
  1. <?php
  2. /**
  3. * Elgg core search.
  4. *
  5. * @package Elgg
  6. * @subpackage Core
  7. * @author Curverider Ltd <info@elgg.com>, The MITRE Corporation <http://www.mitre.org>
  8. * @link http://elgg.org/
  9. */
  /**
   * Set up the search plugin.
   *
   * Loads the search hook implementations, registers the "search" page
   * handler and the default search plugin hooks, stores the full-text word
   * length limits in $CONFIG->search_info and extends the CSS view.
   */
  function search_init() {
      global $CONFIG;

      require_once 'search_hooks.php';

      // page handler for search actions and results
      register_page_handler('search','search_page_handler');

      // Default hooks, registered in this exact order. Tags and comments are
      // not entity types, so each of them first registers a custom search
      // type and then the hook that performs the search.
      // @todo pull the group hook out into groups
      $default_hooks = array(
          array('search', 'object', 'search_objects_hook'),
          array('search', 'user', 'search_users_hook'),
          array('search', 'group', 'search_groups_hook'),
          array('search_types', 'get_types', 'search_custom_types_tags_hook'),
          array('search', 'tags', 'search_tags_hook'),
          array('search_types', 'get_types', 'search_custom_types_comments_hook'),
          array('search', 'comments', 'search_comments_hook'),
      );
      foreach ($default_hooks as $hook) {
          list($hook_name, $hook_type, $callback) = $hook;
          register_plugin_hook($hook_name, $hook_type, $callback);
      }

      // Fallback limits, used when the server does not report its own.
      $word_limits = array(
          'min_chars' => 4,
          'max_chars' => 90,
      );

      // Ask the server for the min and max word lengths of full-text search.
      // get_data() can't be used here because some servers don't have these
      // globals set, which throws a db exception.
      // NOTE(review): the mysql_* extension was removed in PHP 7.
      $result = mysql_query('SELECT @@ft_min_word_len as min, @@ft_max_word_len as max');
      if ($result) {
          $row = mysql_fetch_assoc($result);
          if ($row) {
              $word_limits['min_chars'] = $row['min'];
              $word_limits['max_chars'] = $row['max'];
          }
      }
      $CONFIG->search_info = $word_limits;

      // add in CSS for search elements
      elgg_extend_view('css', 'search/css');
  }
  /**
   * Page handler for search.
   *
   * When neither a "q" nor a "tag" input is present, the first page element
   * is used as the query (backward compatibility with legacy installations),
   * then the search index page is included.
   *
   * @param array $page Page elements from main page handler
   */
  function search_page_handler($page) {
      // Kept in scope because index.php is included inside this function and
      // therefore sees this function's variables.
      // NOTE(review): presumably index.php reads $CONFIG - confirm.
      global $CONFIG;

      // if there is no q set, we're being called from a legacy installation.
      // it expects a search by tags (actually it doesn't, but maybe it should).
      if (!get_input('q', get_input('tag', NULL))) {
          // A request without any page element used to read an undefined
          // offset here; pass NULL explicitly instead.
          $legacy_query = (is_array($page) && isset($page[0])) ? $page[0] : NULL;
          set_input('q', $legacy_query);
          //set_input('search_type', 'tags');
      }

      include_once('index.php');
  }
  /**
   * Return a string with highlighted matched queries and relevant context.
   *
   * Tags are stripped from the haystack first. A haystack no longer than
   * $max_length is returned whole (highlighted). Otherwise a window of
   * $min_match_context characters is taken around every occurrence of every
   * query word, overlapping windows are merged, and as many windows as fit
   * into $max_length are joined with "..." in order of occurrence.
   *
   * @param string $haystack          Text to extract the excerpt from
   * @param string $query             Raw search query
   * @param int    $min_match_context Characters of context kept on each side of a match (default 30)
   * @param int    $max_length        Maximum summed window length of the excerpt (default 300)
   * @return string Excerpt with matches wrapped by search_highlight_words()
   */
  function search_get_highlighted_relevant_substrings($haystack, $query, $min_match_context = 30, $max_length = 300) {
      $haystack = strip_tags($haystack);
      $haystack_length = elgg_strlen($haystack);
      $haystack_lc = elgg_strtolower($haystack);
      $haystack_lc_length = elgg_strlen($haystack_lc);

      $words = search_remove_ignored_words($query, 'array');

      // short haystacks are returned whole, with formatting
      if ($haystack_length <= $max_length) {
          return search_highlight_words($words, $haystack);
      }

      // collect a context window (start, length) for every occurrence of every word
      $starts = array();
      $lengths = array();
      foreach ($words as $word) {
          $word = elgg_strtolower($word);
          $word_len = elgg_strlen($word);

          // an empty needle can never be located and would never advance the scan
          if ($word_len < 1) {
              continue;
          }

          $offset = 0;
          while ($offset < $haystack_lc_length
                  && FALSE !== ($pos = elgg_strpos($haystack_lc, $word, $offset))) {
              $start = ($pos - $min_match_context > 0) ? $pos - $min_match_context : 0;
              $stop = $pos + $word_len + $min_match_context;
              $starts[] = $start;
              $lengths[] = $stop - $start;

              // resume right after this match; adding to the previous offset
              // would jump past later occurrences
              $offset = $pos + $word_len;
          }
      }

      $offsets = search_consolidate_substrings($starts, $lengths);

      // figure out if we can adjust the offsets and lengths
      // in order to return more context
      $total_length = array_sum($offsets);
      if ($offsets && $total_length < $max_length) {
          $add_length = (int) floor((($max_length - $total_length) / count($offsets)) / 2);

          $starts = array();
          $lengths = array();
          foreach ($offsets as $offset => $length) {
              // NOTE(review): the start moves back by $add_length and the length
              // grows by the same amount, so unless the start is clamped to 0 the
              // window end stays where it was - confirm whether trailing context
              // was meant to grow as well.
              $starts[] = ($offset - $add_length > 0) ? $offset - $add_length : 0;
              $lengths[] = $length + $add_length;
          }

          $offsets = search_consolidate_substrings($starts, $lengths);
      }

      // sort by order of string size descending (which is roughly
      // the proximity of matched terms) so we can keep the
      // substrings with terms closest together and discard
      // the others as needed to fit within $max_length.
      arsort($offsets);

      $return_strs = array();
      $total_length = 0;
      foreach ($offsets as $start => $length) {
          // continue past if adding this substring exceeds max length
          if ($total_length + $length > $max_length) {
              continue;
          }

          $total_length += $length;
          $return_strs[$start] = trim(elgg_substr($haystack, $start, $length));
      }

      // Nothing matched, or no window fits: fall back to the head of the
      // haystack. The haystack is longer than $max_length at this point, so
      // the trailing ellipsis is always warranted.
      if (!$return_strs) {
          $head = trim(elgg_substr($haystack, 0, $max_length)) . '...';
          return search_highlight_words($words, $head);
      }

      // put the strings in order of occurrence
      ksort($return_strs);

      // add ...s where needed
      $return = implode('...', $return_strs);
      if (!array_key_exists(0, $return_strs)) {
          $return = "...$return";
      }

      // add to end of string if the last window doesn't reach the end.
      // The window length is used rather than the trimmed text length so that
      // stripped whitespace does not produce a spurious ellipsis.
      $kept_starts = array_keys($return_strs);
      $last_pos = $kept_starts[count($kept_starts) - 1];
      if ($last_pos + $offsets[$last_pos] < $haystack_length) {
          $return .= '...';
      }

      return search_highlight_words($words, $return);
  }
  /**
   * Takes an array of offsets and lengths and consolidates any
   * overlapping entries, returning an array of new offsets and lengths.
   *
   * Offsets and lengths are specified in separate arrays (paired by key)
   * because of possible index collisions with the offsets.
   *
   * @param array $offsets Start positions
   * @param array $lengths Length for each start position, under the same key
   * @return array Map of offset => length, ordered by offset, without overlaps
   */
  function search_consolidate_substrings($offsets, $lengths) {
      // Sort by occurrence. asort() keeps the keys, which are still needed to
      // look up the length belonging to each offset.
      asort($offsets, SORT_NUMERIC);

      // Re-index both lists together. Renumbering the offsets first (as
      // array_merge() does) would pair them with the wrong lengths whenever
      // the input was not already sorted.
      $sorted_offsets = array();
      $sorted_lengths = array();
      foreach ($offsets as $key => $offset) {
          $sorted_offsets[] = $offset;
          $sorted_lengths[] = $lengths[$key];
      }

      $return = array();
      $count = count($sorted_offsets);

      for ($i = 0; $i < $count; $i++) {
          $offset = $sorted_offsets[$i];
          $end_pos = $offset + $sorted_lengths[$i];

          // absorb every following entry that starts before the current end
          while ($i + 1 < $count && $end_pos > $sorted_offsets[$i + 1]) {
              $i++;
              // an entry lying completely inside the current span must not shrink it
              $end_pos = max($end_pos, $sorted_offsets[$i] + $sorted_lengths[$i]);
          }

          // merged entries never share an offset, so a single array is enough
          $return[$offset] = $end_pos - $offset;
      }

      return $return;
  }
  /**
   * Safely highlights the words in $words found in $string avoiding recursion.
   *
   * Each word is matched literally and case-insensitively and wrapped in
   * <strong class="searchMatch searchMatchColorN">, where N is the 1-based
   * position of the word in $words. The markup is inserted as numeric
   * placeholders first, so that a later search word such as "strong" or
   * "class" cannot match inside markup added for an earlier word.
   *
   * Known limitation: a search word made of digits can still match inside a
   * placeholder inserted for an earlier word.
   *
   * @param array  $words  Words to highlight
   * @param string $string Text to highlight the words in
   * @return string
   */
  function search_highlight_words($words, $string) {
      // One numeric placeholder per markup token. They must differ from each
      // other and must not already occur in the text, otherwise the final
      // substitution would rewrite the wrong characters.
      $replace_html = array();
      foreach (array('strong', 'class', 'searchMatch', 'searchMatchColor') as $token) {
          do {
              $placeholder = (string) rand(10000, 99999);
          } while (in_array($placeholder, $replace_html, TRUE)
                  || strpos((string) $string, $placeholder) !== FALSE);
          $replace_html[$token] = $placeholder;
      }

      $strong = $replace_html['strong'];
      $class = $replace_html['class'];
      $searchMatch = $replace_html['searchMatch'];
      $searchMatchColor = $replace_html['searchMatchColor'];

      $i = 1;
      foreach ($words as $word) {
          $word = (string) $word;

          // an empty pattern would match at every position of the text
          if ($word !== '') {
              // quote the word so regex metacharacters and the delimiter are literal
              $search = '/(' . preg_quote($word, '/') . ')/i';
              $replace = "<$strong $class=\"$searchMatch $searchMatchColor" . $i . "\">$1</$strong>";

              $highlighted = preg_replace($search, $replace, $string);
              // preg_replace() yields NULL on failure; keep the text in that case
              if ($highlighted !== NULL) {
                  $string = $highlighted;
              }
          }

          // skipped words still consume a colour index so the others keep theirs
          $i++;
      }

      // Swap the placeholders for the real markup in one left-to-right pass,
      // so a placeholder followed by the colour digit is never misread as a
      // different placeholder.
      return strtr($string, array_flip($replace_html));
  }
  /**
   * Returns a query with stop and too short words removed.
   * (Unless the entire query is < ft_min_word_chars, in which case
   * it's taken literally.)
   *
   * Tags, slashes, double quotes and the boolean operators - + ~ are stripped
   * from the query before it is split on spaces.
   *
   * @param string $query  Raw search query
   * @param string $format 'string' to get a string back; anything else returns an array
   * @return mixed Array of words (original keys kept, so it may have gaps) or a string
   */
  function search_remove_ignored_words($query, $format = 'array') {
      global $CONFIG;

      // don't worry about "s or boolean operators
      $query = str_replace(array('"', '-', '+', '~'), '', stripslashes(strip_tags($query)));
      $words = explode(' ', $query);

      $min_chars = $CONFIG->search_info['min_chars'];

      // a query shorter than ft_min_word_len is taken literally
      $literal = elgg_strlen($query) < $min_chars;

      foreach ($words as $i => $word) {
          // Consecutive, leading or trailing spaces produce empty tokens. They
          // are never words, and an empty search word breaks highlighting.
          if ($word === '') {
              unset($words[$i]);
              continue;
          }

          // clean out any words that are ignored by mysql
          if (!$literal && elgg_strlen($word) < $min_chars) {
              unset($words[$i]);
          }
      }

      if ($format == 'string') {
          // a literal query is handed back untouched, spacing included
          return $literal ? $query : implode(' ', $words);
      }

      return $words;
  }
  /**
   * Picks the most specific existing view for rendering search output.
   *
   * Candidates are tried from most to least specific:
   *   search/<type>/<subtype>/<view_type>
   *   search/<type>/<view_type>
   *   search/<search_type>/<view_type>
   *   search/<view_type>
   *
   * @param array  $params    Search params; 'type', 'subtype' and 'search_type' are read
   * @param string $view_type One of 'listing', 'entity' or 'layout'
   * @return mixed View name (string), or FALSE when the view type is unknown
   *               or none of the candidate views exists
   */
  function search_get_search_view($params, $view_type) {
      if (!in_array($view_type, array('listing', 'entity', 'layout'))) {
          return FALSE;
      }

      $candidates = array();
      $has_type = !empty($params['type']);

      // special view for this type:subtype
      if ($has_type && !empty($params['subtype'])) {
          $candidates[] = "search/{$params['type']}/{$params['subtype']}/$view_type";
      }

      // default view for the type
      if ($has_type) {
          $candidates[] = "search/{$params['type']}/$view_type";
      }

      // view for a custom search type
      if (!empty($params['search_type'])) {
          $candidates[] = "search/{$params['search_type']}/$view_type";
      }

      // generic default
      $candidates[] = "search/$view_type";

      foreach ($candidates as $candidate) {
          if (elgg_view_exists($candidate)) {
              return $candidate;
          }
      }

      return FALSE;
  }
  /**
   * Builds the WHERE clause fragment for a search query.
   *
   * A query shorter than the full-text minimum word length is matched with
   * LIKE against each field; anything else uses MATCH ... AGAINST.
   *
   * @param str   $table        Prefix for table to search on (falsy for none)
   * @param array $fields       Fields to match against
   * @param array $params       Original search params; 'query' and 'advanced_search' are read
   * @param bool  $use_fulltext When FALSE, every term is prefixed with + and boolean mode is used
   * @return str
   */
  function search_get_where_sql($table, $fields, $params, $use_fulltext = TRUE) {
      global $CONFIG;

      $query = $params['query'];

      // qualify the fields with the table prefix
      if ($table) {
          foreach ($fields as $key => $name) {
              $fields[$key] = "$table.$name";
          }
      }

      // without full text, rewrite the query for bool mode, exploiting a
      // feature(ish) of bool mode where +-word is the same as -word
      if (!$use_fulltext) {
          $query = '+' . str_replace(' ', ' +', $query);
      }

      // a query shorter than the min for fts words is likely a single
      // acronym or similar: switch to literal mode
      if (elgg_strlen($query) < $CONFIG->search_info['min_chars']) {
          $query = sanitise_string($query);

          $conditions = array();
          foreach ($fields as $name) {
              $conditions[] = "$name LIKE '%$query%'";
          }

          return '(' . implode(' OR ', $conditions) . ')';
      }

      // advanced searches and paired "s need boolean mode. Natural language
      // mode is the default, and its keyword isn't supported in < 5.1, so no
      // modifier is emitted for it.
      $boolean_mode = !$use_fulltext
          || !empty($params['advanced_search'])
          || elgg_substr_count($query, '"') >= 2;
      $options = $boolean_mode ? 'IN BOOLEAN MODE' : '';

      // @todo query expansion for short queries doesn't seem to be working
      // well, so it stays disabled.

      $query = sanitise_string($query);
      $fields_str = implode(',', $fields);

      return "(MATCH ($fields_str) AGAINST ('$query' $options))";
  }
  /**
   * Returns ORDER BY sql for insertion into elgg_get_entities().
   *
   * Only 'created' and 'updated' produce a clause. 'relevance' (also the
   * fallback for unknown values), 'action_on' and 'alpha' give an empty string.
   *
   * @param str $entities_table Prefix for entities table.
   * @param str $type_table     Prefix for the type table (not used by this function).
   * @param str $sort           ORDER BY part
   * @param str $order          ASC or DESC (case-insensitive); anything else means DESC
   * @return str "<column> <order>" or an empty string
   */
  function search_get_order_by_sql($entities_table, $type_table, $sort, $order) {
      // The comparisons stay loose and in this order to mirror a switch statement.
      if ($sort == 'relevance') {
          // relevance descending is the default; ascending relevancy is
          // silly and complicated.
          $column = '';
      } elseif ($sort == 'created') {
          $column = "$entities_table.time_created";
      } elseif ($sort == 'updated') {
          $column = "$entities_table.time_updated";
      } elseif ($sort == 'action_on') {
          // @todo not supported yet in core
          $column = '';
      } elseif ($sort == 'alpha') {
          // @todo not supported yet because both title and name columns are
          // used for this depending on the entity, which we don't always know.
          $column = NULL;
      } else {
          // anything unknown is treated like relevance
          $column = '';
      }

      $direction = strtolower($order);
      if (!($direction == 'asc' || $direction == 'desc')) {
          $direction = 'DESC';
      }

      return $column ? "$column $direction" : '';
  }
  // Run search_init() when the system 'init' event fires.
  register_elgg_event_handler('init', 'system', 'search_init');