PageRenderTime 44ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/includes/functions_search.php

http://github.com/MightyGorgon/icy_phoenix
PHP | 416 lines | 321 code | 59 blank | 36 comment | 78 complexity | 7074e0c83ca58ad79a37c5af891f24bd MD5 | raw file
Possible License(s): AGPL-1.0
  1. <?php
  2. /**
  3. *
  4. * @package Icy Phoenix
  5. * @version $Id$
  6. * @copyright (c) 2008 Icy Phoenix
  7. * @license http://opensource.org/licenses/gpl-license.php GNU Public License
  8. *
  9. */
  10. /**
  11. *
  12. * @Icy Phoenix is based on phpBB
  13. * @copyright (c) 2008 phpBB Group
  14. *
  15. */
  // Abort immediately unless the including script has defined the
  // IN_ICYPHOENIX marker constant.
  defined('IN_ICYPHOENIX') or die('Hacking attempt');
  /**
   * Normalise a piece of text so it can be split into search words.
   *
   * The text is lower-cased, stripped of HTML tags and padded with a leading
   * and trailing space so that whole words can be matched as ' word '.
   * In 'post' mode line breaks, HTML entities, URLs and BBCode tags are
   * blanked out, '*' is removed and words shorter than 3 or longer than 20
   * characters are dropped. In 'search' mode the ' +' / ' -' prefixes are
   * rewritten to ' and ' / ' not '.
   *
   * @param string $mode          'post' or 'search'; any other value only applies the common filtering
   * @param string $entry         Text to clean; passed by reference and overwritten with the result
   * @param array  $stopword_list Words to remove (in non-'post' modes 'not', 'and', 'or' are kept)
   * @param array  $synonym_list  Lines of the form "replacement match"; lines with fewer
   *                              than two words are ignored
   * @return string The cleaned text (same value as written back to $entry)
   */
  function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
  {
      // Parallel arrays: each character in $drop_char_match is replaced by the
      // entry at the same index in $drop_char_replace ('' turns "it's" into "its").
      static $drop_char_match = array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
      static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');

      // Boolean operators that must survive stopword/synonym filtering outside 'post' mode.
      $search_operators = array('not', 'and', 'or');

      $entry = ' ' . strip_tags(strtolower($entry)) . ' ';

      if ($mode == 'post')
      {
          // Replace line endings by a space
          $entry = preg_replace('/[\n\r]/is', ' ', $entry);
          // HTML entities like &nbsp;
          $entry = preg_replace('/\b&[a-z]+;\b/', ' ', $entry);
          // Remove URLs
          $entry = preg_replace('/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/', ' ', $entry);
          // Quickly remove BBCode
          $entry = preg_replace('/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/', ' ', $entry);
          $entry = preg_replace('/\[\/?url(=.*?)?\]/', ' ', $entry);
          $entry = preg_replace('/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/', ' ', $entry);
      }
      elseif ($mode == 'search')
      {
          $entry = str_replace(' +', ' and ', $entry);
          $entry = str_replace(' -', ' not ', $entry);
      }

      // Filter out strange characters like ^, $, &. With array arguments
      // str_replace() applies the pairs one after another, in array order.
      $entry = str_replace($drop_char_match, $drop_char_replace, $entry);

      if ($mode == 'post')
      {
          $entry = str_replace('*', ' ', $entry);
          // 'words' that consist of <3 or >20 characters are removed.
          $entry = preg_replace('/[ ]([\S]{1,2}|[\S]{21,})[ ]/', ' ', $entry);
      }

      if (!empty($stopword_list) && is_array($stopword_list))
      {
          foreach ($stopword_list as $stopword)
          {
              $stopword = trim($stopword);
              if ($mode == 'post' || !in_array($stopword, $search_operators, true))
              {
                  $entry = str_replace(' ' . $stopword . ' ', ' ', $entry);
              }
          }
      }

      if (!empty($synonym_list) && is_array($synonym_list))
      {
          foreach ($synonym_list as $synonym_line)
          {
              // Split on any whitespace run so blank lines, single-word lines
              // and doubled separators cannot yield an empty match word. An
              // empty match word would make the replacement below target every
              // double space and inject the replacement word into the text.
              $parts = preg_split('/\s+/', trim(strtolower($synonym_line)), -1, PREG_SPLIT_NO_EMPTY);
              if (count($parts) < 2)
              {
                  continue;
              }

              $replace_synonym = $parts[0];
              $match_synonym = $parts[1];
              if ($mode == 'post' || !in_array($match_synonym, $search_operators, true))
              {
                  $entry = str_replace(' ' . $match_synonym . ' ', ' ' . $replace_synonym . ' ', $entry);
              }
          }
      }

      return $entry;
  }
  /**
   * Break an already cleaned string into its individual words.
   *
   * @param string $entry Text to split
   * @param string $mode  Not used by the current implementation; it only
   *                      matters to the legacy regex variant noted below
   * @return array List of words; an empty input yields array('')
   */
  function split_words($entry, $mode = 'post')
  {
      // Legacy alternative, kept for reference in case the whitespace split
      // below causes problems:
      //   $rex = ($mode == 'post') ? "/\b([\w±µ-ÿ][\w±µ-ÿ']*[\w±µ-ÿ]+|[\w±µ-ÿ]+?)\b/" : '/(\*?[a-z0-9±µ-ÿ]+\*?)|\b([a-z0-9±µ-ÿ]+)\b/';
      //   preg_match_all($rex, $entry, $split_entries);
      //   return $split_entries[1];

      // Squash every run of whitespace into a single space ...
      $single_spaced = preg_replace('#\s+#', ' ', $entry);
      // ... drop the padding at both ends ...
      $single_spaced = trim($single_spaced);
      // ... and cut the remainder at each space.
      return explode(' ', $single_spaced);
  }
  /**
   * Index the words of a post's text and title in the search tables.
   *
   * Both strings are run through clean_words() in 'post' mode and
   * split_words(). Every distinct word is inserted into SEARCH_WORD_TABLE
   * (INSERT IGNORE), then one row per word is added to SEARCH_MATCH_TABLE
   * for the post, with title_match = 1 for title words and 0 for text words.
   * In 'single' mode remove_common() is called afterwards for the indexed words.
   *
   * @param string $mode       'single' triggers the common-word check; other values skip it
   * @param int    $post_id    Id of the post being indexed
   * @param string $post_text  Post body
   * @param string $post_title Post subject
   * @return void
   */
  function add_search_words($mode, $post_id, $post_text, $post_title = '')
  {
      global $db;
      global $stopwords_array, $synonyms_array;

      // NOTE(review): presumably fills $stopwords_array / $synonyms_array; defined outside this file section.
      stopwords_synonyms_init();

      // The id is interpolated into SQL below, so force it to an integer.
      $post_id = (int) $post_id;

      $search_raw_words = array(
          'text' => split_words(clean_words('post', $post_text, $stopwords_array, $synonyms_array)),
          'title' => split_words(clean_words('post', $post_title, $stopwords_array, $synonyms_array)),
      );

      @set_time_limit(0);

      $word = array();
      $word_insert_sql = array();

      // foreach instead of each(): each() no longer exists as of PHP 8.
      foreach ($search_raw_words as $word_in => $search_matches)
      {
          // Keyed by the quoted word so each word is listed once, in first-seen order.
          $quoted_words = array();
          foreach ($search_matches as $search_match)
          {
              $search_match = trim($search_match);
              if ($search_match === '')
              {
                  continue;
              }

              $word[] = $search_match;
              $quoted_words["'" . $db->sql_escape($search_match) . "'"] = true;
          }

          $word_insert_sql[$word_in] = implode(', ', array_keys($quoted_words));
      }

      if (sizeof($word))
      {
          // De-duplicate by string value: a loose comparison would treat
          // numeric-looking words such as "007" and "7" as the same word.
          $word = array_values(array_unique($word));
          sort($word, SORT_STRING);

          $value_sql = '';
          foreach ($word as $word_text)
          {
              $value_sql .= (($value_sql != '') ? ', ' : '') . "('" . $db->sql_escape($word_text) . "', 0)";
          }

          if ($value_sql != '')
          {
              $sql = "INSERT IGNORE INTO " . SEARCH_WORD_TABLE . " (word_text, word_common)
                  VALUES $value_sql";
              $db->sql_query($sql);
          }
      }

      foreach ($word_insert_sql as $word_in => $match_sql)
      {
          if ($match_sql == '')
          {
              continue;
          }

          $title_match = ($word_in == 'title') ? 1 : 0;

          $sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
              SELECT $post_id, word_id, $title_match
              FROM " . SEARCH_WORD_TABLE . "
              WHERE word_text IN ($match_sql)";
          $db->sql_query($sql);
      }

      if ($mode == 'single')
      {
          remove_common('single', 4/10, $word);
      }

      return;
  }
  /**
   * Check whether words have become too common and, if so, flag them and
   * drop their match rows.
   *
   * Nothing happens while POSTS_TABLE holds fewer than 100 posts. Otherwise a
   * word counts as common when it has more than floor(total_posts * $fraction)
   * rows in SEARCH_MATCH_TABLE. Common words get word_common = 1 in
   * SEARCH_WORD_TABLE and all their rows are deleted from SEARCH_MATCH_TABLE.
   *
   * @param string $mode         'single' restricts the check to $word_id_list (when not empty);
   *                             any other value checks every word
   * @param float  $fraction     Share of all posts above which a word is common
   * @param array  $word_id_list Despite the name, a list of word TEXTS (matched against word_text)
   * @return void
   */
  function remove_common($mode, $fraction, $word_id_list = array())
  {
      global $db;

      $sql = "SELECT COUNT(post_id) AS total_posts
          FROM " . POSTS_TABLE;
      $result = $db->sql_query($sql);
      $row = $db->sql_fetchrow($result);
      $db->sql_freeresult($result);

      $total_posts = !empty($row['total_posts']) ? (int) $row['total_posts'] : 0;
      if ($total_posts < 100)
      {
          return;
      }

      $common_threshold = (int) floor($total_posts * $fraction);

      if ($mode == 'single' && !empty($word_id_list))
      {
          // Iterate over values so the list does not need sequential keys.
          $quoted_words = array();
          foreach ($word_id_list as $word_text)
          {
              $quoted_words[] = "'" . $db->sql_escape($word_text) . "'";
          }
          $word_text_sql = implode(', ', $quoted_words);

          $sql = "SELECT m.word_id
              FROM " . SEARCH_MATCH_TABLE . " m, " . SEARCH_WORD_TABLE . " w
              WHERE w.word_text IN ($word_text_sql)
                  AND m.word_id = w.word_id
              GROUP BY m.word_id
              HAVING COUNT(m.word_id) > $common_threshold";
      }
      else
      {
          $sql = "SELECT word_id
              FROM " . SEARCH_MATCH_TABLE . "
              GROUP BY word_id
              HAVING COUNT(word_id) > $common_threshold";
      }
      $result = $db->sql_query($sql);

      $common_word_ids = array();
      while ($row = $db->sql_fetchrow($result))
      {
          $common_word_ids[] = (int) $row['word_id'];
      }
      $db->sql_freeresult($result);

      if (!empty($common_word_ids))
      {
          $common_word_id = implode(', ', $common_word_ids);

          $sql = "UPDATE " . SEARCH_WORD_TABLE . "
              SET word_common = 1
              WHERE word_id IN ($common_word_id)";
          $db->sql_query($sql);

          $sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
              WHERE word_id IN ($common_word_id)";
          $db->sql_query($sql);
      }

      return;
  }
  /**
   * Remove the search index entries of one or more posts.
   *
   * Words that are matched only once in SEARCH_MATCH_TABLE (within the same
   * title_match filter) among those used by the given posts are deleted from
   * SEARCH_WORD_TABLE; afterwards the posts' rows are deleted from
   * SEARCH_MATCH_TABLE.
   *
   * When exactly one of $remove_subject / $remove_message is false, all
   * queries are restricted to title_match = 1 (subject) or 0 (message).
   * NOTE(review): with both flags false the filter becomes title_match = 0,
   * i.e. message matches are still removed - confirm this is intended.
   *
   * @param string $post_id_sql    Comma-separated post ids, inserted verbatim into IN (...)
   * @param bool   $remove_subject Remove the subject (title) matches
   * @param bool   $remove_message Remove the message (text) matches
   * @return int|false Affected-row count of the word deletion, or false when no word was deleted
   */
  function remove_search_post($post_id_sql, $remove_subject = true, $remove_message = true)
  {
      global $db;

      $words_removed = false;

      // An empty id list would produce an invalid "IN ()" clause below.
      if (trim((string) $post_id_sql) === '')
      {
          return $words_removed;
      }

      $where_sql = '';
      if (!$remove_subject || !$remove_message)
      {
          $where_sql = ' AND title_match = ' . (($remove_subject) ? 1 : 0);
      }

      // Step 1: collect the ids of all words used by the given posts.
      $sql = "SELECT word_id
          FROM " . SEARCH_MATCH_TABLE . "
          WHERE post_id IN ($post_id_sql)
              $where_sql
          GROUP BY word_id";
      $db->sql_return_on_error(true);
      $result = $db->sql_query($sql);
      $db->sql_return_on_error(false);

      $word_ids = array();
      if ($result)
      {
          while ($row = $db->sql_fetchrow($result))
          {
              $word_ids[] = (int) $row['word_id'];
          }
          $db->sql_freeresult($result);
      }

      // Step 2: of those, keep the words that have a single match row.
      // Skipped when step 1 found nothing, instead of sending "IN ()" and
      // relying on the error being swallowed.
      if (!empty($word_ids))
      {
          $word_id_sql = implode(', ', $word_ids);

          $sql = "SELECT word_id
              FROM " . SEARCH_MATCH_TABLE . "
              WHERE word_id IN ($word_id_sql)
                  $where_sql
              GROUP BY word_id
              HAVING COUNT(word_id) = 1";
          $db->sql_return_on_error(true);
          $result = $db->sql_query($sql);
          $db->sql_return_on_error(false);

          if ($result)
          {
              $orphan_ids = array();
              while ($row = $db->sql_fetchrow($result))
              {
                  $orphan_ids[] = (int) $row['word_id'];
              }
              $db->sql_freeresult($result);

              if (!empty($orphan_ids))
              {
                  $sql = "DELETE FROM " . SEARCH_WORD_TABLE . "
                      WHERE word_id IN (" . implode(', ', $orphan_ids) . ")";
                  $db->sql_query($sql);
                  $words_removed = $db->sql_affectedrows();
              }
          }
      }

      // Step 3: drop the match rows of the posts themselves.
      $sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "
          WHERE post_id IN ($post_id_sql) $where_sql";
      $db->sql_query($sql);

      return $words_removed;
  }
  /**
   * Username search: render the "find a username" page.
   *
   * When $search_match is not empty, usernames are looked up in USERS_TABLE
   * with a case-insensitive LIKE ('*' in the input acts as the wildcard) and
   * emitted as <option> elements; the anonymous user is excluded. The target
   * form/element names are read from the request and reduced to
   * [A-Za-z0-9-_]. The page is rendered via full_page_generation() with the
   * 'search_username.tpl' template.
   *
   * @param string $search_match Username pattern typed by the user; '' performs no lookup
   * @param bool   $ajax_search  true: hide the (empty) result list via USERNAME_LIST_VIS;
   *                             false: enable the 'switch_select_name' block when results exist
   * @return void
   */
  function username_search($search_match, $ajax_search = false)
  {
      global $db, $template, $lang;
      global $gen_simple_header;

      $username_list = '';
      if (!empty($search_match))
      {
          // '*' is the user-facing wildcard; SQL LIKE expects '%'.
          $username_search = str_replace('*', '%', phpbb_clean_username($search_match));

          $sql = "SELECT username
              FROM " . USERS_TABLE . "
              WHERE LOWER(username) LIKE '" . $db->sql_escape(strtolower($username_search)) . "' AND user_id <> " . ANONYMOUS . "
              ORDER BY username";
          $result = $db->sql_query($sql);

          $found_any = false;
          while ($row = $db->sql_fetchrow($result))
          {
              $found_any = true;
              $escaped_name = htmlspecialchars($row['username']);
              $username_list .= '<option value="' . $escaped_name . '">' . $escaped_name . '</option>';
          }
          if (!$found_any)
          {
              $username_list .= '<option>' . $lang['No_match'] . '</option>';
          }
          $db->sql_freeresult($result);
      }

      $target_form_name = preg_replace('/[^A-Za-z0-9-_]+/', '', request_var('target_form_name', 'post'));
      $target_element_name = preg_replace('/[^A-Za-z0-9-_]+/', '', request_var('target_element_name', 'username'));

      $s_hidden_fields = build_hidden_fields(array(
          'target_form_name' => $target_form_name,
          'target_element_name' => $target_element_name,
      ));

      $template->assign_vars(array(
          'USERNAME' => (!empty($search_match)) ? phpbb_clean_username($search_match) : '',

          'L_CLOSE_WINDOW' => $lang['Close_window'],
          'L_SEARCH_USERNAME' => $lang['FIND_USERNAME'],
          'L_UPDATE_USERNAME' => $lang['Select_username'],
          'L_SELECT' => $lang['Select'],
          'L_SEARCH' => $lang['Search'],
          'L_SEARCH_EXPLAIN' => $lang['Search_author_explain'],

          'S_TARGET_FORM_NAME' => $target_form_name,
          'S_TARGET_ELEMENT_NAME' => $target_element_name,
          'S_HIDDEN_FIELDS' => $s_hidden_fields,
          'S_USERNAME_OPTIONS' => $username_list,
          'S_SEARCH_ACTION' => append_sid(CMS_PAGE_SEARCH . '?mode=searchuser'),
      ));

      // This must test the flag. The previous "$ajax_search = true" assigned
      // it, so the non-AJAX branch could never run.
      if ($ajax_search)
      {
          if ($username_list == '')
          {
              $template->assign_var('USERNAME_LIST_VIS', 'style="display: none;"');
          }
      }
      else
      {
          if ($username_list != '')
          {
              $template->assign_block_vars('switch_select_name', array());
          }
      }

      $gen_simple_header = true;
      full_page_generation('search_username.tpl', $lang['Search'], '', '');

      return;
  }
  356. ?>