PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/modules/Search/search.tools.php

https://github.com/tedkulp/cmsmadesimple-2-0
PHP | 372 lines | 274 code | 52 blank | 46 comment | 32 complexity | 51afc2ea0c8ad87243b59b344325e3c1 MD5 | raw file
  1. <?php
  2. #CMS - CMS Made Simple
  3. #(c)2004 by Ted Kulp (wishy@users.sf.net)
  4. #This project's homepage is: http://cmsmadesimple.sf.net
  5. #
  6. #This program is free software; you can redistribute it and/or modify
  7. #it under the terms of the GNU General Public License as published by
  8. #the Free Software Foundation; either version 2 of the License, or
  9. #(at your option) any later version.
  10. #
  11. #This program is distributed in the hope that it will be useful,
  12. #but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. #GNU General Public License for more details.
  15. #You should have received a copy of the GNU General Public License
  16. #along with this program; if not, write to the Free Software
  17. #Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. function search_StemPhrase(&$module,$phrase)
  19. {
  20. // strip out smarty tags
  21. $phrase = preg_replace('/\{.*?\}/', '', $phrase);
  22. // add spaces between tags
  23. $phrase = str_replace("<"," <",$phrase);
  24. $phrase = str_replace(">","> ",$phrase);
  25. // strip out html and php stuff
  26. $phrase = strip_tags($phrase);
  27. // escape meta characters
  28. $phrase = preg_quote($phrase);
  29. // split into words
  30. // strtolower isn't friendly to other charsets
  31. $phrase = preg_replace("/([A-Z]+)/e",
  32. "strtolower('\\1')",
  33. $phrase);
  34. //$words = preg_split('/[\s,!.()+-\/\\\\]+/', $phrase);
  35. $words = preg_split('/[\s,!.;:\?()+-\/\\\\]+/', $phrase);
  36. // ignore stop words
  37. $words = $module->RemoveStopWordsFromArray($words);
  38. $stemmer = new PorterStemmer();
  39. // stem words
  40. $stemmed_words = array();
  41. $stem_pref = $module->GetPreference('usestemming', 'false');
  42. foreach ($words as $word)
  43. {
  44. //trim words get rid of wrapping quotes
  45. $word = trim($word, ' \'"');
  46. if (strlen($word) <= 0)
  47. {
  48. continue;
  49. }
  50. if ($stem_pref == 'true')
  51. $stemmed_words[] = $stemmer->stem($word, true);
  52. else
  53. $stemmed_words[] = $word;
  54. }
  55. return $stemmed_words;
  56. }
  57. function search_AddWords(&$obj, $module = 'Search', $id = -1, $attr = '', $content = '', $expires = NULL)
  58. {
  59. $obj->DeleteWords($module, $id, $attr);
  60. $db =& $obj->GetDb();
  61. $non_indexable = strpos($content, NON_INDEXABLE_CONTENT);
  62. if( $non_indexable ) return;
  63. @$obj->SendEvent('SearchItemAdded', array($module, $id, $attr, &$content, $expires));
  64. if ($content != "")
  65. {
  66. //Clean up the content
  67. $stemmed_words = $obj->StemPhrase($content);
  68. $words = array_count_values($stemmed_words);
  69. $q = "SELECT id FROM ".cms_db_prefix().'module_search_items WHERE module_name=?';
  70. $parms = array($module);
  71. if( $id != -1 )
  72. {
  73. $q .= " AND content_id=?";
  74. $parms[] = $id;
  75. }
  76. if( $attr != '' )
  77. {
  78. $q .= " AND extra_attr=?";
  79. $parms[] = $attr;
  80. }
  81. $dbresult = $db->Execute($q, $parms);
  82. if ($dbresult && $dbresult->RecordCount() > 0 && $row = $dbresult->FetchRow())
  83. {
  84. $itemid = $row['id'];
  85. }
  86. else
  87. {
  88. $itemid = $db->GenID(cms_db_prefix()."module_search_items_seq");
  89. $db->Execute('INSERT INTO '.cms_db_prefix().'module_search_items (id, module_name, content_id, extra_attr, expires) VALUES (?,?,?,?,?)', array($itemid, $module, $id, $attr, ($expires != NULL ? trim($db->DBTimeStamp($expires), "'") : NULL) ));
  90. }
  91. foreach ($words as $word=>$count)
  92. {
  93. $db->Execute('INSERT INTO '.cms_db_prefix().'module_search_index (item_id, word, count) VALUES (?,?,?)', array($itemid, $word, $count));
  94. }
  95. }
  96. }
  97. function search_DeleteWords(&$obj, $module = 'Search', $id = -1, $attr = '')
  98. {
  99. $db =& $obj->GetDb();
  100. $parms = array( $module );
  101. $q = "DELETE FROM ".cms_db_prefix().'module_search_items WHERE module_name=?';
  102. if( $id != -1 )
  103. {
  104. $q .= " AND content_id=?";
  105. $parms[] = $id;
  106. }
  107. if( $attr != '' )
  108. {
  109. $q .= " AND extra_attr=?";
  110. $parms[] = $attr;
  111. }
  112. $db->Execute($q, $parms);
  113. $db->Execute('DELETE FROM '.cms_db_prefix().'module_search_index WHERE item_id NOT IN (SELECT id FROM '.cms_db_prefix().'module_search_items)');
  114. @$obj->SendEvent('SearchItemDeleted', array($module, $id, $attr));
  115. }
  116. function search_Reindex(&$module)
  117. {
  118. @set_time_limit(999);
  119. $module->DeleteAllWords();
  120. global $gCms;
  121. $templateops =& $gCms->GetTemplateOperations();
  122. $alltemplates = $templateops->LoadTemplates();
  123. reset($alltemplates);
  124. while (list($key) = each($alltemplates))
  125. {
  126. $onetemplate =& $alltemplates[$key];
  127. //$module->EditTemplatePost($onetemplate);
  128. $params = array('template' => &$onetemplate,
  129. 'forceindexcontent'=>1);
  130. $module->DoEvent('Core', 'EditTemplatePost', $params);
  131. }
  132. $gcbops =& $gCms->GetGlobalContentOperations();
  133. $allblobs = $gcbops->LoadHtmlBlobs();
  134. reset($allblobs);
  135. while (list($key) = each($allblobs))
  136. {
  137. $oneblob =& $allblobs[$key];
  138. //$module->EditHtmlBlobPost($oneblob);
  139. $params = array('global_content' => &$oneblob);
  140. $module->DoEvent('Core', 'EditGlobalContentPost', $params);
  141. }
  142. foreach($gCms->modules as $key=>$value)
  143. {
  144. if ($gCms->modules[$key]['installed'] == true &&
  145. $gCms->modules[$key]['active'] == true)
  146. {
  147. if (method_exists($gCms->modules[$key]['object'], 'SearchReindex'))
  148. {
  149. $gCms->modules[$key]['object']->SearchReindex($module);
  150. }
  151. }
  152. }
  153. }
  154. function search_DoEvent(&$module, $originator, $eventname, &$params )
  155. {
  156. if ($originator == 'Core')
  157. {
  158. switch ($eventname)
  159. {
  160. case 'ContentEditPost':
  161. $content =& $params['content'];
  162. if (!isset($content)) return;
  163. $db =& $module->GetDb();
  164. $q = "SELECT id FROM ".cms_db_prefix()."module_search_items WHERE
  165. extra_attr = ? AND content_id = ?";
  166. $template_indexed = $db->GetOne( $q, array( 'template', $content->TemplateId() ));
  167. if( !$template_indexed )
  168. {
  169. $module->DeleteWords($module->GetName(), $content->Id(), 'content');
  170. break;
  171. }
  172. //Only index content if it's active
  173. // and searchable.
  174. // assume by default that it is searchable
  175. $tmp = $content->GetPropertyValue('searchable');
  176. if( $tmp == '' ) $tmp = 1;
  177. if ($content->Active() && $tmp )
  178. {
  179. //Weight the title and menu text higher
  180. $text = str_repeat(' '.$content->Name(), 2) . ' ';
  181. $text .= str_repeat(' '.$content->MenuText(), 2) . ' ';
  182. $props = $content->Properties();
  183. foreach ($props->mPropertyValues as $k=>$v)
  184. {
  185. $text .= $v.' ';
  186. }
  187. // here check for a string to see
  188. // if module content is indexable at all
  189. $non_indexable = strpos($text, NON_INDEXABLE_CONTENT);
  190. if (! $non_indexable)
  191. {
  192. $module->AddWords($module->GetName(), $content->Id(), 'content', $text);
  193. }
  194. else
  195. {
  196. $module->DeleteWords($module->GetName(), $content->Id(), 'content');
  197. }
  198. }
  199. else
  200. {
  201. //Just in case the active flag was turned off
  202. $module->DeleteWords($module->GetName(), $content->Id(), 'content');
  203. }
  204. break;
  205. case 'ContentDeletePost':
  206. $content =& $params['content'];
  207. if (!isset($content)) return;
  208. $module->DeleteWords($module->GetName(), $content->Id(), 'content');
  209. break;
  210. case 'AddTemplatePost':
  211. $template =& $params['template'];
  212. if( $template->active != false )
  213. $module->AddWords($module->GetName(), $template->id, 'template', $template->content);
  214. else
  215. $module->DeleteWords($module->GetName(), $template->id, 'template');
  216. break;
  217. case 'EditTemplatePost':
  218. $template =& $params['template'];
  219. if( $template->active != false )
  220. {
  221. // here check for a string to see
  222. // if this content is indexable at all
  223. $non_indexable = strpos($template->content, NON_INDEXABLE_CONTENT);
  224. $db =& $module->GetDb();
  225. // check if the page was indexed already or not
  226. $q = "SELECT id FROM ".cms_db_prefix()."module_search_items WHERE content_id = ?
  227. AND extra_attr = ?";
  228. $was_indexed = $db->GetOne( $q, array( $template->id, 'template' ) );
  229. // find all of the (active) pages tied to a template
  230. $q = "SELECT content_id FROM ".cms_db_prefix()."content WHERE active > 0 AND template_id = ?";
  231. $dbresult =& $db->Execute( $q, array( $template->id ) );
  232. if( ! $non_indexable )
  233. {
  234. $module->AddWords($module->GetName(), $template->id, 'template', $template->content);
  235. }
  236. else
  237. {
  238. $module->DeleteWords($module->GetName(), $template->id, 'template');
  239. }
  240. if( ($non_indexable && $was_indexed) )
  241. {
  242. // we can't index the template, and it was indexed
  243. // meaning we need to delete all indexes from
  244. // the children.
  245. $q2 = "DELETE FROM ".cms_db_prefix()."module_search_items WHERE
  246. extra_attr = ? AND content_id IN (";
  247. $parms = array( 'content' );
  248. // delete them all from the index
  249. while( $dbresult && !$dbresult->EOF )
  250. {
  251. $q2 .= "?,";
  252. $parms[] = $dbresult->fields['content_id'];
  253. $dbresult->MoveNext();
  254. }
  255. $q2 = substr($q2,0,strlen($q2)-1);
  256. $q2 .= ")";
  257. $db->Execute( $q2, $parms );
  258. $db->Execute('DELETE FROM '.cms_db_prefix().'module_search_index WHERE item_id NOT IN (SELECT id FROM '.cms_db_prefix().'module_search_items)');
  259. }
  260. else
  261. {
  262. if (!$non_indexable && !$was_indexed)
  263. {
  264. // The template is indexable, and was not indexed previously
  265. // so we have to index it's children.
  266. while( $dbresult && !$dbresult->EOF )
  267. {
  268. global $gCms;
  269. $contentops =& $gCms->GetContentOperations();
  270. $onecontent =& $contentops->LoadContentFromId($dbresult->fields['content_id']);
  271. $parms = array('content'=>&$onecontent);
  272. $module->DoEvent('Core','ContentEditPost',$parms);
  273. $dbresult->MoveNext();
  274. }
  275. }
  276. }
  277. }
  278. else
  279. {
  280. // template is inactive
  281. $module->DeleteWords($module->GetName(), $template->id, 'template');
  282. }
  283. break;
  284. case 'DeleteTemplatePost':
  285. $template =& $params['template'];
  286. $module->DeleteWords($module->GetName(), $template->id, 'template');
  287. break;
  288. case 'AddGlobalContentPost':
  289. $global_content =& $params['global_content'];
  290. $module->AddWords($module->GetName(), $global_content->id, 'global_content', $global_content->content);
  291. break;
  292. case 'EditGlobalContentPost':
  293. $global_content =& $params['global_content'];
  294. $module->AddWords($module->GetName(), $global_content->id, 'global_content', $global_content->content);
  295. break;
  296. case 'DeleteGlobalContentPost':
  297. $global_content =& $params['global_content'];
  298. $module->DeleteWords($module->GetName(), $global_content->id, 'global_content');
  299. break;
  300. case 'ModuleUninstalled':
  301. $module_name =& $params['name'];
  302. $module->DeleteWords($module_name);
  303. break;
  304. }
  305. }
  306. }
  307. #
  308. #
  309. ?>