PageRenderTime 33ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/classes/class.rexsearch.inc.php

https://github.com/xong/rexsearch
PHP | 2361 lines | 1640 code | 345 blank | 376 comment | 181 complexity | e3cae78c72e0ca8b27c40a1591236852 MD5 | raw file
Possible License(s): LGPL-2.1
  1. <?php
  2. /**
  3. * Class rexsearch
  4. *
  5. * This class is still being tested.
  6. * Please report errors at http://forum.redaxo.de.
  7. *
  8. * @author Robert Rupf
  9. * @package rexsearch
  10. */
/**
 * Status codes and mode values used by the RexSearch class.
 */
// Per-language result codes placed in the array returned by RexSearch::indexArticle().
define('A587_ART_EXCLUDED',0);
define('A587_ART_IDNOTFOUND',1);
define('A587_ART_GENERATED',2);
define('A587_ART_REDIRECT',3);
// Result codes returned by RexSearch::indexFile(); the XPDFERR codes are
// mapped from the exit status of the external pdftotext call.
define('A587_FILE_NOEXIST',0);
define('A587_FILE_XPDFERR_OPENSRC',1);
define('A587_FILE_XPDFERR_OPENDEST',2);
define('A587_FILE_XPDFERR_PERM',3);
define('A587_FILE_XPDFERR_OTHER',4);
define('A587_FILE_FORBIDDEN_EXTENSION',5);
define('A587_FILE_GENERATED',6);
define('A587_FILE_EMPTY',7);
// Similar-word modes. 1, 2 and 4 are distinct bits and A587_SIMILARWORDS_ALL (7)
// is their sum; presumably they are combined as bit flags - the code that
// evaluates them is not part of this section, so confirm before relying on it.
define('A587_SIMILARWORDS_NONE',0);
define('A587_SIMILARWORDS_SOUNDEX',1);
define('A587_SIMILARWORDS_METAPHONE',2);
define('A587_SIMILARWORDS_COLOGNEPHONE',4);
define('A587_SIMILARWORDS_ALL',7);
  31. /**
  32. * @package rexsearch
  33. */
  34. class RexSearch
  35. {
  36. var $searchArticles = false;
  37. var $blacklist = array();
  38. var $blacklisted = array();
  39. var $cache = true;
  40. var $cachedArray = array();
  41. /**
  42. * @ignore
  43. */
  44. var $ci = true; // case insensitive?
  45. var $clang = false;
  46. var $documentRoot;
  47. var $dontIndexRedirects = true;
  48. var $ellipsis;
  49. var $ep_outputfilter = false;
  50. var $excludeIDs = array();
  51. var $fileExtensions = array();
  52. var $groupBy = true;
  53. var $hashMe = '';
  54. var $highlightType = 'surroundtext';
  55. var $includeColumns = array();
  56. var $includeDirectories = array();
  57. var $includePath;
  58. var $generatedPath;
  59. var $indexUnknownFileExtensions = false;
  60. var $indexMediapool = false;
  61. var $indexMissingFileExtensions = false;
  62. var $indexOffline = false;
  63. var $indexViaHTTP = false;
  64. var $indexWithTemplate = false;
  65. var $languages;
  66. var $limit = array(0,10);
  67. var $logicalMode = ' AND ';
  68. var $maxHighlightedTextChars = 100;
  69. var $maxTeaserChars = 200;
  70. var $mediaFolder;
  71. var $order = array('RELEVANCE587' => 'DESC');
  72. var $redaxo = false;
  73. var $searchArray = array();
  74. var $searchEntities = false;
  75. var $searchInIDs = array();
  76. var $searchMode = 'like';
  77. var $searchString = '';
  78. var $significantCharacterCount = 3;
  79. var $similarwords = false;
  80. var $similarwordsMode = 0;
  81. var $similarwordsPermanent = false;
  82. var $stopwords = array();
  83. var $surroundTags = array('<strong>','</strong>');
  84. var $tablePrefix;
  85. var $textMode = 'plain';
  86. var $whitelist = array();
  87. var $where = '';
  88. #function __construct($_clang = false, $_loadSettings = true)
  89. function RexSearch($_clang = false, $_loadSettings = true, $_useStopwords = true)
  90. {
  91. global $REX,$I18N;
  92. if($_loadSettings)
  93. {
  94. foreach($REX['ADDON']['settings']['rexsearch'] as $key => $value)
  95. {
  96. switch($key)
  97. {
  98. case 'logicalmode':
  99. $this->setLogicalMode($value);
  100. break;
  101. case 'textmode':
  102. $this->setTextMode($value);
  103. break;
  104. case 'searchmode':
  105. $this->setSearchMode($value);
  106. break;
  107. case 'surroundtags':
  108. $this->setSurroundTags($value);
  109. break;
  110. case 'limit':
  111. $this->setLimit($value);
  112. break;
  113. case 'ci':
  114. $this->setCI($value);
  115. break;
  116. case 'blacklist':
  117. $this->setBlacklist(is_array($value)?$value:array());
  118. break;
  119. case 'exclude_article_ids':
  120. $this->setExcludeIDs($value);
  121. break;
  122. case 'exclude_category_ids':
  123. if(is_array($value))
  124. {
  125. $ids = array();
  126. foreach($value as $catID)
  127. {
  128. foreach(a587_getArticles(array($catID)) as $id => $name)
  129. $ids[] = $id;
  130. $this->setExcludeIDs($ids);
  131. }
  132. }
  133. break;
  134. case 'include':
  135. $this->setIncludeColumns($value);
  136. break;
  137. case 'maxteaserchars':
  138. $this->setMaxTeaserChars($value);
  139. break;
  140. case 'maxhighlightchars':
  141. $this->setMaxHighlightedTextChars($value);
  142. break;
  143. case 'highlight':
  144. $this->setHighlightType($value);
  145. break;
  146. case 'indexmode':
  147. $this->indexViaHTTP = intval($value) == 0;
  148. $this->indexWithTemplate = intval($value) == 2;
  149. break;
  150. case 'indexoffline':
  151. $this->indexOffline = $value == '1';
  152. break;
  153. case 'similarwordsmode':
  154. $this->similarwordsMode = intval($value);
  155. $this->similarwords = !!intval($value);
  156. break;
  157. case 'similarwords_permanent':
  158. $this->similarwordsPermanent = !!intval($value);
  159. break;
  160. case 'fileextensions':
  161. $this->fileExtensions = $value;
  162. break;
  163. case 'indexfolders':
  164. $this->includeDirectories = $value;
  165. break;
  166. case 'indexmediapool':
  167. $this->indexMediapool = !!intval($value);
  168. break;
  169. case 'ep_outputfilter':
  170. $this->ep_outputfilter = !!intval($value);
  171. break;
  172. }
  173. }
  174. }
  175. $this->setClang($_clang);
  176. $this->languages = $REX['CLANG'];
  177. $this->tablePrefix = $REX['TABLE_PREFIX'];
  178. $this->includePath = $REX['INCLUDE_PATH'];
  179. $this->generatedPath = $REX['GENERATED_PATH'];
  180. $this->documentRoot = realpath($_SERVER['DOCUMENT_ROOT']);
  181. $this->mediaFolder = $REX['MEDIAFOLDER'];
  182. $locale = 'de_de';
  183. $langfile = new i18n($locale, $REX['INCLUDE_PATH'].'/addons/rexsearch/lang/');
  184. $this->ellipsis = $langfile->Msg('a587_ellipsis');
  185. // german stopwords
  186. if($_useStopwords)
  187. {
  188. include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php';
  189. $this->stopwords = $german_stopwords;
  190. }
  191. }
/**
 * A function for retrieving the Kölner Phonetik value of a string
 *
 * As described at http://de.wikipedia.org/wiki/Kölner_Phonetik
 * Based on Hans Joachim Postel: Die Kölner Phonetik.
 * Ein Verfahren zur Identifizierung von Personennamen auf der
 * Grundlage der Gestaltanalyse.
 * in: IBM-Nachrichten, 19. Jahrgang, 1969, S. 925-931
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * @package phonetics
 * @version 1.0
 * @link http://www.einfachmarke.de
 * @license GPL 3.0 <http://www.gnu.org/licenses/>
 * @copyright 2008 by einfachmarke.de
 * @author Nicolas Zimmer <nicolas dot zimmer at einfachmarke.de>
 */
  213. function cologne_phone($_word)
  214. {
  215. /**
  216. * @param string $_word string to be analyzed
  217. * @return string $value represents the K?lner Phonetik value
  218. * @access public
  219. */
  220. //prepare for processing
  221. $_word = strtolower($_word);
  222. $substitution = array(
  223. '?'=>'a',
  224. '?'=>'o',
  225. '?'=>'u',
  226. '?'=>'ss',
  227. 'ph'=>'f'
  228. );
  229. foreach($substitution as $letter => $substitution)
  230. $_word = str_replace($letter, $substitution, $_word);
  231. $len = strlen($_word);
  232. //Rule for exeptions
  233. $exceptionsLeading = array(
  234. 4=>array('ca','ch','ck','cl','co','cq','cu','cx'),
  235. 8=>array('dc','ds','dz','tc','ts','tz')
  236. );
  237. $exceptionsFollowing = array('sc','zc','cx','kx','qx');
  238. //Table for coding
  239. $codingTable = array(
  240. 0 => array('a','e','i','j','o','u','y'),
  241. 1 => array('b','p'),
  242. 2 => array('d','t'),
  243. 3 => array('f','v','w'),
  244. 4 => array('c','g','k','q'),
  245. 48 => array('x'),
  246. 5 => array('l'),
  247. 6 => array('m','n'),
  248. 7 => array('r'),
  249. 8 => array('c','s','z')
  250. );
  251. for($i=0; $i < $len; $i++)
  252. {
  253. $value[$i] = '';
  254. //Exceptions
  255. if($i==0 AND $len > 1 AND $_word[$i].$_word[$i+1] == 'cr')
  256. $value[$i] = 4;
  257. if($i < ($len - 1))
  258. {
  259. foreach($exceptionsLeading as $code=>$letters)
  260. {
  261. if(in_array($_word[$i].$_word[$i+1],$letters))
  262. $value[$i] = $code;
  263. }
  264. }
  265. if($i AND in_array($_word[$i-1].$_word[$i], $exceptionsFollowing))
  266. $value[$i] = 8;
  267. //Normal encoding
  268. if($value[$i] == '')
  269. {
  270. foreach($codingTable as $code => $letters)
  271. {
  272. if(in_array($_word[$i], $letters))
  273. $value[$i] = $code;
  274. }
  275. }
  276. }
  277. //delete double values
  278. $len=count($value);
  279. for($i=1;$i<$len;$i++)
  280. {
  281. if($value[$i] == $value[$i-1])
  282. $value[$i] = '';
  283. }
  284. //delete vocals
  285. for ($i=1;$i>$len;$i++)
  286. {
  287. //omitting first characer code and h
  288. if($value[$i] == 0)
  289. $value[$i] = '';
  290. }
  291. $value = array_filter($value);
  292. $value = implode('', $value);
  293. return $value;
  294. }
  295. function doSearchArticles($_bool = false)
  296. {
  297. $this->searchArticles = $_bool;
  298. $this->hashMe .= $_bool;
  299. }
  300. function doGroupBy($_bool = true)
  301. {
  302. $this->groupBy = $_bool;
  303. $this->hashMe .= $_bool;
  304. }
  305. /**
  306. *
  307. */
  308. function setSearchInIDs($_searchInIDs, $_reset = false)
  309. {
  310. if($_reset)
  311. $this->searchInIDs = array();
  312. if(array_key_exists('articles',$_searchInIDs))
  313. {
  314. if(!array_key_exists('articles',$this->searchInIDs))
  315. $this->searchInIDs['articles'] = array();
  316. foreach($_searchInIDs['articles'] as $id)
  317. {
  318. if($id = intval($id))
  319. {
  320. $this->searchInIDs['articles'][] = $id;
  321. $this->hashMe .= 'a'.$id;
  322. }
  323. }
  324. }
  325. if(array_key_exists('categories',$_searchInIDs))
  326. {
  327. if(!array_key_exists('categories',$this->searchInIDs))
  328. $this->searchInIDs['categories'] = array();
  329. foreach($_searchInIDs['categories'] as $id)
  330. {
  331. if($id = intval($id))
  332. {
  333. $this->searchInIDs['categories'][] = $id;
  334. $this->hashMe .= 'c'.$id;
  335. }
  336. }
  337. }
  338. if(array_key_exists('filecategories',$_searchInIDs))
  339. {
  340. if(!array_key_exists('filecategories',$this->searchInIDs))
  341. $this->searchInIDs['filecategories'] = array();
  342. foreach($_searchInIDs['filecategories'] as $id)
  343. {
  344. if($id = intval($id))
  345. {
  346. $this->searchInIDs['filecategories'][] = $id;
  347. $this->hashMe .= 'f'.$id;
  348. }
  349. }
  350. }
  351. if(array_key_exists('db_columns',$_searchInIDs))
  352. {
  353. if(!array_key_exists('db_columns',$this->searchInIDs))
  354. $this->searchInIDs['db_columns'] = array();
  355. foreach($_searchInIDs['db_columns'] as $table => $columnArray)
  356. {
  357. $this->hashMe .= $table;
  358. $tmp = array();
  359. foreach($columnArray as $column)
  360. {
  361. $tmp[] = $column;
  362. $this->hashMe .= $column;
  363. }
  364. if(!array_key_exists($table,$this->searchInIDs['db_columns']))
  365. $this->searchInIDs['db_columns'][$table] = $tmp;
  366. else
  367. $this->searchInIDs['db_columns'][$table] = array_merge($this->searchInIDs['db_columns'][$table],$tmp);
  368. }
  369. }
  370. }
  371. /**
  372. * If utf8-encoding is used, the parameter will be appended with an "u".
  373. * Since there is only UTF-8 supported, it always appends the "u".
  374. *
  375. * @param string $_regex
  376. * @return string
  377. */
  378. function encodeRegex($_regex)
  379. {
  380. return $_regex.'u';
  381. }
  382. /**
  383. * Simulates the frontend by setting $REX['REDAXO'] to false.
  384. * The original state is saved in $this->redaxo.
  385. */
  386. function beginFrontendMode()
  387. {
  388. global $REX;
  389. $this->redaxo = $REX['REDAXO'];
  390. $REX['REDAXO'] = false;
  391. }
  392. /**
  393. * Ends the frontend-mode by setting $REX['REDAXO'] to the original state.
  394. */
  395. function endFrontendMode()
  396. {
  397. global $REX;
  398. $REX['REDAXO'] = $this->redaxo;
  399. }
  400. /**
  401. * Sets the maximum count of letters the teaser of a searched through text may have.
  402. *
  403. * @param int $_count
  404. */
  405. function setMaxTeaserChars($_count)
  406. {
  407. $this->maxTeaserChars = intval($_count);
  408. $this->hashMe .= $_count;
  409. }
  410. /**
  411. * Sets the maximum count of letters around an found search term in the highlighted text.
  412. * @param int $_count
  413. */
  414. function setMaxHighlightedTextChars($_count)
  415. {
  416. $this->maxHighlightedTextChars = intval($_count);
  417. $this->hashMe .= $_count;
  418. }
  419. /**
  420. * Generates the full index at once.
  421. */
  422. function generateIndex()
  423. {
  424. // delete old index
  425. $delete = new rex_sql();
  426. $delete->setTable($this->tablePrefix.'587_searchindex');
  427. $delete->delete();
  428. $delete2 = new rex_sql();
  429. $delete2->setTable($this->tablePrefix.'587_searchcacheindex_ids');
  430. $delete2->delete();
  431. $delete3 = new rex_sql();
  432. $delete3->setTable($this->tablePrefix.'587_searchcache');
  433. $delete3->delete();
  434. // index articles
  435. $art_sql = new rex_sql();
  436. $art_sql->setTable($this->tablePrefix.'article');
  437. if($art_sql->select('id,clang'))
  438. {
  439. foreach($art_sql->getArray() as $art)
  440. {
  441. $this->indexArticle($art['id'], $art['clang']);
  442. }
  443. }
  444. // index columns
  445. foreach($this->includeColumns as $table => $columnArray)
  446. {
  447. foreach($columnArray as $column)
  448. {
  449. $this->indexColumn($table, $column);
  450. }
  451. }
  452. // index mediapool
  453. if($this->indexMediapool)
  454. {
  455. $mediaSQL = new rex_sql();
  456. $mediaSQL->setTable($this->tablePrefix.'file');
  457. if($mediaSQL->select('file_id, category_id, filename'))
  458. {
  459. foreach($mediaSQL->getArray() as $file)
  460. {
  461. $this->indexFile(str_replace('\\','/',substr($this->mediaFolder, strlen($this->documentRoot))).'/'.$file['filename'], false, false, $file['file_id'], $file['category_id']);
  462. }
  463. }
  464. }
  465. // index files
  466. foreach($this->includeDirectories as $dir)
  467. {
  468. foreach(a587_getFiles($dir, $this->fileExtensions) as $filename)
  469. {
  470. $this->indexFile($filename);
  471. }
  472. }
  473. }
  474. /**
  475. * Indexes a certain article.
  476. *
  477. * @param int $_id
  478. * @param mixed $_clang
  479. *
  480. * @return int
  481. */
  482. function indexArticle($_id,$_clang = false)
  483. {
  484. global $REX;
  485. if($_clang === false)
  486. $langs = $this->languages;
  487. else
  488. $langs = array(intval($_clang) => $this->languages[intval($_clang)]);
  489. $return = array();
  490. $keywords = array();
  491. foreach($langs as $langID => $v)
  492. {
  493. if(in_array($_id, $this->excludeIDs))
  494. {
  495. $return[$v] = A587_ART_EXCLUDED;
  496. continue;
  497. }
  498. $REX['CUR_CLANG'] = $langID;
  499. $delete = new rex_sql();
  500. $where = sprintf("ftable = '%s' AND fid = %d AND clang = %d", $delete->escape($this->tablePrefix.'article'), $_id, $langID);
  501. // delete from cache
  502. $select = new rex_sql();
  503. $select->setTable($this->tablePrefix.'587_searchindex');
  504. $select->setWhere($where);
  505. $select->select('id');
  506. $indexIds = array();
  507. foreach($select->getArray() as $result)
  508. $indexIds[] = $result['id'];
  509. $this->deleteCache($indexIds);
  510. // delete old
  511. $delete->setTable($this->tablePrefix.'587_searchindex');
  512. $delete->setWhere($where);
  513. $delete->delete();
  514. // index article
  515. $article = OOArticle::getArticleById(intval($_id), $langID);
  516. if(is_object($article) AND ($article->isOnline() OR $this->indexOffline))
  517. {
  518. $this->beginFrontendMode();
  519. if(ini_get('allow_url_fopen') AND $this->indexViaHTTP)
  520. {
  521. $articleText = @file_get_contents('http://'.$_SERVER['HTTP_HOST'].substr($_SERVER['PHP_SELF'],0,strpos($_SERVER['PHP_SELF'],'/redaxo/')+1).rex_geturl($_id, $langID, '', '&'));
  522. }
  523. elseif ($_id != 0 AND $this->dontIndexRedirects)
  524. {
  525. $rex_article = new rex_article(intval($_id), $langID);
  526. $article_content_file = $this->generatedPath.'/articles/'.$_id.'.'.$langID.'.content';
  527. if(!file_exists($article_content_file))
  528. {
  529. include_once ($this->includePath."/functions/function_rex_generate.inc.php");
  530. $generated = rex_generateArticleContent($_id, $langID);
  531. if($generated !== true)
  532. {
  533. $return[$v] = A587_ART_IDNOTFOUND;
  534. continue;
  535. }
  536. }
  537. if(file_exists($article_content_file) AND preg_match($this->encodeRegex('~(header\s*\(\s*["\']\s*Location\s*:)|(rex_redirect\s*\()~is'), rex_get_file_contents($article_content_file)))
  538. {
  539. $return[$v] = A587_ART_REDIRECT;
  540. continue;
  541. }
  542. if($this->indexWithTemplate)
  543. $articleText = $rex_article->getArticleTemplate();
  544. else
  545. $articleText = $rex_article->getArticle();
  546. if($this->ep_outputfilter)
  547. {
  548. $tmp = array(
  549. 'artid' => $REX['ARTICLE_ID'],
  550. 'clang' => $REX['CUR_CLANG']
  551. );
  552. $REX['ARTICLE_ID'] = $_id;
  553. $REX['CUR_CLANG'] = $langID;
  554. $articleText = rex_register_extension_point('OUTPUT_FILTER', $articleText, array('environment' => 'frontend','sendcharset' => false));
  555. $REX['ARTICLE_ID'] = $tmp['artid'];
  556. $REX['CUR_CLANG'] = $tmp['clang'];
  557. }
  558. }
  559. $insert = new rex_sql();
  560. $articleData = array();
  561. $articleData['texttype'] = 'article';
  562. $articleData['ftable'] = $this->tablePrefix.'article';
  563. $articleData['fcolumn'] = NULL;
  564. $articleData['clang'] = $article->getClang();
  565. $articleData['fid'] = intval($_id);
  566. $articleData['catid'] = $article->getCategoryId();
  567. $articleData['unchangedtext'] = $insert->escape($articleText);
  568. $articleData['plaintext'] = $insert->escape($plaintext = $this->getPlaintext($articleText));
  569. if(array_key_exists($REX['TABLE_PREFIX'].'article', $this->includeColumns))
  570. {
  571. $additionalValues = array();
  572. $select->flush();
  573. $select->setTable($REX['TABLE_PREFIX'].'article');
  574. $select->setWhere('id = '.$_id.' AND clang = '.$langID);
  575. $select->select('`'.implode('`,`', $this->includeColumns[$REX['TABLE_PREFIX'].'article']).'`');
  576. foreach($this->includeColumns[$REX['TABLE_PREFIX'].'article'] as $col)
  577. {
  578. $additionalValues[$col] = $select->getValue($col);
  579. }
  580. $articleData['values'] = $insert->escape(serialize($additionalValues));
  581. }
  582. foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
  583. {
  584. if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
  585. $keywords[] = array('search'=>$keyword,'clang'=>$langID);
  586. }
  587. $articleData['teaser'] = $insert->escape($this->getTeaserText($plaintext));
  588. $insert->setTable($this->tablePrefix.'587_searchindex');
  589. $insert->setValues($articleData);
  590. $insert->insert();
  591. $this->endFrontendMode();
  592. $return[$langID] = A587_ART_GENERATED;
  593. }
  594. }
  595. $this->storeKeywords($keywords, false);
  596. return $return;
  597. }
  598. /**
  599. * Indexes a certain column.
  600. * Returns the number of the indexed rows or false.
  601. *
  602. * @param string $_table
  603. * @param mixed $_column
  604. * @param mixed $_idcol
  605. * @param mixed $_id
  606. * @param mixed $_start
  607. * @param mixed $_count
  608. *
  609. * @return mixed
  610. */
  611. function indexColumn($_table, $_column, $_idcol = false, $_id = false, $_start = false, $_count = false, $_where = false)
  612. {
  613. $delete = new rex_sql();
  614. $where = sprintf(" `ftable` = '%s' AND `fcolumn` = '%s' AND `texttype` = 'db_column'",$delete->escape($_table),$delete->escape($_column));
  615. //if(is_string($_idcol) AND ($_id !== false))
  616. //$where .= sprintf(' AND fid = %d',$_id);
  617. // delete from cache
  618. $select = new rex_sql();
  619. $select->setTable($this->tablePrefix.'587_searchindex');
  620. $select->setWhere($where);
  621. $indexIds = array();
  622. if($select->select('id'))
  623. {
  624. foreach($select->getArray() as $result)
  625. $indexIds[] = $result['id'];
  626. $this->deleteCache($indexIds);
  627. }
  628. // delete old data
  629. if($_start === 0)
  630. {
  631. $delete->setTable($this->tablePrefix.'587_searchindex');
  632. $delete->setWhere($where);
  633. $delete->delete();
  634. }
  635. $sql = new rex_sql();
  636. // get primary key column(s)
  637. $primaryKeys = array();
  638. foreach($sql->getArray("SHOW COLUMNS FROM `".$_table."` WHERE `KEY` = 'PRI'") as $col)
  639. $primaryKeys[] = $col['Field'];
  640. // index column
  641. $sql->flush();
  642. $sql->setTable($_table);
  643. $where = '1 ';
  644. if(is_string($_idcol) AND $_id)
  645. $where .= sprintf(' AND (%s = %d)', $_idcol, $_id);
  646. if(!empty($_where) AND is_string($_where))
  647. $where .= ' AND ('.$_where.')';
  648. if(is_numeric($_start) AND is_numeric($_count))
  649. $where .= ' LIMIT '.$_start.','.$_count;
  650. $sql->setWhere($where);
  651. $count = false;
  652. if($sql->select('*'))
  653. {
  654. $this->beginFrontendMode();
  655. $count = 0;
  656. $keywords = array();
  657. foreach($sql->getArray() as $value)
  658. {
  659. if(!empty($value[$_column]) AND ($this->indexOffline OR $this->tablePrefix.'article' != $_table OR $value['status'] == '1') AND ($this->tablePrefix.'article' != $_table OR !in_array($value['id'],$this->excludeIDs)))
  660. {
  661. $insert = new rex_sql();
  662. $indexData = array();
  663. $indexData['texttype'] = 'db_column';
  664. $indexData['ftable'] = $_table;
  665. $indexData['fcolumn'] = $_column;
  666. if(array_key_exists('clang',$value))
  667. $indexData['clang'] = $value['clang'];
  668. else
  669. $indexData['clang'] = NULL;
  670. $indexData['fid'] = NULL;
  671. if(is_string($_idcol) AND array_key_exists($_idcol,$value))
  672. {
  673. $indexData['fid'] = $value[$_idcol];
  674. }
  675. elseif($_table == $this->tablePrefix.'article')
  676. {
  677. $indexData['fid'] = $value['id'];
  678. }
  679. elseif(count($primaryKeys) == 1)
  680. {
  681. $indexData['fid'] = $value[$primaryKeys[0]];
  682. }
  683. elseif(count($primaryKeys))
  684. {
  685. $fids = array();
  686. foreach($primaryKeys as $pk)
  687. $fids[$pk] = $value[$pk];
  688. $indexData['fid'] = json_encode($fids);
  689. }
  690. if(is_null($indexData['fid']))
  691. $indexData['fid'] = $this->getMinFID();
  692. if(array_key_exists('re_id',$value))
  693. {
  694. $indexData['catid'] = $value['re_id'];
  695. if($_table == $this->tablePrefix.'article')
  696. $indexData['catid'] = intval($value['startpage']) ? $value['id'] : $value['re_id'];
  697. }
  698. elseif(array_key_exists('category_id',$value))
  699. $indexData['catid'] = $value['category_id'];
  700. else
  701. $indexData['catid'] = NULL;
  702. $additionalValues = array();
  703. foreach($this->includeColumns[$_table] as $col)
  704. {
  705. $additionalValues[$col] = $value[$col];
  706. }
  707. $indexData['values'] = $insert->escape(serialize($additionalValues));
  708. $indexData['unchangedtext'] = $insert->escape((string) $value[$_column]);
  709. $indexData['plaintext'] = $insert->escape($plaintext = $this->getPlaintext($value[$_column]));
  710. foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
  711. {
  712. if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
  713. $keywords[] = array('search'=>$keyword,'clang'=>is_null($indexData['clang'])?false:$indexData['clang']);
  714. }
  715. $indexData['teaser'] = '';
  716. if($this->tablePrefix.'article' == $_table)
  717. {
  718. $rex_article = new rex_article(intval($value['id']), intval($value['clang']));
  719. $teaser = true;
  720. $article_content_file = $this->generatedPath.'/articles/'.intval($value['id']).'.'.intval($value['clang']).'.content';
  721. if(!file_exists($article_content_file))
  722. {
  723. include_once ($this->includePath."/functions/function_rex_generate.inc.php");
  724. $generated = rex_generateArticleContent(intval($value['id']), intval($value['clang']));
  725. if($generated !== true)
  726. {
  727. $teaser = false;
  728. continue;
  729. }
  730. }
  731. if(file_exists($article_content_file) AND preg_match($this->encodeRegex('~(header\s*\(\s*["\']\s*Location\s*:)|(rex_redirect\s*\()~is'), rex_get_file_contents($article_content_file)))
  732. {
  733. $teaser = false;
  734. }
  735. $indexData['teaser'] = $teaser ? $insert->escape($this->getTeaserText($this->getPlaintext($rex_article->getArticle()))) : '';
  736. }
  737. $insert->setTable($this->tablePrefix.'587_searchindex');
  738. $insert->setValues($indexData);
  739. $insert->insert();
  740. $count++;
  741. }
  742. }
  743. $this->storeKeywords($keywords, false);
  744. $this->endFrontendMode();
  745. }
  746. else
  747. {
  748. return false;
  749. }
  750. return $count;
  751. }
  752. /**
  753. * Indexes a certain file.
  754. * Returns A587_FILE_GENERATED or an error code.
  755. *
  756. * @param string $_filename
  757. * @param mixed $_clang
  758. * @param mixed $_doPlaintext
  759. * @param mixed $_articleData
  760. *
  761. * @return mixed
  762. */
  763. function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = false, $_catid = false)
  764. {
  765. // extract file-extension
  766. $filenameArray = explode('.', $_filename);
  767. $fileext = $filenameArray[count($filenameArray) - 1];
  768. // check file-extension
  769. if((!in_array($fileext, $this->fileExtensions) AND !empty($this->fileExtensions)) AND !$this->indexUnknownFileExtensions AND !$this->indexMissingFileExtensions)
  770. return A587_FILE_FORBIDDEN_EXTENSION;
  771. // delete cache
  772. $delete = new rex_sql();
  773. $where = sprintf(" `filename` = '%s' AND `texttype` = 'file'", $delete->escape($_filename));
  774. if(is_int($_clang))
  775. $where .= sprintf(' AND clang = %d',$_clang);
  776. if(is_int($_fid))
  777. $where .= sprintf(' AND fid = %d',$_fid);
  778. elseif(is_array($_fid))
  779. $where .= sprintf(" AND fid = '%s'",$delete->escape(json_encode($_fid)));
  780. if(is_int($_catid))
  781. $where .= sprintf(' AND catid = %d',$_catid);
  782. // delete from cache
  783. $select = new rex_sql();
  784. $select->setTable($this->tablePrefix.'587_searchindex');
  785. $select->setWhere($where);
  786. $indexIds = array();
  787. if($select->select('id'))
  788. {
  789. foreach($select->getArray() as $result)
  790. $indexIds[] = $result['id'];
  791. $this->deleteCache($indexIds);
  792. }
  793. // delete old data
  794. $delete->setTable($this->tablePrefix.'587_searchindex');
  795. $delete->setWhere($where);
  796. $delete->delete();
  797. // index file
  798. $text = '';
  799. $plaintext = '';
  800. switch($fileext)
  801. {
  802. // pdf-files
  803. case 'pdf':
  804. // try XPDF
  805. $return = 0;
  806. $xpdf = false;
  807. $error = false;
  808. if(function_exists('exec'))
  809. {
  810. $tempFile = tempnam($this->generatedPath.'/files/', 'rexsearch');
  811. $encoding = 'UTF-8';
  812. exec('pdftotext '.escapeshellarg($this->documentRoot.'/'.$_filename).' '.escapeshellarg($tempFile).' -enc '.$encoding, $dummy, $return);
  813. if($return > 0)
  814. {
  815. if($return == 1)
  816. $error = A587_FILE_XPDFERR_OPENSRC;
  817. if($return == 2)
  818. $error = A587_FILE_XPDFERR_OPENDEST;
  819. if($return == 3)
  820. $error = A587_FILE_XPDFERR_PERM;
  821. if($return == 99)
  822. $error = A587_FILE_XPDFERR_OTHER;
  823. }
  824. else
  825. {
  826. if(false === $text = @file_get_contents($tempFile))
  827. $error = A587_FILE_NOEXIST;
  828. else
  829. $xpdf = true;
  830. }
  831. unlink($tempFile);
  832. }
  833. if(!$xpdf)
  834. {
  835. // if xpdf returned an error, try pdf2txt via php
  836. if(false === $pdfContent = @file_get_contents($this->documentRoot.'/'.$_filename))
  837. $error = A587_FILE_NOEXIST;
  838. else
  839. {
  840. require_once 'class.pdf2txt.inc.php';
  841. $text = pdf2txt::directConvert($pdfContent);
  842. $error = false;
  843. }
  844. }
  845. if($error !== false)
  846. return $error;
  847. elseif(trim($text) == '')
  848. return A587_FILE_EMPTY;
  849. $plaintext = $this->getPlaintext($text);
  850. break;
  851. // html- or php-file
  852. case 'htm':
  853. case 'html':
  854. case 'php':
  855. if(false === $text = @file_get_contents($this->documentRoot.'/'.$_filename))
  856. return A587_FILE_NOEXIST;
  857. $plaintext = $this->getPlaintext($text);
  858. // other filetype
  859. default:
  860. if(false === $text = @file_get_contents($this->documentRoot.'/'.$_filename))
  861. return A587_FILE_NOEXIST;
  862. }
  863. $text = @iconv(mb_detect_encoding($text), 'UTF-8', $text);
  864. // Plaintext
  865. if(empty($plaintext))
  866. {
  867. if($_doPlaintext)
  868. $plaintext = $this->getPlaintext($text);
  869. else
  870. $plaintext = $text;
  871. }
  872. // index file-content
  873. $insert = new rex_sql();
  874. $fileData['texttype'] = 'file';
  875. if($_fid !== false)
  876. $fileData['ftable'] = $this->tablePrefix.'file';
  877. $fileData['filename'] = $insert->escape($_filename);
  878. $fileData['fileext'] = $insert->escape($fileext);;
  879. if($_clang !== false)
  880. $fileData['clang'] = intval($_clang);
  881. if($_fid !== false)
  882. $fileData['fid'] = intval($_fid);
  883. else
  884. $fileData['fid'] = NULL;
  885. if(is_null($fileData['fid']))
  886. $fileData['fid'] = $this->getMinFID();
  887. if($_catid !== false)
  888. $fileData['catid'] = intval($_catid);
  889. $fileData['unchangedtext'] = $insert->escape($text);
  890. $fileData['plaintext'] = $insert->escape($plaintext);
  891. $keywords = array();
  892. foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
  893. {
  894. if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
  895. $keywords[] = array('search'=>$keyword,'clang'=>!isset($fileData['clang'])?false:$fileData['clang']);
  896. }
  897. $this->storeKeywords($keywords, false);
  898. $fileData['teaser'] = $insert->escape($this->getTeaserText($plaintext));
  899. $insert->setTable($this->tablePrefix.'587_searchindex');
  900. $insert->setValues($fileData);
  901. $insert->insert();
  902. return A587_FILE_GENERATED;
  903. }
  904. function getMinFID()
  905. {
  906. $minfid_sql = new rex_sql();
  907. $minfid_result = $minfid_sql->getArray('SELECT MIN(CONVERT(fid, SIGNED)) as minfid FROM `'.$this->tablePrefix.'587_searchindex`');
  908. $minfid = intval($minfid_result[0]['minfid']);
  909. return ($minfid < 0) ? --$minfid : -1;
  910. }
  /**
   * Excludes an article from the index.
   *
   * Deletes the article's rows from the search index and removes the cached
   * search results that refer to those rows.
   *
   * @param int $_id article id (stored as "fid" in the index)
   * @param mixed $_clang language id, or false to exclude the article in every language
   */
  function excludeArticle($_id,$_clang = false)
  {
      $table = $this->tablePrefix.'587_searchindex';

      $where = "fid = ".intval($_id)." AND texttype='article'";
      if ($_clang !== false) {
          $where .= " AND clang='".intval($_clang)."'";
      }

      // Collect the affected index ids BEFORE the rows are deleted. Selecting
      // them afterwards with the same WHERE clause can never find anything,
      // which would leave stale entries in the search cache.
      $select = new rex_sql();
      $select->setTable($table);
      $select->setWhere($where);
      $select->select('id');
      $indexIds = array();
      foreach ($select->getArray() as $result) {
          $indexIds[] = $result['id'];
      }

      // exclude article
      $art_sql = new rex_sql();
      $art_sql->setTable($table);
      $art_sql->setWhere($where);
      $art_sql->delete();

      // delete from cache
      $this->deleteCache($indexIds);
  }
  /**
   * Empties the search index.
   *
   * Deletes every row of the search index table and afterwards calls
   * deleteCache() without arguments.
   */
  function deleteIndex()
  {
      $indexTable = $this->tablePrefix.'587_searchindex';

      $sql = new rex_sql();
      $sql->setTable($indexTable);
      $sql->delete();

      $this->deleteCache();
  }
  /**
   * Defines the tags that are put around found keywords.
   *
   * Call it either with the start tag and the end tag as two arguments,
   * or with a single array that holds both tags. The tags are also added
   * to the string the cache hash is built from.
   *
   * @param mixed $_tags start tag, or array(start tag, end tag)
   * @param mixed $_endtag end tag; leave at false when passing an array
   */
  function setSurroundTags($_tags, $_endtag = false)
  {
      if (is_array($_tags) && $_endtag === false) {
          // array form: taken over unchanged
          $tags = $_tags;
      } else {
          $tags = array((string) $_tags, (string) $_endtag);
      }

      $this->surroundTags = $tags;
      $this->hashMe .= $tags[0].$tags[1];
  }
  /**
   * Restricts the number of results.
   *
   * Accepted call forms:
   * setLimit(10,10);         // offset 10, at most 10 results
   * setLimit(20);            // offset 0, at most 20 results
   * setLimit(array(0,20));   // offset 0, at most 20 results
   *
   * Both values are cast to int and appended to the string the cache hash
   * is built from.
   *
   * @param mixed $_limit offset, count or array(offset, count)
   * @param mixed $_countLimit count, or false when $_limit carries everything
   */
  function setLimit($_limit, $_countLimit = false)
  {
      if ($_countLimit !== false) {
          // explicit offset and count
          $start = (int) $_limit;
          $count = (int) $_countLimit;
      } elseif (is_array($_limit)) {
          $start = (int) $_limit[0];
          $count = (int) $_limit[1];
      } else {
          // only a count was given
          $start = 0;
          $count = (int) $_limit;
      }

      $this->limit = array($start, $count);
      $this->hashMe .= $start.$count;
  }
  /**
   * Adds words that must not be searched for.
   *
   * Each word is appended to the blacklist (lower-cased with strtolower()
   * when the search is case insensitive) and to the string the cache hash
   * is built from.
   *
   * @param array $_words
   */
  function setBlacklist($_words)
  {
      foreach ($_words as $word) {
          if ($this->ci) {
              $word = strtolower($word);
          }
          $entry = (string) $word;

          $this->blacklist[] = $entry;
          $this->hashMe .= $entry;
      }
  }
  /**
   * Adds article ids to the list of excluded ids.
   *
   * Every given id is converted with intval() and appended to the ids
   * that are already stored; duplicates are removed afterwards.
   *
   * @param array $_ids
   */
  function setExcludeIDs($_ids)
  {
      $excluded = $this->excludeIDs;
      foreach ($_ids as $id) {
          $excluded[] = intval($id);
      }

      $this->excludeIDs = array_unique($excluded);
  }
  /**
   * Restricts the search to the articles with the given ids.
   *
   * Forwards the ids under the key 'articles' to setSearchInIDs().
   *
   * @param array $_ids
   */
  function searchInArticles($_ids)
  {
      $restriction = array('articles' => $_ids);
      $this->setSearchInIDs($restriction);
  }
  /**
   * Restricts the search to the categories with the given ids.
   *
   * Forwards the ids under the key 'categories' to setSearchInIDs().
   *
   * @param array $_ids
   */
  function searchInCategories($_ids)
  {
      $restriction = array('categories' => $_ids);
      $this->setSearchInIDs($restriction);
  }
  /**
   * Restricts the search to the mediapool categories with the given ids.
   *
   * Forwards the ids under the key 'filecategories' to setSearchInIDs().
   *
   * @param array $_ids
   */
  function searchInFileCategories($_ids)
  {
      $restriction = array('filecategories' => $_ids);
      $this->setSearchInIDs($restriction);
  }
  /**
   * Restricts the search to one database column.
   *
   * Forwards the table/column pair under the key 'db_columns' to
   * setSearchInIDs().
   *
   * @param string $_table
   * @param string $_column
   */
  function searchInDbColumn($_table, $_column)
  {
      $columnsByTable = array($_table => array($_column));
      $this->setSearchInIDs(array('db_columns' => $columnsByTable));
  }
  /**
   * Stores the columns which should be indexed.
   *
   * The given value replaces the current content of $this->includeColumns.
   *
   * @param array $_columns
   */
  function setIncludeColumns($_columns)
  {
      $columns = $_columns;
      $this->includeColumns = $columns;
  }
  /**
   * Sets an additional SQL condition for the search.
   *
   * search() appends a non-empty condition with AND to its WHERE clause.
   * The condition is also added to the string the cache hash is built from.
   *
   * @param string $_where
   */
  function setWhere($_where)
  {
      $this->hashMe .= $_where;
      $this->where = $_where;
  }
  /**
   * Chooses how several keywords are combined.
   *
   * 'and', 'konj', 'strict' and 'sharp' require every keyword to be found,
   * 'or', 'disj', 'simple' and 'fuzzy' are satisfied by a single keyword.
   * The comparison is case insensitive. An unknown value falls back to the
   * AND mode, leaves the cache hash string untouched and yields false.
   *
   * @param string $_logicalMode
   *
   * @return bool true if the mode was recognised
   */
  function setLogicalMode($_logicalMode)
  {
      $mode = strtolower($_logicalMode);
      $conjunctive = array('and', 'konj', 'strict', 'sharp');
      $disjunctive = array('or', 'disj', 'simple', 'fuzzy');

      if (in_array($mode, $conjunctive, true)) {
          $this->logicalMode = ' AND ';
      } elseif (in_array($mode, $disjunctive, true)) {
          $this->logicalMode = ' OR ';
      } else {
          $this->logicalMode = ' AND ';
          return false;
      }

      $this->hashMe .= $this->logicalMode;
      return true;
  }
  /**
   * Chooses which text is searched through.
   *
   * 'html', 'xhtml', 'unmodified', 'original'      => 'unmodified'
   * 'text', 'plain', 'stripped', 'bare', 'simple'  => 'plain'
   * 'both', 'all'                                  => 'both'
   *
   * The comparison is case insensitive. An unknown value changes nothing
   * and yields false.
   *
   * @param string $_textMode
   *
   * @return bool true if the mode was recognised
   */
  function setTextMode($_textMode)
  {
      $aliases = array(
          'html'       => 'unmodified',
          'xhtml'      => 'unmodified',
          'unmodified' => 'unmodified',
          'original'   => 'unmodified',
          'text'       => 'plain',
          'plain'      => 'plain',
          'stripped'   => 'plain',
          'bare'       => 'plain',
          'simple'     => 'plain',
          'both'       => 'both',
          'all'        => 'both'
      );

      $requested = strtolower($_textMode);
      if (!isset($aliases[$requested])) {
          return false;
      }

      $this->textMode = $aliases[$requested];
      $this->hashMe .= $this->textMode;
      return true;
  }
  /**
   * Chooses the MySQL search mode.
   *
   * Accepts 'like' or 'match' in any letter case. Any other value changes
   * nothing and yields false.
   *
   * @param string $_searchMode
   *
   * @return bool true if the mode was recognised
   */
  function setSearchMode($_searchMode)
  {
      $mode = strtolower($_searchMode);
      if ($mode !== 'like' && $mode !== 'match') {
          return false;
      }

      $this->searchMode = $mode;
      $this->hashMe .= $mode;
      return true;
  }
  /**
   * Sets the sort order of the results.
   *
   * The parameter has to be an array with the columns as keys
   * and the direction (DESC or ASC) as value (e.g.: array['COLUMN'] = 'ASC').
   * Column names and directions are converted to upper case. A column named
   * "RELEVANCE_587" is skipped, an invalid direction is replaced by DESC;
   * both cases raise a PHP error message via trigger_error().
   *
   * NOTE(review): search() selects the relevance under the alias RELEVANCE587
   * (without underscore) - confirm which name is meant to be reserved here.
   *
   * @param array $_order
   *
   * @return bool false if $_order is not an array
   */
  function setOrder($_order)
  {
      if (!is_array($_order)) {
          // error() is no PHP function; trigger_error() takes exactly these arguments
          trigger_error('Wrong parameter. Expecting an array',E_USER_WARNING);
          return false;
      }

      $i = 0;
      foreach ($_order as $col => $dir) {
          $i++;

          $col2upper = strtoupper((string)$col);
          if ('RELEVANCE_587' == $col2upper) {
              trigger_error(sprintf('Column %d must not be named "RELEVANCE_587". Column %d is ignored for the sort order',$i,$i));
              continue;
          }

          $dir2upper = strtoupper((string)$dir);
          if (!in_array($dir2upper, array('ASC','DESC'))) {
              trigger_error(sprintf('Column %d has no correct sort order (ASC or DESC). Descending (DESC) sort order is assumed',$i));
              $dir2upper = 'DESC';
          }

          $this->order[$col2upper] = $dir2upper;
          $this->hashMe .= $col2upper.$dir2upper;
      }

      return true;
  }
  /**
   * Sets the type of the text with the highlighted keywords.
   *
   * Valid types are 'sentence', 'paragraph', 'surroundtext',
   * 'surroundtextsingle', 'teaser' and 'array'. Any other value selects
   * 'surroundtextsingle' and yields false. The type that is finally in
   * effect is added to the string the cache hash is built from, so results
   * cached for one highlight type are not served for another one.
   *
   * @param string $_type
   *
   * @return bool true if the type was recognised
   */
  function setHighlightType($_type)
  {
      $validTypes = array('sentence', 'paragraph', 'surroundtext', 'surroundtextsingle', 'teaser', 'array');

      // strict comparison: a loose switch would accept values like 0 or true
      $known = in_array($_type, $validTypes, true);
      $this->highlightType = $known ? $_type : 'surroundtextsingle';

      // must happen before returning, otherwise the hash never sees the type
      $this->hashMe .= $this->highlightType;

      return $known;
  }
  /**
   * Converts the search string to an array.
   *
   * The string is split into words and double-quoted phrases, each of which
   * may be preceded by plus signs. Terms that match a blacklisted word are
   * recorded in $this->blacklisted and left out. Every remaining term is
   * stored in $this->searchArray with its weight
   * (number of plus signs + 1 + whitelist weight) and the current language.
   *
   * Returns the number of search terms.
   *
   * @param string $_searchString
   *
   * @return int
   */
  function parseSearchString($_searchString)
  {
      // reset searchArray
      $this->searchArray = array();

      $matches = array();
      preg_match_all($this->encodeRegex('~(?:(\+*)"([^"]*)")|(?:(\+*)(\S+))~is'), $_searchString, $matches, PREG_SET_ORDER);

      $count = 0;
      foreach ($matches as $match) {
          if (count($match) == 5) {
              // words without double quotes (foo)
              $word = $match[4];
              $plus = $match[3];
          } elseif (isset($match[2]) && $match[2] !== '') {
              // words with double quotes ("foo bar"); compared against ''
              // because empty() would also drop the phrase "0"
              $word = $match[2];
              $plus = $match[1];
          } else {
              continue;
          }

          $notBlacklisted = true;
          // blacklisted words are excluded
          foreach ($this->blacklist as $blacklistedWord) {
              if (preg_match($this->encodeRegex('~\b'.preg_quote($blacklistedWord,'~').'\b~is'), $word)) {
                  $this->blacklisted[] = array($blacklistedWord => $word);
                  $notBlacklisted = false;
              }
          }

          if ($notBlacklisted) {
              // whitelisted words get extra weighted; addWhitelist() lower-cases
              // its keys in case-insensitive mode, so the lookup has to do the same
              $whitelistKey = $this->ci ? strtolower($word) : $word;
              $bonus = array_key_exists($whitelistKey, $this->whitelist) ? $this->whitelist[$whitelistKey] : 0;

              $this->searchArray[$count] = array(
                  'search' => $word,
                  'weight' => strlen($plus) + 1 + $bonus,
                  'clang' => $this->clang
              );
              $count++;
          }
      }

      return $count;
  }
  /**
   * Which words are important?
   *
   * This method adds weight to special words.
   * If a word already exists, the given weight is added to its weight.
   * Expects an array with the keys containing the words
   * and the values containing the weight to add.
   * In case-insensitive mode the words are lower-cased with strtolower().
   *
   * @param array $_whitelist
   */
  function addWhitelist($_whitelist)
  {
      foreach ($_whitelist as $word => $weight) {
          $key = (string)($this->ci?strtolower($word):$word);
          $this->hashMe .= $key;

          // a word that is new to the whitelist starts at 0 (avoids reading an undefined index)
          $current = isset($this->whitelist[$key]) ? intval($this->whitelist[$key]) : 0;
          $this->whitelist[$key] = $current + intval($weight);
      }
  }
  /**
   * Case sensitive or case insensitive?
   *
   * Alias of setCI().
   *
   * @param bool $_ci
   *
   * @ignore
   */
  function setCaseInsensitive($_ci = true)
  {
      // setCI() is a method of this class, not a global function
      $this->setCI($_ci);
  }
  /**
   * Switches between case insensitive (true) and case sensitive (false).
   *
   * The argument is reduced to a boolean before it is stored.
   *
   * @param bool $_ci
   *
   * @ignore
   */
  function setCI($_ci = true)
  {
      $this->ci = $_ci ? true : false;
  }
  /**
   * Sets the language id.
   *
   * false is stored as it is, any other value is converted with intval().
   * The value as passed in is added to the string the cache hash is built from.
   *
   * @param mixed $_clang language id or false
   */
  function setClang($_clang)
  {
      $this->hashMe .= $_clang;
      $this->clang = ($_clang === false) ? false : intval($_clang);
  }
  /**
   * Strips the HTML-Tags from a text and replaces them with spaces or line breaks
   *
   * The text is first passed to the extension point A587_PLAINTEXT. If the
   * extension returns a string, that string is used as text. If it returns
   * an array, its 'text' entry is used (when present) and the built-in
   * processing only runs if its 'process' entry is non-empty.
   *
   * Built-in processing: head and script elements are removed together with
   * their content, block-level tags become line breaks, all other tags
   * become spaces, and runs of line breaks or blanks are collapsed.
   *
   * @param string $_text
   *
   * @return string
   */
  function getPlaintext($_text)
  {
      $process = true;
      $extensionReturn = rex_register_extension_point('A587_PLAINTEXT', $_text);
      if (is_array($extensionReturn)) {
          if (isset($extensionReturn['text'])) {
              $_text = $extensionReturn['text'];
          }
          $process = !empty($extensionReturn['process']);
      } elseif (is_string($extensionReturn)) {
          $_text = $extensionReturn;
      }

      if ($process) {
          // The lookahead (?=[\s/>]) ends the tag name, so <p> and </div> match
          // without attributes, while <param> or <header> are not mistaken
          // for <p> or <head>.
          $tags2nl = $this->encodeRegex('~</?(address|blockquote|center|del|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ins|isindex|menu|noframes|noscript|ol|p|pre|table|ul)(?=[\s/>])[^>]*>~si');

          $patterns = array(
              // \1 makes sure the closing tag belongs to the element that was opened
              $this->encodeRegex('~<(head|script)(?=[\s/>])[^>]*>.*?</\1\s*>~si'),
              $tags2nl,
              $this->encodeRegex('~<[^>]+>~si'),
              $this->encodeRegex('~[\n\r]+~si'),
              $this->encodeRegex('~[\t ]+~si')
          );
          $replacements = array('', "\n", ' ', "\n", ' ');

          $_text = trim(strip_tags(preg_replace($patterns, $replacements, $_text)));
      }

      return $_text;
  }
  /**
   * According to the highlight-type this method will return a string or an array.
   * Found keywords will be highlighted with the surround-tags.
   *
   * 'sentence' / 'paragraph': the first sentence or line containing a keyword.
   * 'surroundtext' / 'surroundtextsingle': the text around the hits, joined
   * with the ellipsis ('surroundtextsingle' stops at the first usable hit of
   * each keyword).
   * 'array': an array with the keywords as keys and lists of highlighted
   * snippets as values.
   * 'teaser': the teaser text with highlighted keywords.
   * Any other highlight type returns nothing.
   *
   * If searchEntities is set, the HTML-entity encoded form of each keyword is
   * highlighted as well. $this->searchArray is not modified.
   *
   * @param string $_text
   *
   * @return mixed
   */
  function getHighlightedText($_text)
  {
      // Work on a local keyword list. Appending the entity variants to
      // $this->searchArray itself made the list double on every call,
      // because every switch branch returns before it could be restored.
      $keywords = $this->searchArray;
      if ($this->searchEntities) {
          foreach ($this->searchArray as $keyword) {
              $keywords[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8'));
          }
      }
      $keywordCount = count($keywords);

      // replacement for preg_replace(): the match wrapped in the surround tags
      $highlight = $this->surroundTags[0].'$0'.$this->surroundTags[1];

      switch ($this->highlightType) {
          case 'sentence':
          case 'paragraph':
              // split text at punctuation marks
              if ($this->highlightType == 'sentence') {
                  $regex = '~(\!|\.|\?|[\n]+)~si';
              }
              // split text at line breaks
              if ($this->highlightType == 'paragraph') {
                  $regex = '~([\r\n])~si';
              }
              $Apieces = preg_split($this->encodeRegex($regex), $_text, -1, PREG_SPLIT_DELIM_CAPTURE);

              $search = array();
              $replace = array();
              foreach ($keywords as $keyword) {
                  $quoted = preg_quote($keyword['search'],'~');
                  $search[] = $quoted;
                  $replace[] = $this->encodeRegex('~'.$quoted.'~is');
              }
              $alternatives = implode('|',$search);

              // take the first piece that contains one of the keywords
              $return = '';
              foreach ($Apieces as $piece) {
                  if (preg_match($this->encodeRegex('~('.$alternatives.')~is'), $piece)) {
                      $return = $piece;
                      break;
                  }
              }

              // cut the piece after the first keyword plus maxHighlightedTextChars characters
              $cutted = array();
              if (!preg_match($this->encodeRegex('~^.*?('.$alternatives.').{0,'.$this->maxHighlightedTextChars.'}~ims'), $return, $cutted)) {
                  // no piece contains a keyword: nothing to highlight
                  return '';
              }

              // NOTE(review): this compares the length of the matched keyword
              // ($cutted[1]) with the piece; $cutted[0] may have been intended - confirm.
              $needEllipses = (strlen($cutted[1]) != strlen($return));

              // drop the (possibly truncated) last word; a piece without any
              // blank is kept completely instead of being emptied
              $lastSpace = strrpos($cutted[0], ' ');
              $visible = ($lastSpace === false) ? $cutted[0] : substr($cutted[0], 0, $lastSpace);

              $return = preg_replace($replace, $highlight, $visible);
              if ($needEllipses) {
                  $return .= ' '.$this->ellipsis;
              }
              return $return;

          case 'surroundtext':
          case 'surroundtextsingle':
          case 'array':
              $startEllipsis = false;
              $endEllipsis = false;
              $Ahighlighted = array();
              $_text = preg_replace('~\s+~', ' ', $_text);

              $replace = array();
              foreach ($keywords as $keyword) {
                  $replace[] = $this->encodeRegex('~'.preg_quote($keyword['search'],'~').'~is');
              }

              $positions = array();
              for ($i = 0; $i < $keywordCount; $i++) {
                  $term = $keywords[$i]['search'];
                  $hits = array();
                  $offset = 0;
                  // $hit[1]: whole snippet, $hit[2]: text before, $hit[3]: text after the keyword
                  preg_match_all($this->encodeRegex('~((.{0,'.$this->maxHighlightedTextChars.'})'.preg_quote($term,'~').'(.{0,'.$this->maxHighlightedTextChars.'}))~ims'), $_text, $hits, PREG_SET_ORDER);
                  foreach ($hits as $hit) {
                      $offset = strpos($_text, $hit[0], $offset) + 1;
                      // number of the text window the hit starts in; used to skip snippets from an already covered window
                      $currentposition = ceil(intval(($offset - 1) / (2 * $this->maxHighlightedTextChars)));

                      if ($this->highlightType == 'array' && !array_key_exists($term, $Ahighlighted)) {
                          $Ahighlighted[$term] = array();
                      }

                      if (trim($hit[1]) != '') {
                          $surroundText = $hit[1];
                          // cut off partial words at both ends of the snippet
                          if (strlen($hit[2]) > 0 && false !== strpos($hit[2], ' ')) {
                              $surroundText = substr($surroundText, strpos($surroundText, ' '));
                          }
                          if (strlen($hit[3]) > 0 && false !== strpos($hit[3], ' ')) {
                              $surroundText = substr($surroundText, 0, strrpos($surroundText,' '));
                          }

                          if ($i == 0 && strlen($hit[2]) > 0) {
                              $startEllipsis = true;
                          }
                          if ($i == ($keywordCount - 1) && strlen($hit[3]) > 0) {
                              $endEllipsis = true;
                          }

                          if ($this->highlightType == 'array') {
                              $Ahighlighted[$term][] = preg_replace($replace, $highlight, trim($surroundText));
                          } elseif (!in_array($currentposition, $positions)) {
                              $Ahighlighted[] = trim($surroundText);
                          }
                          $positions[] = $currentposition;

                          if ($this->highlightType == 'surroundtextsingle') {
                              break;
                          }
                      }
                  }
              }

              if ($this->highlightType == 'array') {
                  return $Ahighlighted;
              }

              $return = implode(' '.$this->ellipsis.' ', $Ahighlighted);
              if ($startEllipsis) {
                  $return = $this->ellipsis.' '.$return;
              }
              if ($endEllipsis) {
                  $return = $return.' '.$this->ellipsis;
              }
              return preg_replace($replace, $highlight, $return);

          case 'teaser':
              $search = array();
              foreach ($keywords as $keyword) {
                  $search[] = $this->encodeRegex('~'.preg_quote($keyword['search'],'~').'~is');
              }
              return preg_replace($search, $highlight, $this->getTeaserText($_text));
      }
      // unknown highlight type: nothing is returned
  }
  /**
   * Builds the teaser of a text.
   *
   * The text is split at whitespace and the words are joined again with
   * single blanks (each word is followed by a blank) as long as the length
   * counted with strlen() stays within maxTeaserChars. If a word does not
   * fit any more, the teaser ends there and the ellipsis is appended.
   *
   * @param string $_text
   *
   * @return string
   */
  function getTeaserText($_text)
  {
      $limit = $this->maxTeaserChars;
      // the limit also caps the number of pieces; the last piece holds the rest of the text
      $words = preg_split($this->encodeRegex('~\s+~si'), $_text, $limit);

      $teaser = '';
      $used = 0;
      foreach ($words as $word) {
          $length = strlen($word);
          if ($used + $length > $limit) {
              // the next word would exceed the limit
              return $teaser.$this->ellipsis;
          }
          $teaser .= $word.' ';
          $used += $length + 1;
      }

      return $teaser;
  }
  /**
   * Tells whether a search term is already cached.
   *
   * Looks up the cache row whose hash equals cacheHash($_search). The
   * unserialized result of the first row found is stored in
   * $this->cachedArray.
   *
   * @param string $_search
   *
   * @return bool true if a cached result was found and could be unserialized
   */
  function isCached($_search)
  {
      $lookup = new rex_sql();
      $lookup->setTable($this->tablePrefix.'587_searchcache');
      $lookup->setWhere(sprintf("hash = '%s'",$this->cacheHash($_search)));

      if (!$lookup->select('returnarray')) {
          return false;
      }

      foreach ($lookup->getArray() as $row) {
          // only the first row is looked at
          $this->cachedArray = unserialize($row['returnarray']);
          return false !== $this->cachedArray;
      }

      return false;
  }
  /**
   * Calculates the cache hash.
   *
   * The hash is the MD5 sum of the search string followed by $this->hashMe,
   * the string the setter methods append their values to.
   *
   * @param string $_searchString
   *
   * @return string
   */
  function cacheHash($_searchString)
  {
      $fingerprint = $_searchString.$this->hashMe;

      return md5($fingerprint);
  }
  /**
   * Stores a search result in the cache.
   *
   * Inserts the result together with the cache hash of the current search
   * string into the cache table and links the new cache row with every
   * given index id.
   *
   * @param string $_result result to store (it is escaped before it is stored)
   * @param array $_indexIds ids of the index rows the result is built from
   *
   * @return bool result of the linking query; true if there was nothing to link
   */
  function cacheSearch($_result, $_indexIds)
  {
      $sql = new rex_sql();
      $sql->setTable($this->tablePrefix.'587_searchcache');
      $sql->setValues(array(
              'hash' => $this->cacheHash($this->searchString),
              'returnarray' => $sql->escape($_result)
          )
      );
      $sql->insert();
      $lastId = $sql->getLastId();

      // A search without hits has no index rows to link. Building the INSERT
      // anyway would end in "VALUES ;", which is not valid SQL.
      if (empty($_indexIds)) {
          return true;
      }

      $Ainsert = array();
      foreach ($_indexIds as $id) {
          $Ainsert[] = sprintf('(%d,%d)',$id,$lastId);
      }

      $sql2 = new rex_sql();
      return $sql2->setQuery(
          sprintf(
              "INSERT INTO `%s` (index_id,cache_id) VALUES\n%s;",
              $this->tablePrefix.'587_searchcacheindex_ids',
              implode(',',$Ainsert)
          )
      );
  }
  /**
   * Truncates the cache or deletes all data that are concerned with the given index-ids.
   *
   * Called with false, both cache tables are emptied. Called with a
   * non-empty array of index ids, every cached search that is linked to one
   * of these ids is removed together with its links, and finally all cached
   * searches without any link are removed as well. Any other argument
   * (for instance an empty array) does nothing.
   *
   * @param mixed $_indexIds false or array of index ids
   */
  function deleteCache($_indexIds = false)
  {
      $cacheTable = $this->tablePrefix.'587_searchcache';
      $linkTable = $this->tablePrefix.'587_searchcacheindex_ids';

      if ($_indexIds === false) {
          // delete entire search-cache
          $dropLinks = new rex_sql();
          $dropLinks->setTable($linkTable);
          $dropLinks->delete();

          $dropCache = new rex_sql();
          $dropCache->setTable($cacheTable);
          $dropCache->delete();
          return;
      }

      if (!is_array($_indexIds) || empty($_indexIds)) {
          return;
      }

      // find the cached searches that are linked to the given index ids
      $lookup = new rex_sql();
      $query = sprintf(
          "\nSELECT cache_id\nFROM %s\nWHERE index_id IN (%s)",
          $linkTable,
          implode(',',$_indexIds)
      );
      // the leading 0 keeps the IN (...) list non-empty
      $deleteIds = array(0);
      foreach ($lookup->getArray($query) as $row) {
          $deleteIds[] = $row['cache_id'];
      }
      $idList = implode(',',$deleteIds);

      // delete from search-cache where indexed IDs exist
      $dropCache = new rex_sql();
      $dropCache->setTable($cacheTable);
      $dropCache->setWhere('id IN ('.$idList.')');
      $dropCache->delete();

      // delete the cache-ID and index-ID
      $dropLinks = new rex_sql();
      $dropLinks->setTable($linkTable);
      $dropLinks->setWhere('cache_id IN ('.$idList.')');
      $dropLinks->delete();

      // delete all cached searches which had no result (because now they maybe will have)
      $dropUnlinked = new rex_sql();
      $dropUnlinked->setTable($cacheTable);
      $dropUnlinked->setWhere(sprintf('id NOT IN (SELECT cache_id FROM `%s`)',$linkTable));
      $dropUnlinked->delete();
  }
  /**
   * Stores keywords in the keyword table used for the similar-words search.
   *
   * Keywords whose lower-cased form is on the blacklist or among the stop
   * words are skipped. For each remaining keyword the soundex, metaphone and
   * cologne-phone codes are stored as far as they are enabled in
   * similarwordsMode; disabled codes are stored as empty strings. A keyword
   * without language is stored with clang -1. If a keyword already exists,
   * its counter is increased by 1, or left as it is when $_doCount is false.
   *
   * @param array $_keywords list of arrays with the key 'search' and optionally 'clang'
   * @param bool $_doCount whether existing keywords get their counter increased
   */
  function storeKeywords($_keywords, $_doCount = true)
  {
      // store similar words
      $db = new rex_sql();
      $rows = array();

      foreach ($_keywords as $keyword) {
          $term = $keyword['search'];
          $lowered = mb_strtolower($term, 'UTF-8');
          if (in_array($lowered, $this->blacklist) || in_array($lowered, $this->stopwords)) {
              continue;
          }

          $escaped = $db->escape($term);
          $soundex = ($this->similarwordsMode & A587_SIMILARWORDS_SOUNDEX) ? soundex($term) : '';
          $metaphone = ($this->similarwordsMode & A587_SIMILARWORDS_METAPHONE) ? metaphone($term) : '';
          $colognephone = ($this->similarwordsMode & A587_SIMILARWORDS_COLOGNEPHONE) ? $this->cologne_phone($term) : '';
          $clang = (isset($keyword['clang']) && $keyword['clang'] !== false) ? $keyword['clang'] : '-1';

          $rows[] = sprintf("('%s', '%s', '%s', '%s', %s)", $escaped, $soundex, $metaphone, $colognephone, $clang);
      }

      if (empty($rows)) {
          return;
      }

      $db->setQuery(
          sprintf(
              "\nINSERT INTO `%s`\n(keyword, soundex, metaphone, colognephone, clang)\nVALUES\n%s\nON DUPLICATE KEY UPDATE count = count + %d",
              $this->tablePrefix.'587_keywords',
              implode(',', $rows),
              $_doCount ? 1 : 0
          )
      );
  }
  /**
   * Removes all stored keywords by truncating the keyword table.
   *
   * @return mixed whatever rex_sql::setQuery() returns for the TRUNCATE statement
   */
  function deleteKeywords()
  {
      $table = $this->tablePrefix.'587_keywords';

      $sql = new rex_sql();
      return $sql->setQuery(sprintf('TRUNCATE TABLE `%s`', $table));
  }
  1661. /**
  1662. * Executes the search.
  1663. *
  1664. * @param string $_search
  1665. *
  1666. * @return array
  1667. */
  1668. function search($_search)
  1669. {
  1670. $startTime = microtime(true);
  1671. $this->searchString = trim(stripslashes($_search));
  1672. $keywordCount = $this->parseSearchString($this->searchString);
  1673. if(empty($this->searchString) OR empty($this->searchArray))
  1674. {
  1675. return array(
  1676. 'count' => 0,
  1677. 'hits' => array(),
  1678. 'keywords' => array(),
  1679. 'keywords' => '',
  1680. 'sql' => 'No search performed.',
  1681. 'blacklisted' => false,
  1682. 'hash' => '',
  1683. 'simwordsnewsearch' => '',
  1684. 'simwords' => array(),
  1685. 'time' => 0
  1686. );
  1687. }
  1688. // ask cache
  1689. if($this->cache AND $this->isCached($this->searchString))
  1690. {
  1691. $this->cachedArray['time'] = microtime(true) - $startTime;
  1692. if($this->similarwords AND $this->cachedArray['count'] > 0)
  1693. {
  1694. $this->storeKeywords($this->searchArray);
  1695. }
  1696. // EP registrieren
  1697. rex_register_extension_point('A587_SEARCH_EXECUTED', $this->cachedArray);
  1698. //var_dump($this->cachedArray['sql']);
  1699. return $this->cachedArray;
  1700. }
  1701. $return = array();
  1702. $return['simwordsnewsearch'] = '';
  1703. $return['simwords'] = array();
  1704. if($this->similarwords)
  1705. {
  1706. $simwords = array();
  1707. foreach($this->searchArray as $keyword)
  1708. {
  1709. $sounds = array();
  1710. if($this->similarwordsMode & A587_SIMILARWORDS_SOUNDEX)
  1711. $sounds[] = "soundex = '".soundex($keyword['search'])."'";
  1712. if($this->similarwordsMode & A587_SIMILARWORDS_METAPHONE)
  1713. $sounds[] = "metaphone = '".metaphone($keyword['search'])."'";
  1714. if($this->similarwordsMode & A587_SIMILARWORDS_COLOGNEPHONE)
  1715. $sounds[] = "colognephone = '".$this->cologne_phone($keyword['search'])."'";
  1716. $simwords[] = sprintf("
  1717. SELECT
  1718. GROUP_CONCAT(DISTINCT keyword SEPARATOR ' ') as keyword,
  1719. '%s' AS typedin,
  1720. SUM(count) as count
  1721. FROM `%s`
  1722. WHERE 1
  1723. %s
  1724. AND (%s)",
  1725. $keyword['search'],
  1726. $this->tablePrefix.'587_keywords',
  1727. ($this->clang !== false) ? 'AND (clang = '.intval($this->clang).' OR clang IS NULL)' : '',
  1728. implode(' OR ', $sounds)
  1729. );
  1730. }
  1731. // simwords
  1732. $simWordsSQL = new rex_sql();
  1733. foreach($simWordsSQL->getArray(sprintf("
  1734. %s
  1735. GROUP BY %s
  1736. ORDER BY SUM(count)",
  1737. implode(' UNION ', $simwords),
  1738. $this->similarwordsPermanent ? "''" : 'keyword, typedin'
  1739. )
  1740. ) as $simword)
  1741. {
  1742. $return['simwords'][$simword['typedin']] = array(
  1743. 'keyword' => $simword['keyword'],
  1744. 'typedin' => $simword['typedin'],
  1745. 'count' => $simword['count'],
  1746. );
  1747. }
  1748. $newsearch = array();
  1749. foreach($this->searchArray as $keyword)
  1750. {
  1751. if(preg_match($this->encodeRegex('~\s~is'), $keyword['search']))
  1752. $quotes = '"';
  1753. else
  1754. $quotes = '';
  1755. if(array_key_exists($keyword['search'], $return['simwords']))
  1756. {
  1757. $newsearch[] = $quotes.$return['simwords'][$keyword['search']]['keyword'].$quotes;
  1758. }
  1759. else
  1760. {
  1761. $newsearch[] = $quotes.$keyword['search'].$quotes;
  1762. }
  1763. }
  1764. $return['simwordsnewsearch'] = implode(' ', $newsearch);
  1765. }
  1766. if($this->similarwordsPermanent)
  1767. $keywordCount = $this->parseSearchString($this->searchString.' '.$return['simwordsnewsearch']);
  1768. $searchColumns = array();
  1769. switch($this->textMode)
  1770. {
  1771. case 'unmodified':
  1772. $searchColumns[] = 'unchangedtext';
  1773. break;
  1774. case 'both':
  1775. $searchColumns[] = 'plaintext';
  1776. $searchColumns[] = 'unchangedtext';
  1777. break;
  1778. default:
  1779. $searchColumns[] = 'plaintext';
  1780. }
  1781. $sql = new rex_sql();
  1782. $Awhere = array();
  1783. $Amatch = array();
  1784. foreach($this->searchArray as $keyword)
  1785. {
  1786. // build MATCH-Array
  1787. $match = sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape($keyword['search']), $keyword['weight']);
  1788. if($this->searchEntities)
  1789. {
  1790. $match .= ' + '.sprintf("(( MATCH (`%s`) AGAINST ('%s')) * %d)", implode('`,`',$searchColumns), $sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8')), $keyword['weight']);
  1791. }
  1792. $Amatch[] = $match;
  1793. // build WHERE-Array
  1794. if($this->searchMode == 'match')
  1795. {
  1796. $AWhere[] = $match;
  1797. }
  1798. else
  1799. {
  1800. $tmpWhere = array();
  1801. foreach($searchColumns as $searchColumn)
  1802. {
  1803. $tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape($keyword['search'])));
  1804. if($this->searchEntities)
  1805. {
  1806. $tmpWhere[] = sprintf("(`%s` LIKE '%%%s%%')", $searchColumn, str_replace(array('%','_'),array('\%','\_'),$sql->escape(htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8'))));
  1807. }
  1808. }
  1809. $AWhere[] = '('.implode(' OR ',$tmpWhere).')';
  1810. }
  1811. /*if($this->logicalMode == ' AND ')
  1812. $Awhere[] = '+*'.$keyword['search'].'*';
  1813. else
  1814. $AWhere[] = '*'.$keyword['search'].'*';*/
  1815. }
  1816. // build MATCH-String
  1817. $match = '('.implode(' + ',$Amatch).' + 1)';
  1818. // build WHERE-String
  1819. $where = '('.implode($this->logicalMode,$AWhere).')';
  1820. #$where = sprintf("( MATCH (%s) AGAINST ('%s' IN BOOLEAN MODE)) > 0",implode(',',$searchColumns),implode(' ',$Awhere));
  1821. // language
  1822. if($this->clang !== false)
  1823. $where .= ' AND (clang = '.intval($this->clang).' OR clang IS NULL)';
  1824. $AwhereToSearch = array();
  1825. if(array_key_exists('articles',$this->searchInIDs) AND count($this->searchInIDs['articles']))
  1826. {
  1827. $AwhereToSearch[] = "texttype = 'article'";
  1828. $AwhereToSearch[] = "(fid IN (".implode(',',$this->searchInIDs['articles'])."))";
  1829. }
  1830. if(array_key_exists('categories',$this->searchInIDs) AND count($this->searchInIDs['categories']))
  1831. {
  1832. $AwhereToSearch[] = "(catid IN (".implode(',',$this->searchInIDs['categories']).") AND ftable = '".$sql->escape($this->tablePrefix)."article')";
  1833. }
  1834. if(array_key_exists('filecategories',$this->searchInIDs) AND count($this->searchInIDs['filecategories']))
  1835. {
  1836. $AwhereToSearch[] = "(catid IN (".implode(',',$this->searchInIDs['filecategories']).") AND ftable = '".$sql->escape($this->tablePrefix)."file')";
  1837. }
  1838. if(array_key_exists('db_columns',$this->searchInIDs) AND count($this->searchInIDs['db_columns']))
  1839. {
  1840. $AwhereToSearch[] = "texttype = 'db_column'";
  1841. $Acolumns = array();
  1842. foreach($this->searchInIDs['db_columns'] as $table => $colArray)
  1843. {
  1844. foreach($colArray as $column)
  1845. {
  1846. //$Acolumns[] = sprintf("(ftable = '%s' AND fcolumn = '%s' %s)", $table, $column, $strSearchArticles);
  1847. $Acolumns[] = sprintf("(ftable = '%s' AND fcolumn = '%s')", $table, $column);
  1848. }
  1849. }
  1850. $AwhereToSearch[] = '('.implode(' OR ',$Acolumns).')';
  1851. }
  1852. if(count($AwhereToSearch))
  1853. {
  1854. if($this->searchArticles)
  1855. $where .= " AND ((texttype = 'article') OR (".implode(' AND ',$AwhereToSearch).'))';
  1856. else
  1857. $where .= ' AND ('.implode(' AND ',$AwhereToSearch).')';
  1858. }
  1859. if(!empty($this->where))
  1860. $where .= ' AND ('.$this->where.')';
  1861. // build ORDER-BY-String
  1862. $Aorder = array();
  1863. foreach($this->order as $col => $dir)
  1864. $Aorder[] = $col.' '.$dir;
  1865. $selectFields = array();
  1866. if($this->groupBy)
  1867. {
  1868. $selectFields[] = sprintf('(SELECT SUM%s FROM `%s` summe WHERE summe.fid = r1.fid AND summe.ftable = r1.ftable) AS RELEVANCE587', $match, $this->tablePrefix.'587_searchindex');
  1869. $selectFields[] = sprintf('(SELECT COUNT(*) FROM `%s` summe WHERE summe.fid = r1.fid AND (summe.ftable IS NULL OR summe.ftable = r1.ftable) AND (summe.fcolumn IS NULL OR summe.fcolumn = r1.fcolumn) AND summe.texttype = r1.texttype) AS COUNT587', $this->tablePrefix.'587_searchindex');
  1870. }
  1871. else
  1872. {
  1873. $selectFields[] = $match.' AS RELEVANCE587';
  1874. }
  1875. $selectFields[] = '`id`';
  1876. $selectFields[] = '`fid`';
  1877. $selectFields[] = '`catid`';
  1878. $selectFields[] = '`ftable`';
  1879. $selectFields[] = '`fcolumn`';
  1880. $selectFields[] = '`texttype`';
  1881. $selectFields[] = '`clang`';
  1882. $selectFields[] = '`unchangedtext`';
  1883. $selectFields[] = '`plaintext`';
  1884. $selectFields[] = '`teaser`';
  1885. $selectFields[] = '`values`';
  1886. $selectFields[] = '`filename`';
  1887. $selectFields[] = '`fileext`';
  1888. if($this->groupBy)
  1889. {
  1890. $query = sprintf('
  1891. SELECT SQL_CALC_FOUND_ROWS %s
  1892. FROM `%s` r1
  1893. WHERE (%s) AND (
  1894. (
  1895. %s = (SELECT MAX%s FROM `%s` r2 WHERE r1.ftable = r2.ftable AND r1.fid = r2.fid %s)
  1896. AND fid IS NOT NULL
  1897. ) OR
  1898. ftable IS NULL
  1899. )
  1900. GROUP BY ftable,fid,clang
  1901. ORDER BY %s
  1902. LIMIT %d,%d',
  1903. implode(",\n",$selectFields),
  1904. $this->tablePrefix.'587_searchindex',
  1905. $where,
  1906. $match,
  1907. $match,
  1908. $this->tablePrefix.'587_searchindex',
  1909. ($this->clang !== false) ? 'AND (clang = '.intval($this->clang).' OR clang IS NULL)' : '',
  1910. implode(",\n",$Aorder),
  1911. $this->limit[0],$this->limit[1]
  1912. );
  1913. }
  1914. else
  1915. {
  1916. $query = sprintf('
  1917. SELECT SQL_CALC_FOUND_ROWS %s
  1918. FROM `%s`
  1919. WHERE %s
  1920. ORDER BY %s
  1921. LIMIT %d,%d',
  1922. implode(",\n",$selectFields),
  1923. $this->tablePrefix.'587_searchindex',
  1924. $where,
  1925. implode(",\n",$Aorder),
  1926. $this->limit[0],$this->limit[1]
  1927. );
  1928. }
  1929. #echo '<pre>'.$query.'</pre>';
  1930. $sqlResult = $sql->getArray($query);
  1931. $indexIds = array();
  1932. $count = 0;
  1933. $sqlResultCount = $sql->getArray('SELECT FOUND_ROWS() as count');
  1934. $return['count'] = intval($sqlResultCount[0]['count']);
  1935. // hits
  1936. $return['hits'] = array();
  1937. $i = 0;
  1938. foreach($sqlResult as $hit)
  1939. {
  1940. $indexIds[] = $hit['id'];
  1941. $return['hits'][$i] = array();
  1942. $return['hits'][$i]['id'] = $hit['id'];
  1943. $return['hits'][$i]['fid'] = $hit['fid'];
  1944. if(!is_numeric($hit['fid']) AND !is_null($json_decode_fid = json_decode($hit['fid'], true)))
  1945. $return['hits'][$i]['fid'] = $json_decode_fid;
  1946. $return['hits'][$i]['table'] = $hit['ftable'];
  1947. $return['hits'][$i]['column'] = $hit['fcolumn'];
  1948. $return['hits'][$i]['type'] = $hit['texttype'];
  1949. $return['hits'][$i]['clang'] = $hit['clang'];
  1950. $return['hits'][$i]['unchangedtext'] = $hit['unchangedtext'];
  1951. $return['hits'][$i]['plaintext'] = $hit['plaintext'];
  1952. $return['hits'][$i]['teaser'] = $this->getTeaserText($hit['plaintext']);
  1953. $return['hits'][$i]['highlightedtext'] = $this->getHighlightedText($hit['plaintext']);
  1954. $return['hits'][$i]['article_teaser'] = $hit['teaser'];
  1955. $return['hits'][$i]['values'] = a587_config_unserialize($hit['values']);
  1956. $return['hits'][$i]['filename'] = $hit['filename'];
  1957. $return['hits'][$i]['fileext'] = $hit['fileext'];
  1958. $i++;
  1959. if($this->groupBy)
  1960. $count += $hit['COUNT587'];
  1961. }
  1962. if($this->groupBy)
  1963. {
  1964. $indexIds = array();
  1965. foreach($sql->getArray(
  1966. sprintf('
  1967. SELECT id
  1968. FROM `%s`
  1969. WHERE %s
  1970. LIMIT %d,%d',
  1971. $this->tablePrefix.'587_searchindex',
  1972. $where,
  1973. $this->limit[0],$count
  1974. )
  1975. ) as $hit)
  1976. {
  1977. $indexIds[] = $hit['id'];
  1978. }
  1979. }
  1980. // keywords, which were searched for
  1981. $return['keywords'] = $this->searchArray;
  1982. $return['searchterm'] = $this->searchString;
  1983. // sql
  1984. $return['sql'] = $query;
  1985. // was any blacklisted word searched for?
  1986. $return['blacklisted'] = false;
  1987. if(count($this->blacklisted) > 0)
  1988. $return['blacklisted'] = $this->blacklisted;
  1989. $return['hash'] = $this->cacheHash($this->searchString);
  1990. if($this->similarwords AND $i)
  1991. {
  1992. $this->storeKeywords($this->searchArray);
  1993. }
  1994. if($this->cache)
  1995. $this->cacheSearch(serialize($return), $indexIds);
  1996. // EP registrieren
  1997. rex_register_extension_point('A587_SEARCH_EXECUTED', $return);
  1998. $return['time'] = microtime(true) - $startTime;
  1999. return $return;
  2000. }
  2001. }