PageRenderTime 75ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 1ms

/classes/class.rexsearch.inc.php

https://github.com/xong/rexsearch
PHP | 2361 lines | 1640 code | 345 blank | 376 comment | 181 complexity | e3cae78c72e0ca8b27c40a1591236852 MD5 | raw file
Possible License(s): LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Class rexsearch
  4. *
  5. * This class is still being tested.
  6. * Please report errors at http://forum.redaxo.de.
  7. *
  8. * @author Robert Rupf
  9. * @package rexsearch
  10. */
  /**
   * Status codes and option flags used by the rexsearch addon.
   */

  // result codes stored by RexSearch::indexArticle()
  define('A587_ART_EXCLUDED', 0);
  define('A587_ART_IDNOTFOUND', 1);
  define('A587_ART_GENERATED', 2);
  define('A587_ART_REDIRECT', 3);

  // result codes returned by RexSearch::indexFile()
  define('A587_FILE_NOEXIST', 0);
  define('A587_FILE_XPDFERR_OPENSRC', 1);
  define('A587_FILE_XPDFERR_OPENDEST', 2);
  define('A587_FILE_XPDFERR_PERM', 3);
  define('A587_FILE_XPDFERR_OTHER', 4);
  define('A587_FILE_FORBIDDEN_EXTENSION', 5);
  define('A587_FILE_GENERATED', 6);
  define('A587_FILE_EMPTY', 7);

  // similar-words modes; the single modes are 1, 2 and 4, ALL is their bitwise OR (7)
  define('A587_SIMILARWORDS_NONE', 0);
  define('A587_SIMILARWORDS_SOUNDEX', 1);
  define('A587_SIMILARWORDS_METAPHONE', 2);
  define('A587_SIMILARWORDS_COLOGNEPHONE', 4);
  define('A587_SIMILARWORDS_ALL', A587_SIMILARWORDS_SOUNDEX | A587_SIMILARWORDS_METAPHONE | A587_SIMILARWORDS_COLOGNEPHONE);
  31. /**
  32. * @package rexsearch
  33. */
  34. class RexSearch
  35. {
  // set by doSearchArticles()
  public $searchArticles = false;
  public $blacklist = array();
  public $blacklisted = array();
  public $cache = true;
  public $cachedArray = array();
  /**
   * @ignore
   */
  public $ci = true; // case insensitive?
  public $clang = false;
  public $documentRoot;
  public $dontIndexRedirects = true;
  public $ellipsis;
  public $ep_outputfilter = false;
  public $excludeIDs = array();
  public $fileExtensions = array();
  // set by doGroupBy()
  public $groupBy = true;
  // string the setters of this class append their arguments to
  public $hashMe = '';
  public $highlightType = 'surroundtext';
  public $includeColumns = array();
  public $includeDirectories = array();
  public $includePath;
  public $generatedPath;
  public $indexUnknownFileExtensions = false;
  public $indexMediapool = false;
  public $indexMissingFileExtensions = false;
  public $indexOffline = false;
  public $indexViaHTTP = false;
  public $indexWithTemplate = false;
  public $languages;
  // array(start, count), see setLimit()
  public $limit = array(0,10);
  public $logicalMode = ' AND ';
  public $maxHighlightedTextChars = 100;
  public $maxTeaserChars = 200;
  public $mediaFolder;
  public $order = array('RELEVANCE587' => 'DESC');
  // value of $REX['REDAXO'] saved by beginFrontendMode()
  public $redaxo = false;
  public $searchArray = array();
  public $searchEntities = false;
  // filled by setSearchInIDs()
  public $searchInIDs = array();
  public $searchMode = 'like';
  public $searchString = '';
  // minimum length (in characters) a word needs to be stored as a keyword
  public $significantCharacterCount = 3;
  public $similarwords = false;
  public $similarwordsMode = 0;
  public $similarwordsPermanent = false;
  public $stopwords = array();
  // array(start tag, end tag), see setSurroundTags()
  public $surroundTags = array('<strong>','</strong>');
  public $tablePrefix;
  public $textMode = 'plain';
  public $whitelist = array();
  public $where = '';
  /**
   * Initialises the search object.
   *
   * If $_loadSettings is set, every known key of
   * $REX['ADDON']['settings']['rexsearch'] is applied through the matching
   * setter or property. Afterwards languages, table prefix and paths are copied
   * from $REX, the ellipsis text is read from the addon's language file and,
   * if requested, the stopword list is loaded.
   *
   * Declared as __construct() because a method named like the class is no
   * longer called as constructor by PHP 8.
   *
   * @param mixed $_clang        passed on to setClang()
   * @param bool  $_loadSettings apply the stored addon settings?
   * @param bool  $_useStopwords load the stopwords from lang/stopwords.inc.php?
   */
  function __construct($_clang = false, $_loadSettings = true, $_useStopwords = true)
  {
      // $I18N stays imported so that it remains visible to the included stopword file
      global $REX,$I18N;

      if($_loadSettings AND isset($REX['ADDON']['settings']['rexsearch']) AND is_array($REX['ADDON']['settings']['rexsearch']))
      {
          foreach($REX['ADDON']['settings']['rexsearch'] as $key => $value)
          {
              switch($key)
              {
                  case 'logicalmode':
                      $this->setLogicalMode($value);
                      break;

                  case 'textmode':
                      $this->setTextMode($value);
                      break;

                  case 'searchmode':
                      $this->setSearchMode($value);
                      break;

                  case 'surroundtags':
                      $this->setSurroundTags($value);
                      break;

                  case 'limit':
                      $this->setLimit($value);
                      break;

                  case 'ci':
                      $this->setCI($value);
                      break;

                  case 'blacklist':
                      $this->setBlacklist(is_array($value)?$value:array());
                      break;

                  case 'exclude_article_ids':
                      $this->setExcludeIDs($value);
                      break;

                  case 'exclude_category_ids':
                      if(is_array($value))
                      {
                          $ids = array();
                          foreach($value as $catID)
                          {
                              foreach(a587_getArticles(array($catID)) as $id => $name)
                                  $ids[] = $id;

                              // called once per category with the ids collected so far
                              $this->setExcludeIDs($ids);
                          }
                      }
                      break;

                  case 'include':
                      $this->setIncludeColumns($value);
                      break;

                  case 'maxteaserchars':
                      $this->setMaxTeaserChars($value);
                      break;

                  case 'maxhighlightchars':
                      $this->setMaxHighlightedTextChars($value);
                      break;

                  case 'highlight':
                      $this->setHighlightType($value);
                      break;

                  case 'indexmode':
                      // 0 = fetch articles via HTTP, 2 = render them with their template
                      $this->indexViaHTTP = intval($value) == 0;
                      $this->indexWithTemplate = intval($value) == 2;
                      break;

                  case 'indexoffline':
                      $this->indexOffline = $value == '1';
                      break;

                  case 'similarwordsmode':
                      $this->similarwordsMode = intval($value);
                      $this->similarwords = !!intval($value);
                      break;

                  case 'similarwords_permanent':
                      $this->similarwordsPermanent = !!intval($value);
                      break;

                  case 'fileextensions':
                      $this->fileExtensions = $value;
                      break;

                  case 'indexfolders':
                      $this->includeDirectories = $value;
                      break;

                  case 'indexmediapool':
                      $this->indexMediapool = !!intval($value);
                      break;

                  case 'ep_outputfilter':
                      $this->ep_outputfilter = !!intval($value);
                      break;
              }
          }
      }

      $this->setClang($_clang);
      $this->languages = $REX['CLANG'];
      $this->tablePrefix = $REX['TABLE_PREFIX'];
      $this->includePath = $REX['INCLUDE_PATH'];
      $this->generatedPath = $REX['GENERATED_PATH'];
      $this->documentRoot = realpath($_SERVER['DOCUMENT_ROOT']);
      $this->mediaFolder = $REX['MEDIAFOLDER'];

      $locale = 'de_de';
      $langfile = new i18n($locale, $REX['INCLUDE_PATH'].'/addons/rexsearch/lang/');
      $this->ellipsis = $langfile->Msg('a587_ellipsis');

      // german stopwords
      if($_useStopwords)
      {
          include $this->includePath.'/addons/rexsearch/lang/stopwords.inc.php';

          // the included file is expected to define $german_stopwords
          if(isset($german_stopwords))
              $this->stopwords = $german_stopwords;
      }
  }

  /**
   * Old-style constructor name, kept so that existing code calling
   * RexSearch() explicitly (for instance parent::RexSearch() in a subclass)
   * keeps working. It only forwards to __construct().
   *
   * @param mixed $_clang
   * @param bool  $_loadSettings
   * @param bool  $_useStopwords
   */
  function RexSearch($_clang = false, $_loadSettings = true, $_useStopwords = true)
  {
      $this->__construct($_clang, $_loadSettings, $_useStopwords);
  }
  /**
   * A function for retrieving the Kölner Phonetik value of a string
   *
   * As described at http://de.wikipedia.org/wiki/Kölner_Phonetik
   * Based on Hans Joachim Postel: Die Kölner Phonetik.
   * Ein Verfahren zur Identifizierung von Personennamen auf der
   * Grundlage der Gestaltanalyse.
   * in: IBM-Nachrichten, 19. Jahrgang, 1969, S. 925-931
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * Processing steps:
   *  1. the word is lower-cased (UTF-8 aware), umlauts and sharp s are
   *     transliterated and "ph" is treated like "f",
   *  2. every byte gets a code from the coding table, unless one of the
   *     two-letter exceptions applies,
   *  3. a code equal to the code of the directly preceding letter is dropped,
   *  4. the vowel code 0 is only kept for the very first letter; letters
   *     without a code (such as "h") are dropped.
   *
   * @package phonetics
   * @version 1.0
   * @link http://www.einfachmarke.de
   * @license GPL 3.0 <http://www.gnu.org/licenses/>
   * @copyright 2008 by einfachmarke.de
   * @author Nicolas Zimmer <nicolas dot zimmer at einfachmarke.de>
   *
   * @param string $_word string to be analyzed
   * @return string the Kölner Phonetik value, '' for an empty word
   * @access public
   */
  function cologne_phone($_word)
  {
      // prepare for processing
      $_word = mb_strtolower((string) $_word, 'UTF-8');
      $replacements = array(
          'ä'  => 'a',
          'ö'  => 'o',
          'ü'  => 'u',
          'ß'  => 'ss',
          'ph' => 'f'
      );
      foreach($replacements as $search => $replace)
          $_word = str_replace($search, $replace, $_word);

      $len = strlen($_word);
      if($len == 0)
          return '';

      // rules for exceptions: letter pairs that decide the code of their FIRST letter ...
      $exceptionsLeading = array(
          4 => array('ca','ch','ck','cl','co','cq','cu','cx'),
          8 => array('dc','ds','dz','tc','ts','tz')
      );
      // ... and letter pairs that force code 8 for their SECOND letter
      $exceptionsFollowing = array('sc','zc','cx','kx','qx');

      // table for coding ('c' is listed twice, the later entry 8 wins)
      $codingTable = array(
          0  => array('a','e','i','j','o','u','y'),
          1  => array('b','p'),
          2  => array('d','t'),
          3  => array('f','v','w'),
          4  => array('c','g','k','q'),
          48 => array('x'),
          5  => array('l'),
          6  => array('m','n'),
          7  => array('r'),
          8  => array('c','s','z')
      );

      // codes are kept as strings so that '0' and '' (no code) stay distinguishable
      $codes = array();
      for($i = 0; $i < $len; $i++)
      {
          $code = '';

          // exceptions
          if($i == 0 AND $len > 1 AND $_word[0].$_word[1] == 'cr')
              $code = '4';

          if($i < ($len - 1))
          {
              $pair = $_word[$i].$_word[$i + 1];
              foreach($exceptionsLeading as $exceptionCode => $letters)
              {
                  if(in_array($pair, $letters, true))
                      $code = (string) $exceptionCode;
              }
          }

          if($i > 0 AND in_array($_word[$i - 1].$_word[$i], $exceptionsFollowing, true))
              $code = '8';

          // normal encoding
          if($code === '')
          {
              foreach($codingTable as $tableCode => $letters)
              {
                  if(in_array($_word[$i], $letters, true))
                      $code = (string) $tableCode;
              }
          }

          $codes[$i] = $code;
      }

      $result = '';
      for($i = 0; $i < $len; $i++)
      {
          $code = $codes[$i];

          // letter without a code
          if($code === '')
              continue;

          // delete double values: compare with the unmodified code of the previous letter
          if($i > 0 AND $code === $codes[$i - 1])
              continue;

          // delete vowels, omitting the first character's code
          if($code === '0' AND $i > 0)
              continue;

          $result .= $code;
      }

      return $result;
  }
  /**
   * Stores the given flag in $this->searchArticles and appends it to $this->hashMe.
   *
   * @param bool $_bool
   */
  function doSearchArticles($_bool = false)
  {
      $this->hashMe .= $_bool;
      $this->searchArticles = $_bool;
  }
  /**
   * Stores the given flag in $this->groupBy and appends it to $this->hashMe.
   *
   * @param bool $_bool
   */
  function doGroupBy($_bool = true)
  {
      $this->hashMe .= $_bool;
      $this->groupBy = $_bool;
  }
  /**
   * Adds entries to $this->searchInIDs.
   *
   * Recognised keys of $_searchInIDs:
   *  - 'articles', 'categories', 'filecategories': lists of ids; every value
   *    is converted with intval() and values that become 0 are skipped,
   *  - 'db_columns': array(table => array(column, ...)); the columns are
   *    appended to the ones already stored for that table.
   * Everything that is stored is also appended to $this->hashMe.
   *
   * @param array $_searchInIDs
   * @param bool  $_reset       empty $this->searchInIDs before adding?
   */
  function setSearchInIDs($_searchInIDs, $_reset = false)
  {
      if($_reset)
          $this->searchInIDs = array();

      // id list key => prefix written to $this->hashMe in front of every id
      $idGroups = array(
          'articles'       => 'a',
          'categories'     => 'c',
          'filecategories' => 'f'
      );

      foreach($idGroups as $group => $hashPrefix)
      {
          if(!array_key_exists($group, $_searchInIDs))
              continue;

          if(!array_key_exists($group, $this->searchInIDs))
              $this->searchInIDs[$group] = array();

          foreach($_searchInIDs[$group] as $rawId)
          {
              $numericId = intval($rawId);
              if(!$numericId)
                  continue;

              $this->searchInIDs[$group][] = $numericId;
              $this->hashMe .= $hashPrefix.$numericId;
          }
      }

      if(!array_key_exists('db_columns', $_searchInIDs))
          return;

      if(!array_key_exists('db_columns', $this->searchInIDs))
          $this->searchInIDs['db_columns'] = array();

      foreach($_searchInIDs['db_columns'] as $table => $columnArray)
      {
          $this->hashMe .= $table;

          $columns = array();
          foreach($columnArray as $column)
          {
              $columns[] = $column;
              $this->hashMe .= $column;
          }

          if(array_key_exists($table, $this->searchInIDs['db_columns']))
              $this->searchInIDs['db_columns'][$table] = array_merge($this->searchInIDs['db_columns'][$table], $columns);
          else
              $this->searchInIDs['db_columns'][$table] = $columns;
      }
  }
  /**
   * Appends the PCRE modifier "u" (UTF-8) to a regular expression.
   * As only UTF-8 is supported, the modifier is appended unconditionally.
   *
   * @param string $_regex pattern including its delimiters and modifiers
   * @return string
   */
  function encodeRegex($_regex)
  {
      $utf8Modifier = 'u';

      return $_regex.$utf8Modifier;
  }
  /**
   * Switches $REX['REDAXO'] to false to simulate the frontend.
   * The value found before the switch is remembered in $this->redaxo.
   */
  function beginFrontendMode()
  {
      global $REX;

      $stateBefore = $REX['REDAXO'];
      $REX['REDAXO'] = false;
      $this->redaxo = $stateBefore;
  }
  /**
   * Leaves the frontend-mode: writes the value remembered in $this->redaxo
   * back to $REX['REDAXO'].
   */
  function endFrontendMode()
  {
      global $REX;

      $rememberedState = $this->redaxo;
      $REX['REDAXO'] = $rememberedState;
  }
  /**
   * Sets the maximum number of characters the teaser of a searched text may have.
   * The integer value is stored; the argument as given is appended to $this->hashMe.
   *
   * @param int $_count
   */
  function setMaxTeaserChars($_count)
  {
      $this->hashMe .= $_count;
      $this->maxTeaserChars = intval($_count);
  }
  /**
   * Sets the maximum number of characters shown around a found search term
   * in the highlighted text.
   * The integer value is stored; the argument as given is appended to $this->hashMe.
   *
   * @param int $_count
   */
  function setMaxHighlightedTextChars($_count)
  {
      $this->hashMe .= $_count;
      $this->maxHighlightedTextChars = intval($_count);
  }
  /**
   * Generates the full index at once.
   *
   * Empties the search index and both cache tables, then indexes all
   * articles, all configured database columns, the mediapool (if enabled)
   * and the files of the configured directories.
   */
  function generateIndex()
  {
      // delete old index and cache
      $tablesToEmpty = array('587_searchindex', '587_searchcacheindex_ids', '587_searchcache');
      foreach($tablesToEmpty as $tableSuffix)
      {
          $truncate = new rex_sql();
          $truncate->setTable($this->tablePrefix.$tableSuffix);
          $truncate->delete();
      }

      // index articles
      $articleQuery = new rex_sql();
      $articleQuery->setTable($this->tablePrefix.'article');
      if($articleQuery->select('id,clang'))
      {
          $articleRows = $articleQuery->getArray();
          foreach($articleRows as $row)
              $this->indexArticle($row['id'], $row['clang']);
      }

      // index columns
      foreach($this->includeColumns as $table => $columnArray)
      {
          foreach($columnArray as $column)
              $this->indexColumn($table, $column);
      }

      // index mediapool
      if($this->indexMediapool)
      {
          $mediaQuery = new rex_sql();
          $mediaQuery->setTable($this->tablePrefix.'file');
          if($mediaQuery->select('file_id, category_id, filename'))
          {
              // media folder relative to the document root, with forward slashes
              $relativeMediaFolder = str_replace('\\','/',substr($this->mediaFolder, strlen($this->documentRoot)));

              foreach($mediaQuery->getArray() as $media)
                  $this->indexFile($relativeMediaFolder.'/'.$media['filename'], false, false, $media['file_id'], $media['category_id']);
          }
      }

      // index files
      foreach($this->includeDirectories as $directory)
      {
          $files = a587_getFiles($directory, $this->fileExtensions);
          foreach($files as $path)
              $this->indexFile($path);
      }
  }
  /**
   * Indexes a certain article.
   *
   * For every requested language the old index rows (and the cache entries
   * that refer to them) are removed and, if the article exists and is online
   * (or offline articles are indexed as well), a new index row is written.
   *
   * The returned array contains one of the A587_ART_* codes per language.
   * NOTE(review): A587_ART_GENERATED is stored under the language id, the
   * other codes under the value found in $this->languages for that id; this
   * is kept as it is because callers may rely on it.
   *
   * @param int $_id
   * @param mixed $_clang language id or false for all languages
   *
   * @return array
   */
  function indexArticle($_id,$_clang = false)
  {
      global $REX;

      if($_clang === false)
          $langs = $this->languages;
      else
          $langs = array(intval($_clang) => $this->languages[intval($_clang)]);

      $return = array();
      $keywords = array();

      foreach($langs as $langID => $v)
      {
          if(in_array($_id, $this->excludeIDs))
          {
              $return[$v] = A587_ART_EXCLUDED;
              continue;
          }

          $REX['CUR_CLANG'] = $langID;

          $delete = new rex_sql();
          $where = sprintf("ftable = '%s' AND fid = %d AND clang = %d", $delete->escape($this->tablePrefix.'article'), $_id, $langID);

          // delete from cache
          $select = new rex_sql();
          $select->setTable($this->tablePrefix.'587_searchindex');
          $select->setWhere($where);
          $select->select('id');

          $indexIds = array();
          foreach($select->getArray() as $result)
              $indexIds[] = $result['id'];
          $this->deleteCache($indexIds);

          // delete old
          $delete->setTable($this->tablePrefix.'587_searchindex');
          $delete->setWhere($where);
          $delete->delete();

          // index article
          $article = OOArticle::getArticleById(intval($_id), $langID);
          if(is_object($article) AND ($article->isOnline() OR $this->indexOffline))
          {
              $this->beginFrontendMode();

              // start empty for every language so that no text of a previous language is reused
              $articleText = '';

              if(ini_get('allow_url_fopen') AND $this->indexViaHTTP)
              {
                  $articleText = @file_get_contents('http://'.$_SERVER['HTTP_HOST'].substr($_SERVER['PHP_SELF'],0,strpos($_SERVER['PHP_SELF'],'/redaxo/')+1).rex_geturl($_id, $langID, '', '&'));

                  // a failed request is treated like an empty article
                  if($articleText === false)
                      $articleText = '';
              }
              elseif ($_id != 0 AND $this->dontIndexRedirects)
              {
                  $rex_article = new rex_article(intval($_id), $langID);

                  $article_content_file = $this->generatedPath.'/articles/'.$_id.'.'.$langID.'.content';
                  if(!file_exists($article_content_file))
                  {
                      include_once ($this->includePath."/functions/function_rex_generate.inc.php");
                      $generated = rex_generateArticleContent($_id, $langID);
                      if($generated !== true)
                      {
                          $return[$v] = A587_ART_IDNOTFOUND;
                          // leave the frontend mode before skipping this language
                          $this->endFrontendMode();
                          continue;
                      }
                  }

                  // articles whose generated content issues a redirect are not indexed
                  if(file_exists($article_content_file) AND preg_match($this->encodeRegex('~(header\s*\(\s*["\']\s*Location\s*:)|(rex_redirect\s*\()~is'), rex_get_file_contents($article_content_file)))
                  {
                      $return[$v] = A587_ART_REDIRECT;
                      // leave the frontend mode before skipping this language
                      $this->endFrontendMode();
                      continue;
                  }

                  if($this->indexWithTemplate)
                      $articleText = $rex_article->getArticleTemplate();
                  else
                      $articleText = $rex_article->getArticle();

                  if($this->ep_outputfilter)
                  {
                      // run the OUTPUT_FILTER extension point as if this article were the current one
                      $tmp = array(
                          'artid' => $REX['ARTICLE_ID'],
                          'clang' => $REX['CUR_CLANG']
                      );
                      $REX['ARTICLE_ID'] = $_id;
                      $REX['CUR_CLANG'] = $langID;
                      $articleText = rex_register_extension_point('OUTPUT_FILTER', $articleText, array('environment' => 'frontend','sendcharset' => false));
                      $REX['ARTICLE_ID'] = $tmp['artid'];
                      $REX['CUR_CLANG'] = $tmp['clang'];
                  }
              }

              $insert = new rex_sql();
              $articleData = array();

              $articleData['texttype'] = 'article';
              $articleData['ftable'] = $this->tablePrefix.'article';
              $articleData['fcolumn'] = NULL;
              $articleData['clang'] = $article->getClang();
              $articleData['fid'] = intval($_id);
              $articleData['catid'] = $article->getCategoryId();
              $articleData['unchangedtext'] = $insert->escape($articleText);
              $articleData['plaintext'] = $insert->escape($plaintext = $this->getPlaintext($articleText));

              // store the values of the additionally configured article columns
              if(array_key_exists($REX['TABLE_PREFIX'].'article', $this->includeColumns))
              {
                  $additionalValues = array();
                  $select->flush();
                  $select->setTable($REX['TABLE_PREFIX'].'article');
                  $select->setWhere('id = '.intval($_id).' AND clang = '.intval($langID));
                  $select->select('`'.implode('`,`', $this->includeColumns[$REX['TABLE_PREFIX'].'article']).'`');
                  foreach($this->includeColumns[$REX['TABLE_PREFIX'].'article'] as $col)
                  {
                      $additionalValues[$col] = $select->getValue($col);
                  }

                  $articleData['values'] = $insert->escape(serialize($additionalValues));
              }

              // collect every sufficiently long word as keyword
              foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
              {
                  if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
                      $keywords[] = array('search'=>$keyword,'clang'=>$langID);
              }

              $articleData['teaser'] = $insert->escape($this->getTeaserText($plaintext));

              $insert->setTable($this->tablePrefix.'587_searchindex');
              $insert->setValues($articleData);
              $insert->insert();

              $this->endFrontendMode();

              $return[$langID] = A587_ART_GENERATED;
          }
      }

      $this->storeKeywords($keywords, false);

      return $return;
  }
  /**
   * Indexes a certain column.
   * Returns the number of the indexed rows or false.
   *
   * The cache entries that refer to already indexed rows of this column are
   * always removed; the indexed rows themselves are only deleted when
   * $_start is exactly 0.
   *
   * @param string $_table
   * @param mixed $_column
   * @param mixed $_idcol  name of the id column; together with $_id it restricts the rows
   * @param mixed $_id
   * @param mixed $_start  offset of the first row (used with $_count as LIMIT)
   * @param mixed $_count
   * @param mixed $_where  additional SQL condition, inserted as given
   *
   * @return mixed
   */
  function indexColumn($_table, $_column, $_idcol = false, $_id = false, $_start = false, $_count = false, $_where = false)
  {
      $delete = new rex_sql();

      $where = sprintf(" `ftable` = '%s' AND `fcolumn` = '%s' AND `texttype` = 'db_column'",$delete->escape($_table),$delete->escape($_column));
      //if(is_string($_idcol) AND ($_id !== false))
      //$where .= sprintf(' AND fid = %d',$_id);

      // delete from cache
      $select = new rex_sql();
      $select->setTable($this->tablePrefix.'587_searchindex');
      $select->setWhere($where);
      $indexIds = array();
      if($select->select('id'))
      {
          foreach($select->getArray() as $result)
              $indexIds[] = $result['id'];
          $this->deleteCache($indexIds);
      }

      // delete old data
      if($_start === 0)
      {
          $delete->setTable($this->tablePrefix.'587_searchindex');
          $delete->setWhere($where);
          $delete->delete();
      }

      $sql = new rex_sql();

      // get primary key column(s)
      $primaryKeys = array();
      foreach($sql->getArray("SHOW COLUMNS FROM `".$_table."` WHERE `KEY` = 'PRI'") as $col)
          $primaryKeys[] = $col['Field'];

      // index column
      $sql->flush();
      $sql->setTable($_table);
      $where = '1 ';
      if(is_string($_idcol) AND $_id)
          $where .= sprintf(' AND (%s = %d)', $_idcol, $_id);

      if(!empty($_where) AND is_string($_where))
          $where .= ' AND ('.$_where.')';

      // the LIMIT clause is appended to the condition; both values are forced to integers
      if(is_numeric($_start) AND is_numeric($_count))
          $where .= ' LIMIT '.intval($_start).','.intval($_count);

      $sql->setWhere($where);

      // columns whose values are stored along with the indexed text
      $additionalColumns = array();
      if(isset($this->includeColumns[$_table]) AND is_array($this->includeColumns[$_table]))
          $additionalColumns = $this->includeColumns[$_table];

      $count = false;
      if($sql->select('*'))
      {
          $this->beginFrontendMode();

          $count = 0;
          $keywords = array();

          foreach($sql->getArray() as $value)
          {
              // skip empty values; for the article table also skip offline (unless wanted) and excluded articles
              if(!empty($value[$_column]) AND ($this->indexOffline OR $this->tablePrefix.'article' != $_table OR $value['status'] == '1') AND ($this->tablePrefix.'article' != $_table OR !in_array($value['id'],$this->excludeIDs)))
              {
                  $insert = new rex_sql();
                  $indexData = array();

                  $indexData['texttype'] = 'db_column';
                  $indexData['ftable'] = $_table;
                  $indexData['fcolumn'] = $_column;

                  if(array_key_exists('clang',$value))
                      $indexData['clang'] = $value['clang'];
                  else
                      $indexData['clang'] = NULL;

                  // determine the foreign id: id column, article id, primary key(s) or a generated negative id
                  $indexData['fid'] = NULL;
                  if(is_string($_idcol) AND array_key_exists($_idcol,$value))
                  {
                      $indexData['fid'] = $value[$_idcol];
                  }
                  elseif($_table == $this->tablePrefix.'article')
                  {
                      $indexData['fid'] = $value['id'];
                  }
                  elseif(count($primaryKeys) == 1)
                  {
                      $indexData['fid'] = $value[$primaryKeys[0]];
                  }
                  elseif(count($primaryKeys))
                  {
                      $fids = array();
                      foreach($primaryKeys as $pk)
                          $fids[$pk] = $value[$pk];
                      $indexData['fid'] = json_encode($fids);
                  }

                  if(is_null($indexData['fid']))
                      $indexData['fid'] = $this->getMinFID();

                  if(array_key_exists('re_id',$value))
                  {
                      $indexData['catid'] = $value['re_id'];
                      if($_table == $this->tablePrefix.'article')
                          $indexData['catid'] = intval($value['startpage']) ? $value['id'] : $value['re_id'];
                  }
                  elseif(array_key_exists('category_id',$value))
                      $indexData['catid'] = $value['category_id'];
                  else
                      $indexData['catid'] = NULL;

                  $additionalValues = array();
                  foreach($additionalColumns as $col)
                  {
                      $additionalValues[$col] = $value[$col];
                  }
                  $indexData['values'] = $insert->escape(serialize($additionalValues));

                  $indexData['unchangedtext'] = $insert->escape((string) $value[$_column]);
                  $indexData['plaintext'] = $insert->escape($plaintext = $this->getPlaintext($value[$_column]));

                  // collect every sufficiently long word as keyword
                  foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
                  {
                      if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
                          $keywords[] = array('search'=>$keyword,'clang'=>is_null($indexData['clang'])?false:$indexData['clang']);
                  }

                  $indexData['teaser'] = '';
                  if($this->tablePrefix.'article' == $_table)
                  {
                      $rex_article = new rex_article(intval($value['id']), intval($value['clang']));
                      $teaser = true;

                      $article_content_file = $this->generatedPath.'/articles/'.intval($value['id']).'.'.intval($value['clang']).'.content';
                      if(!file_exists($article_content_file))
                      {
                          include_once ($this->includePath."/functions/function_rex_generate.inc.php");
                          $generated = rex_generateArticleContent(intval($value['id']), intval($value['clang']));
                          if($generated !== true)
                          {
                              // the article content could not be generated: this row is not indexed at all
                              $teaser = false;
                              continue;
                          }
                      }

                      // no teaser for articles whose generated content issues a redirect
                      if(file_exists($article_content_file) AND preg_match($this->encodeRegex('~(header\s*\(\s*["\']\s*Location\s*:)|(rex_redirect\s*\()~is'), rex_get_file_contents($article_content_file)))
                      {
                          $teaser = false;
                      }

                      $indexData['teaser'] = $teaser ? $insert->escape($this->getTeaserText($this->getPlaintext($rex_article->getArticle()))) : '';
                  }

                  $insert->setTable($this->tablePrefix.'587_searchindex');
                  $insert->setValues($indexData);
                  $insert->insert();

                  $count++;
              }
          }

          $this->storeKeywords($keywords, false);

          $this->endFrontendMode();
      }
      else
      {
          return false;
      }

      return $count;
  }
  /**
   * Indexes a certain file.
   * Returns A587_FILE_GENERATED or an error code (one of the other A587_FILE_* constants).
   *
   * The file is read from $this->documentRoot.'/'.$_filename. PDF files are
   * converted with the external "pdftotext" program if exec() is available,
   * otherwise (or if that fails) with the pdf2txt class.
   *
   * @param string $_filename    path of the file below the document root
   * @param mixed $_doPlaintext  convert the text with getPlaintext() if no plaintext was produced yet?
   * @param mixed $_clang        language id or false
   * @param mixed $_fid          id stored as fid or false; without it a generated id is used
   * @param mixed $_catid        category id or false
   *
   * @return mixed
   */
  function indexFile($_filename, $_doPlaintext = false, $_clang = false, $_fid = false, $_catid = false)
  {
      // extract file-extension
      $filenameArray = explode('.', $_filename);
      $fileext = $filenameArray[count($filenameArray) - 1];

      // check file-extension
      if((!in_array($fileext, $this->fileExtensions) AND !empty($this->fileExtensions)) AND !$this->indexUnknownFileExtensions AND !$this->indexMissingFileExtensions)
          return A587_FILE_FORBIDDEN_EXTENSION;

      // condition that identifies the rows already indexed for this file
      $delete = new rex_sql();
      $where = sprintf(" `filename` = '%s' AND `texttype` = 'file'", $delete->escape($_filename));
      if(is_int($_clang))
          $where .= sprintf(' AND clang = %d',$_clang);
      if(is_int($_fid))
          $where .= sprintf(' AND fid = %d',$_fid);
      elseif(is_array($_fid))
          $where .= sprintf(" AND fid = '%s'",$delete->escape(json_encode($_fid)));
      if(is_int($_catid))
          $where .= sprintf(' AND catid = %d',$_catid);

      // delete from cache
      $select = new rex_sql();
      $select->setTable($this->tablePrefix.'587_searchindex');
      $select->setWhere($where);
      $indexIds = array();
      if($select->select('id'))
      {
          foreach($select->getArray() as $result)
              $indexIds[] = $result['id'];
          $this->deleteCache($indexIds);
      }

      // delete old data
      $delete->setTable($this->tablePrefix.'587_searchindex');
      $delete->setWhere($where);
      $delete->delete();

      // index file
      $text = '';
      $plaintext = '';

      switch($fileext)
      {
          // pdf-files
          case 'pdf':
              // try XPDF
              $return = 0;
              $xpdf = false;
              $error = false;

              if(function_exists('exec'))
              {
                  $tempFile = tempnam($this->generatedPath.'/files/', 'rexsearch');
                  $encoding = 'UTF-8';

                  exec('pdftotext '.escapeshellarg($this->documentRoot.'/'.$_filename).' '.escapeshellarg($tempFile).' -enc '.$encoding, $dummy, $return);

                  if($return > 0)
                  {
                      if($return == 1)
                          $error = A587_FILE_XPDFERR_OPENSRC;
                      if($return == 2)
                          $error = A587_FILE_XPDFERR_OPENDEST;
                      if($return == 3)
                          $error = A587_FILE_XPDFERR_PERM;
                      if($return == 99)
                          $error = A587_FILE_XPDFERR_OTHER;
                  }
                  else
                  {
                      if(false === $text = @file_get_contents($tempFile))
                          $error = A587_FILE_NOEXIST;
                      else
                          $xpdf = true;
                  }

                  unlink($tempFile);
              }

              if(!$xpdf)
              {
                  // if xpdf returned an error, try pdf2txt via php
                  if(false === $pdfContent = @file_get_contents($this->documentRoot.'/'.$_filename))
                      $error = A587_FILE_NOEXIST;
                  else
                  {
                      require_once 'class.pdf2txt.inc.php';
                      $text = pdf2txt::directConvert($pdfContent);
                      $error = false;
                  }
              }

              if($error !== false)
                  return $error;
              elseif(trim($text) == '')
                  return A587_FILE_EMPTY;

              $plaintext = $this->getPlaintext($text);
              break;

          // html- or php-file
          case 'htm':
          case 'html':
          case 'php':
              if(false === $text = @file_get_contents($this->documentRoot.'/'.$_filename))
                  return A587_FILE_NOEXIST;

              $plaintext = $this->getPlaintext($text);
              // without this break the file was read a second time by the default case
              break;

          // other filetype
          default:
              if(false === $text = @file_get_contents($this->documentRoot.'/'.$_filename))
                  return A587_FILE_NOEXIST;
      }

      // convert to UTF-8; if the encoding cannot be detected or converted, the text is kept unchanged
      $detectedEncoding = mb_detect_encoding($text);
      if($detectedEncoding !== false)
      {
          $convertedText = @iconv($detectedEncoding, 'UTF-8', $text);
          if($convertedText !== false)
              $text = $convertedText;
      }

      // Plaintext
      if(empty($plaintext))
      {
          if($_doPlaintext)
              $plaintext = $this->getPlaintext($text);
          else
              $plaintext = $text;
      }

      // index file-content
      $insert = new rex_sql();
      $fileData = array();

      $fileData['texttype'] = 'file';
      if($_fid !== false)
          $fileData['ftable'] = $this->tablePrefix.'file';
      $fileData['filename'] = $insert->escape($_filename);
      $fileData['fileext'] = $insert->escape($fileext);
      if($_clang !== false)
          $fileData['clang'] = intval($_clang);

      if($_fid !== false)
          $fileData['fid'] = intval($_fid);
      else
          $fileData['fid'] = NULL;

      // files without an id get a generated negative id
      if(is_null($fileData['fid']))
          $fileData['fid'] = $this->getMinFID();

      if($_catid !== false)
          $fileData['catid'] = intval($_catid);

      $fileData['unchangedtext'] = $insert->escape($text);
      $fileData['plaintext'] = $insert->escape($plaintext);

      // collect every sufficiently long word as keyword
      $keywords = array();
      foreach(preg_split($this->encodeRegex('~[[:punct:][:space:]]+~ism'), $plaintext) as $keyword)
      {
          if($this->significantCharacterCount <= mb_strlen($keyword,'UTF-8'))
              $keywords[] = array('search'=>$keyword,'clang'=>!isset($fileData['clang'])?false:$fileData['clang']);
      }
      $this->storeKeywords($keywords, false);

      $fileData['teaser'] = $insert->escape($this->getTeaserText($plaintext));

      $insert->setTable($this->tablePrefix.'587_searchindex');
      $insert->setValues($fileData);
      $insert->insert();

      return A587_FILE_GENERATED;
  }
  /**
   * Returns a negative id that is smaller than every fid stored in the
   * search index: the lowest stored fid minus one if that fid is negative,
   * otherwise -1.
   *
   * @return int
   */
  function getMinFID()
  {
      $query = 'SELECT MIN(CONVERT(fid, SIGNED)) as minfid FROM `'.$this->tablePrefix.'587_searchindex`';

      $sql = new rex_sql();
      $rows = $sql->getArray($query);
      $lowestFid = intval($rows[0]['minfid']);

      if($lowestFid < 0)
          return $lowestFid - 1;

      return -1;
  }
  /**
   * Excludes an article from the index.
   *
   * Removes the article's rows from the search index together with the
   * cache entries that refer to them.
   *
   * @param int $_id
   * @param mixed $_clang language id or false for all languages
   */
  function excludeArticle($_id,$_clang = false)
  {
      $where = "fid = ".intval($_id)." AND texttype='article'";
      if($_clang !== false)
          $where .= " AND clang='".intval($_clang)."'";

      // collect the ids of the index rows first: once the rows are deleted
      // they can no longer be found
      $select = new rex_sql();
      $select->setTable($this->tablePrefix.'587_searchindex');
      $select->setWhere($where);
      $select->select('id');

      $indexIds = array();
      foreach($select->getArray() as $result)
          $indexIds[] = $result['id'];

      // delete from cache
      $this->deleteCache($indexIds);

      // exclude article
      $art_sql = new rex_sql();
      $art_sql->setTable($this->tablePrefix.'587_searchindex');
      $art_sql->setWhere($where);
      $art_sql->delete();
  }
  /**
   * Deletes the complete search index.
   *
   * Issues a delete without a where-clause on the search index table and
   * afterwards calls deleteCache() without arguments.
   */
  function deleteIndex()
  {
    $indexTable = $this->tablePrefix.'587_searchindex';

    $sql = new rex_sql();
    $sql->setTable($indexTable);
    $sql->delete();

    $this->deleteCache();
  }
  /**
   * Sets the surround-tags for found keywords.
   *
   * Accepts either the start-tag and the end-tag as two arguments
   * or a single array holding both tags. An array is stored as given;
   * two separate arguments are cast to strings.
   * Both tags are appended to the hash source string.
   *
   * @param mixed $_tags start-tag or array(start-tag, end-tag)
   * @param mixed $_endtag end-tag, false if $_tags is an array
   */
  function setSurroundTags($_tags, $_endtag = false)
  {
    $pairGiven = ($_endtag === false) && is_array($_tags);

    if(!$pairGiven)
      $_tags = array((string) $_tags, (string) $_endtag);

    $this->surroundTags = $_tags;
    $this->hashMe .= $this->surroundTags[0].$this->surroundTags[1];
  }
  /**
   * Sets the maximum count of results.
   *
   * Accepts either the start- and the count-limit,
   * or an array with both limits,
   * or only the count-limit (the start is then 0).
   * All values are cast to integers and appended to the hash source string.
   *
   * example method calls:
   * setLimit(10,10);         // start-limit 10, count-limit 10
   * setLimit(20);            // maximum of 20 results starting with the first result
   * setLimit(array(0,20));   // maximum of 20 results starting with the first result
   *
   * @param mixed $_limit
   * @param mixed $_countLimit
   */
  function setLimit($_limit, $_countLimit = false)
  {
    if($_countLimit !== false)
      $range = array((int) $_limit, (int) $_countLimit);
    elseif(is_array($_limit))
      $range = array((int) $_limit[0], (int) $_limit[1]);
    else
      $range = array(0, (int) $_limit);

    $this->limit = $range;
    $this->hashMe .= $range[0].$range[1];
  }
  /**
   * Sets words, which must not be found.
   *
   * Every word is appended to the blacklist (lower-cased when the search
   * is case insensitive) and to the hash source string.
   *
   * @param array $_words
   */
  function setBlacklist($_words)
  {
    foreach($_words as $word)
    {
      if($this->ci)
        $word = strtolower($word);

      $entry = (string) $word;
      $this->blacklist[] = $entry;
      $this->hashMe .= $entry;
    }
  }
  /**
   * Excludes articles with the transferred IDs.
   *
   * The IDs are converted to integers, appended to the already excluded IDs
   * and duplicates are removed afterwards.
   *
   * @param array $_ids
   */
  function setExcludeIDs($_ids)
  {
    foreach($_ids as $rawId)
      array_push($this->excludeIDs, intval($rawId));

    $this->excludeIDs = array_unique($this->excludeIDs);
  }
  /**
   * Sets the IDs of the articles which are only to be searched through.
   *
   * Forwards the IDs to setSearchInIDs() under the key 'articles'.
   *
   * @param array $_ids
   */
  function searchInArticles($_ids)
  {
    $restriction = array('articles' => $_ids);
    $this->setSearchInIDs($restriction);
  }
  /**
   * Sets the IDs of the categories which are only to be searched through.
   *
   * Forwards the IDs to setSearchInIDs() under the key 'categories'.
   *
   * @param array $_ids
   */
  function searchInCategories($_ids)
  {
    $restriction = array('categories' => $_ids);
    $this->setSearchInIDs($restriction);
  }
  /**
   * Sets the IDs of the mediapool-categories which are only to be searched through.
   *
   * Forwards the IDs to setSearchInIDs() under the key 'filecategories'.
   *
   * @param array $_ids
   */
  function searchInFileCategories($_ids)
  {
    $restriction = array('filecategories' => $_ids);
    $this->setSearchInIDs($restriction);
  }
  /**
   * Sets a database column which only should be searched through.
   *
   * Forwards the table/column pair to setSearchInIDs() under the key
   * 'db_columns', in the form array(table => array(column)).
   *
   * @param string $_table
   * @param string $_column
   */
  function searchInDbColumn($_table, $_column)
  {
    $columnsByTable = array($_table => array($_column));
    $this->setSearchInIDs(array('db_columns' => $columnsByTable));
  }
  /**
   * Sets the columns which should be indexed.
   *
   * The given value replaces the previously stored column list as a whole.
   *
   * @param array $_columns
   */
  function setIncludeColumns($_columns)
  {
    $columns = $_columns;
    $this->includeColumns = $columns;
  }
  /**
   * Stores a where-condition for the search.
   *
   * The condition is stored unchanged and appended to the hash source string.
   *
   * @param string $_where
   */
  function setWhere($_where)
  {
    $condition = $_where;
    $this->where = $condition;
    $this->hashMe = $this->hashMe.$condition;
  }
  /**
   * Sets the mode of how the keywords are logically connected.
   *
   * 'and', 'konj', 'strict' and 'sharp' select the conjunction (every keyword
   * has to be found); 'or', 'disj', 'simple' and 'fuzzy' select the
   * disjunction (one keyword is sufficient). The comparison ignores case.
   * Any other value selects the conjunction, leaves the hash source string
   * untouched and yields false.
   *
   * @param string $_logicalMode
   *
   * @return bool
   */
  function setLogicalMode($_logicalMode)
  {
    $mode = strtolower($_logicalMode);

    if(in_array($mode, array('and', 'konj', 'strict', 'sharp'), true))
    {
      $this->logicalMode = ' AND ';
    }
    elseif(in_array($mode, array('or', 'disj', 'simple', 'fuzzy'), true))
    {
      $this->logicalMode = ' OR ';
    }
    else
    {
      $this->logicalMode = ' AND ';
      return false;
    }

    $this->hashMe .= $this->logicalMode;
    return true;
  }
  /**
   * Sets the mode concerning which text is to be searched through.
   *
   * You can choose between the original text ('html', 'xhtml', 'unmodified',
   * 'original'), the plain text ('text', 'plain', 'stripped', 'bare',
   * 'simple') or both texts ('both', 'all'). The comparison ignores case.
   * Unknown values change nothing and yield false.
   *
   * @param string $_textMode
   *
   * @return bool
   */
  function setTextMode($_textMode)
  {
    $aliases = array(
      'html'       => 'unmodified',
      'xhtml'      => 'unmodified',
      'unmodified' => 'unmodified',
      'original'   => 'unmodified',
      'text'       => 'plain',
      'plain'      => 'plain',
      'stripped'   => 'plain',
      'bare'       => 'plain',
      'simple'     => 'plain',
      'both'       => 'both',
      'all'        => 'both'
    );

    $requested = strtolower($_textMode);
    if(!array_key_exists($requested, $aliases))
      return false;

    $this->textMode = $aliases[$requested];
    $this->hashMe .= $this->textMode;
    return true;
  }
  /**
   * Sets the MySQL search mode.
   *
   * You can choose between 'like' and 'match' (case is ignored).
   * Unknown values change nothing and yield false.
   *
   * @param string $_searchMode
   *
   * @return bool
   */
  function setSearchMode($_searchMode)
  {
    $mode = strtolower($_searchMode);

    if($mode !== 'like' && $mode !== 'match')
      return false;

    $this->searchMode = $mode;
    $this->hashMe .= $mode;
    return true;
  }
  /**
   * Sets the sort order of the results.
   *
   * The parameter has to be an array with the columns as keys
   * and the direction (DESC or ASC) as value (e.g.: array['COLUMN'] = 'ASC').
   * Column names and directions are stored upper-cased.
   *
   * A column named "RELEVANCE_587" is skipped with a notice; an invalid
   * direction is replaced by DESC with a notice. If the parameter is not an
   * array, a warning is raised and false is returned.
   *
   * @param array $_order
   *
   * @return bool
   */
  function setOrder($_order)
  {
    if(!is_array($_order))
    {
      // error() is not a PHP function; trigger_error() is the built-in
      // that takes (message, level).
      trigger_error('Wrong parameter. Expecting an array',E_USER_WARNING);
      return false;
    }

    $i = 0;
    foreach($_order as $col => $dir)
    {
      $i++;
      $col2upper = strtoupper((string)$col);

      if('RELEVANCE_587' == $col2upper)
      {
        trigger_error(sprintf('Column %d must not be named "RELEVANCE_587". Column %d is ignored for the sort order',$i,$i),E_USER_NOTICE);
        continue;
      }

      $dir2upper = strtoupper((string)$dir);
      if(!in_array($dir2upper, array('ASC','DESC'), true))
      {
        trigger_error(sprintf('Column %d has no correct sort order (ASC or DESC). Descending (DESC) sort order is assumed',$i),E_USER_NOTICE);
        $dir2upper = 'DESC';
      }

      $this->order[$col2upper] = $dir2upper;
      $this->hashMe .= $col2upper.$dir2upper;
    }

    return true;
  }
  /**
   * Sets the type of the text with the highlighted keywords.
   *
   * Valid types are 'sentence', 'paragraph', 'surroundtext',
   * 'surroundtextsingle', 'teaser' and 'array'. A valid type is stored and
   * appended to the hash source string. Any other value selects
   * 'surroundtextsingle' and yields false.
   *
   * @param string $_type
   *
   * @return bool
   */
  function setHighlightType($_type)
  {
    switch($_type)
    {
      case 'sentence':
      case 'paragraph':
      case 'surroundtext':
      case 'surroundtextsingle':
      case 'teaser':
      case 'array':
        $this->highlightType = $_type;
        // The hash has to be extended before returning; previously this
        // statement was placed after the return and therefore never executed.
        $this->hashMe .= $this->highlightType;
        return true;

      default:
        $this->highlightType = 'surroundtextsingle';
        return false;
    }
  }
  /**
   * Converts the search string to an array.
   *
   * The string is split into terms: either sequences of non-whitespace
   * characters or phrases enclosed in double quotes. Each term may be
   * preceded by any number of plus signs, which raise its weight.
   * Terms matching a blacklisted word are not added to the search array but
   * recorded in $this->blacklisted. The weight of a term is the number of
   * plus signs + 1 + the weight stored for it in the whitelist.
   *
   * Returns the number of search terms.
   *
   * @param string $_searchString
   *
   * @return int
   */
  function parseSearchString($_searchString)
  {
    // reset searchArray
    $this->searchArray = array();

    $matches = array();
    preg_match_all($this->encodeRegex('~(?:(\+*)"([^"]*)")|(?:(\+*)(\S+))~is'), $_searchString, $matches, PREG_SET_ORDER);

    $count = 0;
    foreach($matches as $match)
    {
      if(count($match) == 5)
      {
        // words without double quotes (foo)
        $word = $match[4];
        $plus = $match[3];
      }
      elseif(isset($match[2]) AND $match[2] !== '')
      {
        // words with double quotes ("foo bar");
        // compared against '' so that a quoted "0" is not dropped
        $word = $match[2];
        $plus = $match[1];
      }
      else
      {
        // empty quotes ("")
        continue;
      }

      $notBlacklisted = true;

      // blacklisted words are excluded
      foreach($this->blacklist as $blacklistedWord)
      {
        if(preg_match($this->encodeRegex('~\b'.preg_quote($blacklistedWord,'~').'\b~is'), $word))
        {
          $this->blacklisted[] = array($blacklistedWord => $word);
          $notBlacklisted = false;
        }
      }

      if($notBlacklisted)
      {
        // whitelisted words get extra weighted;
        // addWhitelist() stores lower-cased keys in case-insensitive mode,
        // so the lookup has to use the same normalization
        $whitelistKey = (string)($this->ci?strtolower($word):$word);
        $extraWeight = array_key_exists($whitelistKey,$this->whitelist)?$this->whitelist[$whitelistKey]:0;

        $this->searchArray[$count] = array(
          'search' => $word,
          'weight' => strlen($plus) + 1 + $extraWeight,
          'clang' => $this->clang
        );
        $count++;
      }
    }

    return $count;
  }
  /**
   * Which words are important?
   *
   * This method adds weight to special words.
   * If a word already exists, the method adds the weight to the stored one.
   * Expects an array with the keys containing the words
   * and the values containing the weight to add.
   * Words are lower-cased when the search is case insensitive; every word is
   * appended to the hash source string.
   *
   * @param array $_whitelist
   */
  function addWhitelist($_whitelist)
  {
    foreach($_whitelist as $word => $weight)
    {
      $key = (string)($this->ci?strtolower($word):$word);
      $this->hashMe .= $key;

      // avoid reading an undefined index for words seen for the first time
      $current = isset($this->whitelist[$key]) ? intval($this->whitelist[$key]) : 0;
      $this->whitelist[$key] = $current + intval($weight);
    }
  }
  /**
   * Case sensitive or case insensitive?
   *
   * Alias of setCI().
   *
   * @param bool $_ci
   *
   * @ignore
   */
  function setCaseInsensitive($_ci = true)
  {
    // setCI() is a method of this class, not a global function
    $this->setCI($_ci);
  }
  /**
   * Case sensitive or case insensitive?
   *
   * Stores the flag as a strict boolean.
   *
   * @param bool $_ci
   *
   * @ignore
   */
  function setCI($_ci = true)
  {
    $this->ci = $_ci ? true : false;
  }
  /**
   * Sets the language-Id.
   *
   * false is stored as false; every other value is converted to an integer.
   * The given value is appended to the hash source string as passed in.
   *
   * @param mixed $_clang
   */
  function setClang($_clang)
  {
    $this->clang = ($_clang === false) ? false : intval($_clang);
    $this->hashMe .= $_clang;
  }
  /**
   * Strips the HTML-Tags from a text and replaces them with spaces or line breaks
   *
   * The text is first passed to the extension point 'A587_PLAINTEXT'. If the
   * extension point returns an array, its 'text' entry replaces the text and
   * its 'process' entry decides whether the tags are stripped at all; if it
   * returns a string, that string replaces the text.
   *
   * When processing, <head> and <script> elements are removed including their
   * content, the listed block-level tags become line breaks, all other tags
   * become spaces, and runs of line breaks or of tabs/spaces are collapsed.
   *
   * @param string $_text
   *
   * @return string
   */
  function getPlaintext($_text)
  {
    $process = true;
    $extensionReturn = rex_register_extension_point('A587_PLAINTEXT', $_text);
    if(is_array($extensionReturn))
    {
      $_text = $extensionReturn['text'];
      $process = !empty($extensionReturn['process']);
    }
    elseif(is_string($extensionReturn))
      $_text = $extensionReturn;

    if($process)
    {
      // "\b[^>]*" instead of "[^>]+": tags without attributes (<p>, </div>)
      // have to match as well, and the tag name has to end at a word boundary
      // so that e.g. <param> is not taken for <p>
      $tags2nl = $this->encodeRegex('~</?(address|blockquote|center|del|dir|div|dl|fieldset|form|h1|h2|h3|h4|h5|h6|hr|ins|isindex|menu|noframes|noscript|ol|p|pre|table|ul)\b[^>]*>~si');

      $patterns = array(
        $this->encodeRegex('~<(head|script).+?</(head|script)>~si'),
        $tags2nl,
        $this->encodeRegex('~<[^>]+>~si'),
        $this->encodeRegex('~[\n\r]+~si'),
        $this->encodeRegex('~[\t ]+~si')
      );
      $replacements = array('',"\n",' ',"\n",' ');

      $_text = trim(strip_tags(preg_replace($patterns, $replacements, $_text)));
    }

    return $_text;
  }
  /**
   * According to the highlight-type this method will return a string or an array.
   * Found keywords will be highlighted with the surround-tags.
   *
   * If $this->searchEntities is set, the HTML-entity encoded form of every
   * keyword is temporarily added to the search array, so that encoded
   * occurrences are highlighted as well. The search array is restored
   * before the method returns.
   *
   * @param string $_text
   *
   * @return mixed string, or array for the highlight-type 'array'
   */
  function getHighlightedText($_text)
  {
    $tmp_searchArray = $this->searchArray;

    if($this->searchEntities)
    {
      foreach($tmp_searchArray as $keyword)
      {
        $this->searchArray[] = array('search' => htmlentities($keyword['search'], ENT_COMPAT, 'UTF-8'));
      }
    }

    $highlighted = $this->_buildHighlightedText($_text);

    // Restore the original keywords. Previously every branch returned from
    // inside the switch, so this statement was never reached and the entity
    // variants piled up in the search array with every call.
    $this->searchArray = $tmp_searchArray;

    return $highlighted;
  }

  /**
   * Builds the highlighted text for the current highlight-type,
   * using the keywords currently held in $this->searchArray.
   *
   * Helper of getHighlightedText(); returns null for an unknown type.
   *
   * @param string $_text
   *
   * @return mixed
   *
   * @ignore
   */
  function _buildHighlightedText($_text)
  {
    switch($this->highlightType)
    {
      case 'sentence':
      case 'paragraph':
        // split text at punctuation marks
        if($this->highlightType == 'sentence')
          $regex = '~(\!|\.|\?|[\n]+)~si';

        // split text at line breaks
        if($this->highlightType == 'paragraph')
          $regex = '~([\r\n])~si';

        $Apieces = preg_split($this->encodeRegex($regex), $_text, -1, PREG_SPLIT_DELIM_CAPTURE);

        $search = array();
        $replace = array();
        foreach($this->searchArray as $keyword)
        {
          $search[] = preg_quote($keyword['search'],'~');
          $replace[] = $this->encodeRegex('~'.preg_quote($keyword['search'],'~').'~is');
        }

        // look for the first piece containing one of the keywords
        for($i = 0; $i < count($Apieces); $i++)
          if(preg_match($this->encodeRegex('~('.implode('|',$search).')~is'), $Apieces[$i]))
            break;

        $return = '';
        if($i < count($Apieces))
          $return .= $Apieces[$i];

        // keep everything up to the first keyword plus at most
        // maxHighlightedTextChars characters behind it
        $cutted = array();
        preg_match($this->encodeRegex('~^.*?('.implode('|',$search).').{0,'.$this->maxHighlightedTextChars.'}~ims'), $return, $cutted);

        // NOTE(review): this compares the length of the matched keyword
        // ($cutted[1]) with the piece; possibly $cutted[0] was meant - confirm.
        $needEllipses = false;
        if(strlen($cutted[1]) != strlen($return))
          $needEllipses = true;

        // cut at the last space and highlight the keywords
        $return = preg_replace($replace, $this->surroundTags[0].'$0'.$this->surroundTags[1], substr($cutted[0],0,strrpos($cutted[0],' ')));

        if($needEllipses)
          $return .= ' '.$this->ellipsis;

        return $return;

      case 'surroundtext':
      case 'surroundtextsingle':
      case 'array':
        $startEllipsis = false;
        $endEllipsis = false;
        $Ahighlighted = array();

        $_text = preg_replace('~\s+~', ' ', $_text);

        $replace = array();
        foreach($this->searchArray as $keyword)
          $replace[] = $this->encodeRegex('~'.preg_quote($keyword['search'],'~').'~is');

        $positions = array();
        for($i = 0; $i < count($this->searchArray); $i++)
        {
          $hits = array();
          $offset = 0;

          // $hit[1]: keyword with surrounding text, $hit[2]: text before, $hit[3]: text behind
          preg_match_all($this->encodeRegex('~((.{0,'.$this->maxHighlightedTextChars.'})'.preg_quote($this->searchArray[$i]['search'],'~').'(.{0,'.$this->maxHighlightedTextChars.'}))~ims'), $_text, $hits, PREG_SET_ORDER);

          foreach($hits as $hit)
          {
            $offset = strpos($_text, $hit[0], $offset) + 1;
            $currentposition = ceil(intval(($offset - 1) / (2 * $this->maxHighlightedTextChars)));

            if($this->highlightType == 'array' AND !array_key_exists($this->searchArray[$i]['search'], $Ahighlighted))
              $Ahighlighted[$this->searchArray[$i]['search']] = array();

            if(trim($hit[1]) != '')
            {
              $surroundText = $hit[1];

              // drop the (possibly cut) first word
              if(strlen($hit[2]) > 0 AND false !== strpos($hit[2], ' '))
                $surroundText = substr($surroundText, strpos($surroundText, ' '));

              // drop the (possibly cut) last word
              if(strlen($hit[3]) > 0 AND false !== strpos($hit[3], ' '))
                $surroundText = substr($surroundText, 0, strrpos($surroundText,' '));

              if($i == 0 AND strlen($hit[2]) > 0)
                $startEllipsis = true;

              if($i == (count($this->searchArray) - 1) AND strlen($hit[3]) > 0)
                $endEllipsis = true;

              if($this->highlightType == 'array')
                $Ahighlighted[$this->searchArray[$i]['search']][] = preg_replace($replace, $this->surroundTags[0].'$0'.$this->surroundTags[1], trim($surroundText));
              else if(!in_array($currentposition, $positions))
                $Ahighlighted[] = trim($surroundText);

              $positions[] = $currentposition;

              if($this->highlightType == 'surroundtextsingle')
                break;
            }
          }
        }

        if($this->highlightType == 'array')
          return $Ahighlighted;

        $return = implode(' '.$this->ellipsis.' ', $Ahighlighted);

        if($startEllipsis)
          $return = $this->ellipsis.' '.$return;

        if($endEllipsis)
          $return = $return.' '.$this->ellipsis;

        $return = preg_replace($replace, $this->surroundTags[0].'$0'.$this->surroundTags[1], $return);

        return $return;

      case 'teaser':
        $search = array();
        foreach($this->searchArray as $keyword)
          $search[] = $this->encodeRegex('~'.preg_quote($keyword['search'],'~').'~is');

        return preg_replace($search, $this->surroundTags[0].'$0'.$this->surroundTags[1], $this->getTeaserText($_text));
    }

    return null;
  }
  /**
   * Gets the teaser of a text.
   *
   * The text is split at whitespace and the words are joined again, each
   * followed by a space, as long as the accumulated length (counted with
   * strlen(), including the spaces) does not exceed $this->maxTeaserChars.
   * If a word had to be left out, the ellipsis is appended.
   *
   * @param string $_text
   *
   * @return string
   */
  function getTeaserText($_text)
  {
    $words = preg_split($this->encodeRegex('~\s+~si'), $_text, $this->maxTeaserChars);

    $teaser = '';
    $usedChars = 0;
    foreach($words as $word)
    {
      $wordLength = strlen($word);

      // the next word would not fit anymore: stop and mark the cut
      if($usedChars + $wordLength > $this->maxTeaserChars)
        return $teaser.$this->ellipsis;

      $teaser .= $word.' ';
      $usedChars += $wordLength + 1;
    }

    return $teaser;
  }
  1502. /**
  1503. * Returns if a search term is already cached.
  1504. * The cached result will be stored in $this->cachedArray.
  1505. *
  1506. * @param string $_search
  1507. *
  1508. * @return bool
  1509. */
  1510. function isCached($_search)
  1511. {
  1512. $sql = new rex_sql();
  1513. $sql->setTable($this->tablePrefix.'587_searchcache');
  1514. $sql-

Large files are truncated, but you can click here to view the full file