PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/protected/modules/zendsearch/vendors/Zend/Search/Lucene/Search/Query/Wildcard.php

https://gitlab.com/RonLab1987/YupePlusClear
PHP | 366 lines | 175 code | 46 blank | 145 comment | 36 complexity | 6d6b0edc15ae5c605d09150caa0457fc MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Wildcard.php 24593 2012-01-05 20:35:02Z matthew $
  21. */
  22. /** Zend_Search_Lucene_Search_Query */
  23. require_once 'Zend/Search/Lucene/Search/Query.php';
  /**
   * Wildcard query.
   *
   * Holds a pattern term whose text may contain '*' (translated to the regular
   * expression '.*') and '?' (translated to '.').
   *
   * The query is never evaluated directly: rewrite() expands the pattern against
   * the terms of an index and returns an Empty, Term or MultiTerm query, while
   * optimize(), createWeight(), execute(), matchedDocs() and score() always throw.
   *
   * @category Zend
   * @package Zend_Search_Lucene
   * @subpackage Search
   * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
   * @license http://framework.zend.com/license/new-bsd New BSD License
   */
  class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search_Query
  {
      /**
       * Search pattern.
       *
       * Field has to be fully specified or has to be null
       * Text may contain '*' or '?' symbols
       *
       * @var Zend_Search_Lucene_Index_Term
       */
      private $_pattern;

      /**
       * Matched terms list.
       *
       * Null until rewrite() has been called; rewrite() then fills it with the
       * index terms that matched the pattern, so it may be used for search result
       * post-processing.
       *
       * Array of Zend_Search_Lucene_Index_Term objects
       *
       * @var array|null
       */
      private $_matches = null;

      /**
       * Minimum term prefix length (number of minimum non-wildcard characters).
       *
       * rewrite() measures the prefix with strlen(), i.e. the length is counted
       * in bytes.
       *
       * @var integer
       */
      private static $_minPrefixLength = 3;

      /**
       * Zend_Search_Lucene_Search_Query_Wildcard constructor.
       *
       * @param Zend_Search_Lucene_Index_Term $pattern
       */
      public function __construct(Zend_Search_Lucene_Index_Term $pattern)
      {
          $this->_pattern = $pattern;
      }

      /**
       * Get minimum prefix length
       *
       * @return integer
       */
      public static function getMinPrefixLength()
      {
          return self::$_minPrefixLength;
      }

      /**
       * Set minimum prefix length
       *
       * The value is shared by all wildcard queries (static setting).
       *
       * @param integer $minPrefixLength
       */
      public static function setMinPrefixLength($minPrefixLength)
      {
          self::$_minPrefixLength = $minPrefixLength;
      }

      /**
       * Get terms prefix
       *
       * Returns the part of the word that precedes its first '?' or '*'.
       * A word without wildcards is returned unchanged.
       *
       * @param string $word
       * @return string
       */
      private static function _getPrefix($word)
      {
          // Length of the leading run that contains neither wildcard character.
          $prefixLength = strcspn($word, '?*');

          if ($prefixLength === strlen($word)) {
              // No wildcard at all: hand the word back untouched.
              return $word;
          }

          return substr($word, 0, $prefixLength);
      }

      /**
       * Translate the pattern text into an anchored regular expression.
       *
       * Shared by rewrite() and _highlightMatches() so both match terms the same way.
       *
       * @return string
       */
      private function _buildMatchExpression()
      {
          // preg_quote() escapes the wildcards as '\?' and '\*'; turning exactly
          // those escaped forms into '.' and '.*' keeps every other character literal.
          $expression = '/^' . str_replace(
              ['\\?', '\\*'],
              ['.', '.*'],
              preg_quote($this->_pattern->text, '/')
          ) . '$/';

          /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
          // Probe whether the 'u' modifier is usable. The '@' is deliberate:
          // NOTE(review): presumably a PCRE build without Unicode support raises a
          // warning here; in that case the modifier is simply not added.
          if (@preg_match('/\pL/u', 'a') == 1) {
              // PCRE unicode support is turned on
              // add Unicode modifier to the match expression
              $expression .= 'u';
          }

          return $expression;
      }

      /**
       * Re-write query into primitive queries in the context of specified index
       *
       * Walks the terms of every searched field, starting at the pattern prefix,
       * and collects those matching the pattern into the matched terms list.
       *
       * @param Zend_Search_Lucene_Interface $index
       * @return Zend_Search_Lucene_Search_Query Empty query when nothing matched, Term query
       *                                         for a single match, MultiTerm query otherwise
       * @throws Zend_Search_Lucene_Exception when the prefix is shorter than the minimum prefix
       *                                      length or the terms per query limit is exceeded
       */
      public function rewrite(Zend_Search_Lucene_Interface $index)
      {
          $this->_matches = [];

          if ($this->_pattern->field === null) {
              // Search through all fields
              $fields = $index->getFieldNames(true /* indexed fields list */);
          } else {
              $fields = [$this->_pattern->field];
          }

          $prefix       = self::_getPrefix($this->_pattern->text);
          $prefixLength = strlen($prefix);

          if ($prefixLength < self::$_minPrefixLength) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('At least ' . self::$_minPrefixLength . ' non-wildcard characters are required at the beginning of pattern.');
          }

          $matchExpression = $this->_buildMatchExpression();
          $maxTerms        = Zend_Search_Lucene::getTermsPerQueryLimit();

          require_once 'Zend/Search/Lucene/Index/Term.php';

          foreach ($fields as $field) {
              $index->resetTermsStream();
              $index->skipTo(new Zend_Search_Lucene_Index_Term($prefix, $field));

              // One loop serves both the "prefix" and the "no prefix" case: with a
              // zero-length prefix strncmp() compares nothing and reports equality.
              // Field and prefix are compared as strings on purpose; a loose '=='
              // treats two numeric-looking strings (e.g. '1e1' and '10') as equal.
              while (($term = $index->currentTerm()) !== null
                  && (string) $term->field === (string) $field
                  && strncmp($term->text, $prefix, $prefixLength) === 0
              ) {
                  if (preg_match($matchExpression, $term->text) === 1) {
                      $this->_matches[] = $term;

                      // A limit of 0 disables the check; otherwise the query fails
                      // as soon as the number of matches exceeds the limit.
                      if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
                          // Do not leave the terms stream open when bailing out.
                          $index->closeTermsStream();

                          require_once 'Zend/Search/Lucene/Exception.php';
                          throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
                      }
                  }

                  $index->nextTerm();
              }

              $index->closeTermsStream();
          }

          $matchCount = count($this->_matches);

          if ($matchCount == 0) {
              require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
              return new Zend_Search_Lucene_Search_Query_Empty();
          }

          if ($matchCount == 1) {
              require_once 'Zend/Search/Lucene/Search/Query/Term.php';
              return new Zend_Search_Lucene_Search_Query_Term(reset($this->_matches));
          }

          require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
          $rewrittenQuery = new Zend_Search_Lucene_Search_Query_MultiTerm();
          foreach ($this->_matches as $matchedTerm) {
              $rewrittenQuery->addTerm($matchedTerm);
          }

          return $rewrittenQuery;
      }

      /**
       * Optimize query in the context of specified index
       *
       * Not supported for a wildcard query: always throws.
       *
       * @param Zend_Search_Lucene_Interface $index
       * @return Zend_Search_Lucene_Search_Query
       * @throws Zend_Search_Lucene_Exception
       */
      public function optimize(Zend_Search_Lucene_Interface $index)
      {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
      }

      /**
       * Returns query pattern
       *
       * @return Zend_Search_Lucene_Index_Term
       */
      public function getPattern()
      {
          return $this->_pattern;
      }

      /**
       * Return query terms
       *
       * Returns the terms collected by the last rewrite() call.
       *
       * @return array
       * @throws Zend_Search_Lucene_Exception when rewrite() has not been called yet
       */
      public function getQueryTerms()
      {
          if ($this->_matches === null) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
          }

          return $this->_matches;
      }

      /**
       * Constructs an appropriate Weight implementation for this query.
       *
       * Not supported for a wildcard query: always throws.
       *
       * @param Zend_Search_Lucene_Interface $reader
       * @return Zend_Search_Lucene_Search_Weight
       * @throws Zend_Search_Lucene_Exception
       */
      public function createWeight(Zend_Search_Lucene_Interface $reader)
      {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
      }

      /**
       * Execute query in context of index reader
       * It also initializes necessary internal structures
       *
       * Not supported for a wildcard query: always throws.
       *
       * @param Zend_Search_Lucene_Interface $reader
       * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
       * @throws Zend_Search_Lucene_Exception
       */
      public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
      {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
      }

      /**
       * Get document ids likely matching the query
       *
       * It's an array with document ids as keys (performance considerations)
       *
       * Not supported for a wildcard query: always throws.
       *
       * @return array
       * @throws Zend_Search_Lucene_Exception
       */
      public function matchedDocs()
      {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
      }

      /**
       * Score specified document
       *
       * Not supported for a wildcard query: always throws.
       *
       * @param integer $docId
       * @param Zend_Search_Lucene_Interface $reader
       * @return float
       * @throws Zend_Search_Lucene_Exception
       */
      public function score($docId, Zend_Search_Lucene_Interface $reader)
      {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
      }

      /**
       * Query specific matches highlighting
       *
       * Tokenizes the 'body' field of the highlighter's document with the default
       * analyzer and highlights every token whose text matches the pattern.
       *
       * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
       */
      protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
      {
          $matchExpression = $this->_buildMatchExpression();

          $docBody = $highlighter->getDocument()->getFieldUtf8Value('body');

          require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
          $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');

          $words = [];
          foreach ($tokens as $token) {
              $termText = $token->getTermText();
              if (preg_match($matchExpression, $termText) === 1) {
                  $words[] = $termText;
              }
          }

          $highlighter->highlight($words);
      }

      /**
       * Print a query
       *
       * Format: optional "field:" prefix, the pattern text, and "^boost" (rounded
       * to 4 decimals) when the boost differs from 1.
       *
       * @return string
       */
      public function __toString()
      {
          // It's used only for query visualisation, so we don't care about characters escaping
          if ($this->_pattern->field !== null) {
              $query = $this->_pattern->field . ':';
          } else {
              $query = '';
          }

          $query .= $this->_pattern->text;

          if ($this->getBoost() != 1) {
              $query = $query . '^' . round($this->getBoost(), 4);
          }

          return $query;
      }
  }