PageRenderTime 40ms CodeModel.GetById 12ms RepoModel.GetById 1ms app.codeStats 0ms

/system/Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php

https://gitlab.com/Ltaimao/wecenter
PHP | 270 lines | 104 code | 38 blank | 128 comment | 18 complexity | 87f3d4bfb3031dd7f7449559e69572f1 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /** Zend_Search_Lucene_Search_Query_Preprocessing */
  23. //require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
  24. /**
  25. * It's an internal class intended to finalize phrase query processing after query parsing.
  26. * This type of query is not actually involved into query execution.
  27. *
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage Search
  31. * @internal
  32. * @copyright Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
  33. * @license http://framework.zend.com/license/new-bsd New BSD License
  34. */
  35. class Zend_Search_Lucene_Search_Query_Preprocessing_Phrase extends Zend_Search_Lucene_Search_Query_Preprocessing
  36. {
  37. /**
  38. * Phrase to find.
  39. *
  40. * @var string
  41. */
  42. private $_phrase;
  43. /**
  44. * Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
  45. *
  46. * @var string
  47. */
  48. private $_phraseEncoding;
  49. /**
  50. * Field name.
  51. *
  52. * @var string
  53. */
  54. private $_field;
  55. /**
  56. * Sets the number of other words permitted between words in query phrase.
  57. * If zero, then this is an exact phrase search. For larger values this works
  58. * like a WITHIN or NEAR operator.
  59. *
  60. * The slop is in fact an edit-distance, where the units correspond to
  61. * moves of terms in the query phrase out of position. For example, to switch
  62. * the order of two words requires two moves (the first move places the words
  63. * atop one another), so to permit re-orderings of phrases, the slop must be
  64. * at least two.
  65. * More exact matches are scored higher than sloppier matches, thus search
  66. * results are sorted by exactness.
  67. *
  68. * The slop is zero by default, requiring exact matches.
  69. *
  70. * @var integer
  71. */
  72. private $_slop;
  73. /**
  74. * Class constructor. Create a new preprocessing object for prase query.
  75. *
  76. * @param string $phrase Phrase to search.
  77. * @param string $phraseEncoding Phrase encoding.
  78. * @param string $fieldName Field name.
  79. */
  80. public function __construct($phrase, $phraseEncoding, $fieldName)
  81. {
  82. $this->_phrase = $phrase;
  83. $this->_phraseEncoding = $phraseEncoding;
  84. $this->_field = $fieldName;
  85. }
  86. /**
  87. * Set slop
  88. *
  89. * @param integer $slop
  90. */
  91. public function setSlop($slop)
  92. {
  93. $this->_slop = $slop;
  94. }
  95. /**
  96. * Get slop
  97. *
  98. * @return integer
  99. */
  100. public function getSlop()
  101. {
  102. return $this->_slop;
  103. }
  104. /**
  105. * Re-write query into primitive queries in the context of specified index
  106. *
  107. * @param Zend_Search_Lucene_Interface $index
  108. * @return Zend_Search_Lucene_Search_Query
  109. */
  110. public function rewrite(Zend_Search_Lucene_Interface $index)
  111. {
  112. // Allow to use wildcards within phrases
  113. // They are either removed by text analyzer or used as a part of keyword for keyword fields
  114. //
  115. // if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
  116. // //require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  117. // throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
  118. // }
  119. // Split query into subqueries if field name is not specified
  120. if ($this->_field === null) {
  121. //require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
  122. $query = new Zend_Search_Lucene_Search_Query_Boolean();
  123. $query->setBoost($this->getBoost());
  124. //require_once 'Zend/Search/Lucene.php';
  125. if (Zend_Search_Lucene::getDefaultSearchField() === null) {
  126. $searchFields = $index->getFieldNames(true);
  127. } else {
  128. $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
  129. }
  130. foreach ($searchFields as $fieldName) {
  131. $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
  132. $this->_phraseEncoding,
  133. $fieldName);
  134. $subquery->setSlop($this->getSlop());
  135. $query->addSubquery($subquery->rewrite($index));
  136. }
  137. $this->_matches = $query->getQueryTerms();
  138. return $query;
  139. }
  140. // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
  141. // encoding is not used since we expect binary matching
  142. //require_once 'Zend/Search/Lucene/Index/Term.php';
  143. $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
  144. if ($index->hasTerm($term)) {
  145. //require_once 'Zend/Search/Lucene/Search/Query/Term.php';
  146. $query = new Zend_Search_Lucene_Search_Query_Term($term);
  147. $query->setBoost($this->getBoost());
  148. $this->_matches = $query->getQueryTerms();
  149. return $query;
  150. }
  151. // tokenize phrase using current analyzer and process it as a phrase query
  152. //require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
  153. $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
  154. if (count($tokens) == 0) {
  155. $this->_matches = array();
  156. //require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
  157. return new Zend_Search_Lucene_Search_Query_Insignificant();
  158. }
  159. if (count($tokens) == 1) {
  160. //require_once 'Zend/Search/Lucene/Index/Term.php';
  161. $term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
  162. //require_once 'Zend/Search/Lucene/Search/Query/Term.php';
  163. $query = new Zend_Search_Lucene_Search_Query_Term($term);
  164. $query->setBoost($this->getBoost());
  165. $this->_matches = $query->getQueryTerms();
  166. return $query;
  167. }
  168. //It's non-trivial phrase query
  169. $position = -1;
  170. //require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
  171. $query = new Zend_Search_Lucene_Search_Query_Phrase();
  172. //require_once 'Zend/Search/Lucene/Index/Term.php';
  173. foreach ($tokens as $token) {
  174. $position += $token->getPositionIncrement();
  175. $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
  176. $query->addTerm($term, $position);
  177. $query->setSlop($this->getSlop());
  178. }
  179. $this->_matches = $query->getQueryTerms();
  180. return $query;
  181. }
  182. /**
  183. * Query specific matches highlighting
  184. *
  185. * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
  186. */
  187. protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
  188. {
  189. /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */
  190. /** Skip exact term matching recognition, keyword fields highlighting is not supported */
  191. /** Skip wildcard queries recognition. Supported wildcards are removed by text analyzer */
  192. // tokenize phrase using current analyzer and process it as a phrase query
  193. //require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
  194. $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
  195. if (count($tokens) == 0) {
  196. // Do nothing
  197. return;
  198. }
  199. if (count($tokens) == 1) {
  200. $highlighter->highlight($tokens[0]->getTermText());
  201. return;
  202. }
  203. //It's non-trivial phrase query
  204. $words = array();
  205. foreach ($tokens as $token) {
  206. $words[] = $token->getTermText();
  207. }
  208. $highlighter->highlight($words);
  209. }
  210. /**
  211. * Print a query
  212. *
  213. * @return string
  214. */
  215. public function __toString()
  216. {
  217. // It's used only for query visualisation, so we don't care about characters escaping
  218. if ($this->_field !== null) {
  219. $query = $this->_field . ':';
  220. } else {
  221. $query = '';
  222. }
  223. $query .= '"' . $this->_phrase . '"';
  224. if ($this->_slop != 0) {
  225. $query .= '~' . $this->_slop;
  226. }
  227. if ($this->getBoost() != 1) {
  228. $query .= '^' . round($this->getBoost(), 4);
  229. }
  230. return $query;
  231. }
  232. }