PageRenderTime 23ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Search/Lucene/Search/QueryParserContext.php

https://github.com/kervin/kyzstudio
PHP | 401 lines | 166 code | 65 blank | 170 comment | 32 complexity | 5a76f304adc592115df8f711311f300a MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: QueryParserContext.php 20096 2010-01-06 02:05:09Z bkarwin $
  21. */
  22. /** Zend_Search_Lucene_Search_QueryToken */
  23. #require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  24. /**
  25. * @category Zend
  26. * @package Zend_Search_Lucene
  27. * @subpackage Search
  28. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  29. * @license http://framework.zend.com/license/new-bsd New BSD License
  30. */
  31. class Zend_Search_Lucene_Search_QueryParserContext
  32. {
  33. /**
  34. * Default field for the context.
  35. *
     * null means that the term should be searched through all fields;
     * Zend_Search_Lucene_Search_Query::rewriteQuery($index) translates such queries into several field-specific queries
  38. *
  39. * @var string|null
  40. */
  41. private $_defaultField;
  42. /**
  43. * Field specified for next entry
  44. *
     * @var string|null
  46. */
  47. private $_nextEntryField = null;
  48. /**
  49. * True means, that term is required.
  50. * False means, that term is prohibited.
  51. * null means, that term is neither prohibited, nor required
  52. *
     * @var boolean|null
  54. */
  55. private $_nextEntrySign = null;
  56. /**
  57. * Entries grouping mode
  58. */
  59. const GM_SIGNS = 0; // Signs mode: '+term1 term2 -term3 +(subquery1) -(subquery2)'
  60. const GM_BOOLEAN = 1; // Boolean operators mode: 'term1 and term2 or (subquery1) and not (subquery2)'
  61. /**
  62. * Grouping mode
  63. *
     * @var integer|null
  65. */
  66. private $_mode = null;
  67. /**
  68. * Entries signs.
  69. * Used in GM_SIGNS grouping mode
  70. *
     * @var array
  72. */
  73. private $_signs = array();
  74. /**
  75. * Query entries
  76. * Each entry is a Zend_Search_Lucene_Search_QueryEntry object or
  77. * boolean operator (Zend_Search_Lucene_Search_QueryToken class constant)
  78. *
  79. * @var array
  80. */
  81. private $_entries = array();
  82. /**
  83. * Query string encoding
  84. *
  85. * @var string
  86. */
  87. private $_encoding;
  88. /**
  89. * Context object constructor
  90. *
  91. * @param string $encoding
  92. * @param string|null $defaultField
  93. */
  94. public function __construct($encoding, $defaultField = null)
  95. {
  96. $this->_encoding = $encoding;
  97. $this->_defaultField = $defaultField;
  98. }
  99. /**
  100. * Get context default field
  101. *
  102. * @return string|null
  103. */
  104. public function getField()
  105. {
  106. return ($this->_nextEntryField !== null) ? $this->_nextEntryField : $this->_defaultField;
  107. }
  108. /**
  109. * Set field for next entry
  110. *
  111. * @param string $field
  112. */
  113. public function setNextEntryField($field)
  114. {
  115. $this->_nextEntryField = $field;
  116. }
  117. /**
  118. * Set sign for next entry
  119. *
  120. * @param integer $sign
  121. * @throws Zend_Search_Lucene_Exception
  122. */
  123. public function setNextEntrySign($sign)
  124. {
  125. if ($this->_mode === self::GM_BOOLEAN) {
  126. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  127. throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
  128. }
  129. $this->_mode = self::GM_SIGNS;
  130. if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_REQUIRED) {
  131. $this->_nextEntrySign = true;
  132. } else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
  133. $this->_nextEntrySign = false;
  134. } else {
  135. #require_once 'Zend/Search/Lucene/Exception.php';
  136. throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
  137. }
  138. }
  139. /**
  140. * Add entry to a query
  141. *
  142. * @param Zend_Search_Lucene_Search_QueryEntry $entry
  143. */
  144. public function addEntry(Zend_Search_Lucene_Search_QueryEntry $entry)
  145. {
  146. if ($this->_mode !== self::GM_BOOLEAN) {
  147. $this->_signs[] = $this->_nextEntrySign;
  148. }
  149. $this->_entries[] = $entry;
  150. $this->_nextEntryField = null;
  151. $this->_nextEntrySign = null;
  152. }
  153. /**
  154. * Process fuzzy search or proximity search modifier
  155. *
  156. * @throws Zend_Search_Lucene_Search_QueryParserException
  157. */
  158. public function processFuzzyProximityModifier($parameter = null)
  159. {
  160. // Check, that modifier has came just after word or phrase
  161. if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
  162. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  163. throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
  164. }
  165. $lastEntry = array_pop($this->_entries);
  166. if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  167. // there are no entries or last entry is boolean operator
  168. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  169. throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
  170. }
  171. $lastEntry->processFuzzyProximityModifier($parameter);
  172. $this->_entries[] = $lastEntry;
  173. }
  174. /**
  175. * Set boost factor to the entry
  176. *
  177. * @param float $boostFactor
  178. */
  179. public function boost($boostFactor)
  180. {
  181. // Check, that modifier has came just after word or phrase
  182. if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
  183. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  184. throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
  185. }
  186. $lastEntry = array_pop($this->_entries);
  187. if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  188. // there are no entries or last entry is boolean operator
  189. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  190. throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
  191. }
  192. $lastEntry->boost($boostFactor);
  193. $this->_entries[] = $lastEntry;
  194. }
  195. /**
  196. * Process logical operator
  197. *
  198. * @param integer $operator
  199. */
  200. public function addLogicalOperator($operator)
  201. {
  202. if ($this->_mode === self::GM_SIGNS) {
  203. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  204. throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
  205. }
  206. $this->_mode = self::GM_BOOLEAN;
  207. $this->_entries[] = $operator;
  208. }
  209. /**
  210. * Generate 'signs style' query from the context
  211. * '+term1 term2 -term3 +(<subquery1>) ...'
  212. *
  213. * @return Zend_Search_Lucene_Search_Query
  214. */
  215. public function _signStyleExpressionQuery()
  216. {
  217. #require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
  218. $query = new Zend_Search_Lucene_Search_Query_Boolean();
  219. #require_once 'Zend/Search/Lucene/Search/QueryParser.php';
  220. if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() == Zend_Search_Lucene_Search_QueryParser::B_AND) {
  221. $defaultSign = true; // required
  222. } else {
  223. // Zend_Search_Lucene_Search_QueryParser::B_OR
  224. $defaultSign = null; // optional
  225. }
  226. foreach ($this->_entries as $entryId => $entry) {
  227. $sign = ($this->_signs[$entryId] !== null) ? $this->_signs[$entryId] : $defaultSign;
  228. $query->addSubquery($entry->getQuery($this->_encoding), $sign);
  229. }
  230. return $query;
  231. }
  232. /**
  233. * Generate 'boolean style' query from the context
  234. * 'term1 and term2 or term3 and (<subquery1>) and not (<subquery2>)'
  235. *
  236. * @return Zend_Search_Lucene_Search_Query
  237. * @throws Zend_Search_Lucene
  238. */
  239. private function _booleanExpressionQuery()
  240. {
  241. /**
  242. * We treat each level of an expression as a boolean expression in
  243. * a Disjunctive Normal Form
  244. *
  245. * AND operator has higher precedence than OR
  246. *
  247. * Thus logical query is a disjunction of one or more conjunctions of
  248. * one or more query entries
  249. */
  250. #require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
  251. $expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
  252. #require_once 'Zend/Search/Lucene/Exception.php';
  253. try {
  254. foreach ($this->_entries as $entry) {
  255. if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
  256. $expressionRecognizer->processLiteral($entry);
  257. } else {
  258. switch ($entry) {
  259. case Zend_Search_Lucene_Search_QueryToken::TT_AND_LEXEME:
  260. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_AND_OPERATOR);
  261. break;
  262. case Zend_Search_Lucene_Search_QueryToken::TT_OR_LEXEME:
  263. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_OR_OPERATOR);
  264. break;
  265. case Zend_Search_Lucene_Search_QueryToken::TT_NOT_LEXEME:
  266. $expressionRecognizer->processOperator(Zend_Search_Lucene_Search_BooleanExpressionRecognizer::IN_NOT_OPERATOR);
  267. break;
  268. default:
  269. throw new Zend_Search_Lucene('Boolean expression error. Unknown operator type.');
  270. }
  271. }
  272. }
  273. $conjuctions = $expressionRecognizer->finishExpression();
  274. } catch (Zend_Search_Exception $e) {
  275. // throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
  276. // $e->getMessage() . '\'.' );
  277. // It's query syntax error message and it should be user friendly. So FSM message is omitted
  278. #require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  279. throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.', 0, $e);
  280. }
  281. // Remove 'only negative' conjunctions
  282. foreach ($conjuctions as $conjuctionId => $conjuction) {
  283. $nonNegativeEntryFound = false;
  284. foreach ($conjuction as $conjuctionEntry) {
  285. if ($conjuctionEntry[1]) {
  286. $nonNegativeEntryFound = true;
  287. break;
  288. }
  289. }
  290. if (!$nonNegativeEntryFound) {
  291. unset($conjuctions[$conjuctionId]);
  292. }
  293. }
  294. $subqueries = array();
  295. foreach ($conjuctions as $conjuction) {
  296. // Check, if it's a one term conjuction
  297. if (count($conjuction) == 1) {
  298. $subqueries[] = $conjuction[0][0]->getQuery($this->_encoding);
  299. } else {
  300. #require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
  301. $subquery = new Zend_Search_Lucene_Search_Query_Boolean();
  302. foreach ($conjuction as $conjuctionEntry) {
  303. $subquery->addSubquery($conjuctionEntry[0]->getQuery($this->_encoding), $conjuctionEntry[1]);
  304. }
  305. $subqueries[] = $subquery;
  306. }
  307. }
  308. if (count($subqueries) == 0) {
  309. #require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
  310. return new Zend_Search_Lucene_Search_Query_Insignificant();
  311. }
  312. if (count($subqueries) == 1) {
  313. return $subqueries[0];
  314. }
  315. #require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
  316. $query = new Zend_Search_Lucene_Search_Query_Boolean();
  317. foreach ($subqueries as $subquery) {
  318. // Non-requirered entry/subquery
  319. $query->addSubquery($subquery);
  320. }
  321. return $query;
  322. }
  323. /**
  324. * Generate query from current context
  325. *
  326. * @return Zend_Search_Lucene_Search_Query
  327. */
  328. public function getQuery()
  329. {
  330. if ($this->_mode === self::GM_BOOLEAN) {
  331. return $this->_booleanExpressionQuery();
  332. } else {
  333. return $this->_signStyleExpressionQuery();
  334. }
  335. }
  336. }