/search/Zend/Search/Lucene/Search/QueryLexer.php

https://github.com/jarednipper/HSU-common-code · PHP · 508 lines · 292 code · 88 blank · 128 comment · 29 complexity · b9d5ebbbf20474c8662674b9a292a515 MD5 · raw file

  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Search
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_FSM */
  22. require_once 'Zend/Search/Lucene/FSM.php';
  23. /** Zend_Search_Lucene_Search_QueryToken */
  24. require_once 'Zend/Search/Lucene/Search/QueryToken.php';
  25. /** Zend_Search_Lucene_Exception */
  26. require_once 'Zend/Search/Lucene/Exception.php';
  27. /** Zend_Search_Lucene_Search_QueryParserException */
  28. require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
  /**
   * Lexical analyser for search query strings.
   *
   * tokenize() splits a query string into characters, classifies every
   * character as one of the IN_* input symbols and feeds it to the state
   * machine configured in the constructor. The actions attached to the
   * machine collect Zend_Search_Lucene_Search_QueryToken objects, which
   * tokenize() returns.
   *
   * @category   Zend
   * @package    Zend_Search_Lucene
   * @subpackage Search
   * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
   * @license    http://framework.zend.com/license/new-bsd     New BSD License
   */
  class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
  {
      /** State Machine states */
      const ST_WHITE_SPACE     = 0;
      const ST_SYNT_LEXEME     = 1;
      const ST_LEXEME          = 2;
      const ST_QUOTED_LEXEME   = 3;
      const ST_ESCAPED_CHAR    = 4;
      const ST_ESCAPED_QCHAR   = 5;
      const ST_LEXEME_MODIFIER = 6;
      const ST_NUMBER          = 7;
      const ST_MANTISSA        = 8;
      const ST_ERROR           = 9;

      /** Input symbols */
      const IN_WHITE_SPACE     = 0;
      const IN_SYNT_CHAR       = 1;
      const IN_LEXEME_MODIFIER = 2;
      const IN_ESCAPE_CHAR     = 3;
      const IN_QUOTE           = 4;
      const IN_DECIMAL_POINT   = 5;
      const IN_ASCII_DIGIT     = 6;
      const IN_CHAR            = 7;
      const IN_MUTABLE_CHAR    = 8;

      /** Character classes used by _translateInput() and addQuerySyntaxLexeme() */
      const QUERY_WHITE_SPACE_CHARS      = " \n\r\t";
      const QUERY_SYNT_CHARS             = ':()[]{}!|&';
      const QUERY_MUTABLE_CHARS          = '+-';
      const QUERY_DOUBLECHARLEXEME_CHARS = '|&';
      const QUERY_LEXEMEMODIFIER_CHARS   = '~^';
      const QUERY_ASCIIDIGITS_CHARS      = '0123456789';

      /**
       * List of recognized lexemes
       *
       * @var array
       */
      private $_lexemes;

      /**
       * Query string (array of single- or non single-byte characters)
       *
       * @var array
       */
      private $_queryString;

      /**
       * Current position within a query string
       * Used to create appropriate error messages
       *
       * @var integer
       */
      private $_queryStringPosition;

      /**
       * Recognized part of current lexeme
       *
       * @var string
       */
      private $_currentLexeme;

      /**
       * Register the states, input symbols, transition rules and actions
       * of the lexer state machine.
       */
      public function __construct()
      {
          parent::__construct(array(self::ST_WHITE_SPACE,
                                    self::ST_SYNT_LEXEME,
                                    self::ST_LEXEME,
                                    self::ST_QUOTED_LEXEME,
                                    self::ST_ESCAPED_CHAR,
                                    self::ST_ESCAPED_QCHAR,
                                    self::ST_LEXEME_MODIFIER,
                                    self::ST_NUMBER,
                                    self::ST_MANTISSA,
                                    self::ST_ERROR),
                              array(self::IN_WHITE_SPACE,
                                    self::IN_SYNT_CHAR,
                                    self::IN_MUTABLE_CHAR,
                                    self::IN_LEXEME_MODIFIER,
                                    self::IN_ESCAPE_CHAR,
                                    self::IN_QUOTE,
                                    self::IN_DECIMAL_POINT,
                                    self::IN_ASCII_DIGIT,
                                    self::IN_CHAR));

          // Actions attached to forbidden transitions; each of them throws a parser exception
          $lexemeModifierErrorAction    = new Zend_Search_Lucene_FSMAction($this, 'lexModifierErrException');
          $quoteWithinLexemeErrorAction = new Zend_Search_Lucene_FSMAction($this, 'quoteWithinLexemeErrException');
          $wrongNumberErrorAction       = new Zend_Search_Lucene_FSMAction($this, 'wrongNumberErrException');

          // Each rule is array(source state, input symbol, target state[, input action])
          $this->addRules(array(array(self::ST_WHITE_SPACE,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_WHITE_SPACE,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_WHITE_SPACE,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                                array(self::ST_WHITE_SPACE,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                                array(self::ST_WHITE_SPACE,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
                                array(self::ST_WHITE_SPACE,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                                array(self::ST_WHITE_SPACE,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                                array(self::ST_WHITE_SPACE,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                                array(self::ST_WHITE_SPACE,   self::IN_CHAR,            self::ST_LEXEME)
                               ));
          $this->addRules(array(array(self::ST_SYNT_LEXEME,   self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_SYNT_LEXEME,   self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_SYNT_LEXEME,   self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                                array(self::ST_SYNT_LEXEME,   self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                                array(self::ST_SYNT_LEXEME,   self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),
                                array(self::ST_SYNT_LEXEME,   self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                                array(self::ST_SYNT_LEXEME,   self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                                array(self::ST_SYNT_LEXEME,   self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                                array(self::ST_SYNT_LEXEME,   self::IN_CHAR,            self::ST_LEXEME)
                               ));
          $this->addRules(array(array(self::ST_LEXEME,        self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_LEXEME,        self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_LEXEME,        self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
                                array(self::ST_LEXEME,        self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),
                                array(self::ST_LEXEME,        self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_CHAR),

                                // IN_QUOTE     not allowed
                                array(self::ST_LEXEME,        self::IN_QUOTE,           self::ST_ERROR, $quoteWithinLexemeErrorAction),

                                array(self::ST_LEXEME,        self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                                array(self::ST_LEXEME,        self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                                array(self::ST_LEXEME,        self::IN_CHAR,            self::ST_LEXEME)
                               ));
          $this->addRules(array(array(self::ST_QUOTED_LEXEME, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_ESCAPE_CHAR,     self::ST_ESCAPED_QCHAR),
                                array(self::ST_QUOTED_LEXEME, self::IN_QUOTE,           self::ST_WHITE_SPACE),
                                array(self::ST_QUOTED_LEXEME, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
                                array(self::ST_QUOTED_LEXEME, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
                               ));
          // Any character following an escape char is taken literally
          $this->addRules(array(array(self::ST_ESCAPED_CHAR,  self::IN_WHITE_SPACE,     self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_SYNT_CHAR,       self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_MUTABLE_CHAR,    self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_LEXEME_MODIFIER, self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_ESCAPE_CHAR,     self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_QUOTE,           self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_DECIMAL_POINT,   self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_ASCII_DIGIT,     self::ST_LEXEME),
                                array(self::ST_ESCAPED_CHAR,  self::IN_CHAR,            self::ST_LEXEME)
                               ));
          $this->addRules(array(array(self::ST_ESCAPED_QCHAR, self::IN_WHITE_SPACE,     self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_SYNT_CHAR,       self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_MUTABLE_CHAR,    self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_LEXEME_MODIFIER, self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_ESCAPE_CHAR,     self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_QUOTE,           self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_DECIMAL_POINT,   self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_ASCII_DIGIT,     self::ST_QUOTED_LEXEME),
                                array(self::ST_ESCAPED_QCHAR, self::IN_CHAR,            self::ST_QUOTED_LEXEME)
                               ));
          $this->addRules(array(array(self::ST_LEXEME_MODIFIER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_LEXEME_MODIFIER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_LEXEME_MODIFIER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                                array(self::ST_LEXEME_MODIFIER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                                // IN_ESCAPE_CHAR       not allowed
                                array(self::ST_LEXEME_MODIFIER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $lexemeModifierErrorAction),

                                // IN_QUOTE             not allowed
                                array(self::ST_LEXEME_MODIFIER, self::IN_QUOTE,           self::ST_ERROR, $lexemeModifierErrorAction),

                                array(self::ST_LEXEME_MODIFIER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
                                array(self::ST_LEXEME_MODIFIER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),

                                // IN_CHAR              not allowed
                                array(self::ST_LEXEME_MODIFIER, self::IN_CHAR,            self::ST_ERROR, $lexemeModifierErrorAction),
                               ));
          $this->addRules(array(array(self::ST_NUMBER, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_NUMBER, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_NUMBER, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                                array(self::ST_NUMBER, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                                // IN_ESCAPE_CHAR       not allowed
                                array(self::ST_NUMBER, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),

                                // IN_QUOTE             not allowed
                                array(self::ST_NUMBER, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),

                                array(self::ST_NUMBER, self::IN_DECIMAL_POINT,   self::ST_MANTISSA),
                                array(self::ST_NUMBER, self::IN_ASCII_DIGIT,     self::ST_NUMBER),

                                // IN_CHAR              not allowed
                                array(self::ST_NUMBER, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
                               ));
          $this->addRules(array(array(self::ST_MANTISSA, self::IN_WHITE_SPACE,     self::ST_WHITE_SPACE),
                                array(self::ST_MANTISSA, self::IN_SYNT_CHAR,       self::ST_SYNT_LEXEME),
                                array(self::ST_MANTISSA, self::IN_MUTABLE_CHAR,    self::ST_SYNT_LEXEME),
                                array(self::ST_MANTISSA, self::IN_LEXEME_MODIFIER, self::ST_LEXEME_MODIFIER),

                                // IN_ESCAPE_CHAR       not allowed
                                array(self::ST_MANTISSA, self::IN_ESCAPE_CHAR,     self::ST_ERROR, $wrongNumberErrorAction),

                                // IN_QUOTE             not allowed
                                array(self::ST_MANTISSA, self::IN_QUOTE,           self::ST_ERROR, $wrongNumberErrorAction),

                                // IN_DECIMAL_POINT     not allowed
                                array(self::ST_MANTISSA, self::IN_DECIMAL_POINT,   self::ST_ERROR, $wrongNumberErrorAction),

                                array(self::ST_MANTISSA, self::IN_ASCII_DIGIT,     self::ST_MANTISSA),

                                // IN_CHAR              not allowed
                                array(self::ST_MANTISSA, self::IN_CHAR,            self::ST_ERROR, $wrongNumberErrorAction),
                               ));

          /** Actions */
          $syntaxLexemeAction    = new Zend_Search_Lucene_FSMAction($this, 'addQuerySyntaxLexeme');
          $lexemeModifierAction  = new Zend_Search_Lucene_FSMAction($this, 'addLexemeModifier');
          $addLexemeAction       = new Zend_Search_Lucene_FSMAction($this, 'addLexeme');
          $addQuotedLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addQuotedLexeme');
          $addNumberLexemeAction = new Zend_Search_Lucene_FSMAction($this, 'addNumberLexeme');
          $addLexemeCharAction   = new Zend_Search_Lucene_FSMAction($this, 'addLexemeChar');

          /** Syntax lexeme */
          $this->addEntryAction(self::ST_SYNT_LEXEME, $syntaxLexemeAction);
          // Two lexemes in succession
          $this->addTransitionAction(self::ST_SYNT_LEXEME, self::ST_SYNT_LEXEME, $syntaxLexemeAction);

          /** Lexeme */
          $this->addEntryAction(self::ST_LEXEME, $addLexemeCharAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME, $addLexemeCharAction);
          // ST_ESCAPED_CHAR => ST_LEXEME transition is covered by ST_LEXEME entry action

          $this->addTransitionAction(self::ST_LEXEME, self::ST_WHITE_SPACE,     $addLexemeAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_SYNT_LEXEME,     $addLexemeAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_QUOTED_LEXEME,   $addLexemeAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_LEXEME_MODIFIER, $addLexemeAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_NUMBER,          $addLexemeAction);
          $this->addTransitionAction(self::ST_LEXEME, self::ST_MANTISSA,        $addLexemeAction);

          /** Quoted lexeme */
          // We don't need an entry action (the opening quote is skipped)
          $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
          $this->addTransitionAction(self::ST_ESCAPED_QCHAR, self::ST_QUOTED_LEXEME, $addLexemeCharAction);
          // Closing quote changes state to ST_WHITE_SPACE; other target states are not used
          $this->addTransitionAction(self::ST_QUOTED_LEXEME, self::ST_WHITE_SPACE,   $addQuotedLexemeAction);

          /** Lexeme modifier */
          $this->addEntryAction(self::ST_LEXEME_MODIFIER, $lexemeModifierAction);

          /** Number */
          $this->addEntryAction(self::ST_NUMBER,   $addLexemeCharAction);
          $this->addEntryAction(self::ST_MANTISSA, $addLexemeCharAction);
          $this->addTransitionAction(self::ST_NUMBER,   self::ST_NUMBER,   $addLexemeCharAction);
          // ST_NUMBER => ST_MANTISSA transition is covered by ST_MANTISSA entry action
          $this->addTransitionAction(self::ST_MANTISSA, self::ST_MANTISSA, $addLexemeCharAction);

          $this->addTransitionAction(self::ST_NUMBER,   self::ST_WHITE_SPACE,     $addNumberLexemeAction);
          $this->addTransitionAction(self::ST_NUMBER,   self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
          $this->addTransitionAction(self::ST_NUMBER,   self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
          $this->addTransitionAction(self::ST_MANTISSA, self::ST_WHITE_SPACE,     $addNumberLexemeAction);
          $this->addTransitionAction(self::ST_MANTISSA, self::ST_SYNT_LEXEME,     $addNumberLexemeAction);
          $this->addTransitionAction(self::ST_MANTISSA, self::ST_LEXEME_MODIFIER, $addNumberLexemeAction);
      }

      /**
       * Translate input char to an input symbol of state machine
       *
       * Characters matching none of the special classes are reported as IN_CHAR.
       *
       * @param string $char
       * @return integer
       */
      private function _translateInput($char)
      {
          if (strpos(self::QUERY_WHITE_SPACE_CHARS, $char) !== false) {
              return self::IN_WHITE_SPACE;
          }
          if (strpos(self::QUERY_SYNT_CHARS, $char) !== false) {
              return self::IN_SYNT_CHAR;
          }
          if (strpos(self::QUERY_MUTABLE_CHARS, $char) !== false) {
              return self::IN_MUTABLE_CHAR;
          }
          if (strpos(self::QUERY_LEXEMEMODIFIER_CHARS, $char) !== false) {
              return self::IN_LEXEME_MODIFIER;
          }
          if (strpos(self::QUERY_ASCIIDIGITS_CHARS, $char) !== false) {
              return self::IN_ASCII_DIGIT;
          }
          if ($char === '"') {
              return self::IN_QUOTE;
          }
          if ($char === '.') {
              return self::IN_DECIMAL_POINT;
          }
          if ($char === '\\') {
              return self::IN_ESCAPE_CHAR;
          }

          return self::IN_CHAR;
      }

      /**
       * This method is used to tokenize query string into lexemes
       *
       * @param string $inputString
       * @param string $encoding
       * @return array
       * @throws Zend_Search_Lucene_Search_QueryParserException
       */
      public function tokenize($inputString, $encoding)
      {
          $this->reset();

          $this->_lexemes     = array();
          $this->_queryString = array();
          // A previous call interrupted by an exception may have left a partially
          // collected lexeme behind; it must not leak into this run
          $this->_currentLexeme = '';

          $strLength = iconv_strlen($inputString, $encoding);

          // Workaround for iconv_substr bug
          $inputString .= ' ';

          // Split the query into characters according to the given encoding
          for ($count = 0; $count < $strLength; $count++) {
              $this->_queryString[$count] = iconv_substr($inputString, $count, 1, $encoding);
          }

          // The position is kept in a property because the actions read it and
          // addQuerySyntaxLexeme() advances it when it consumes a two char lexeme
          for ($this->_queryStringPosition = 0;
               $this->_queryStringPosition < count($this->_queryString);
               $this->_queryStringPosition++) {
              $this->process($this->_translateInput($this->_queryString[$this->_queryStringPosition]));
          }

          // The query ends right after an escape char. The closing white space
          // below would make addLexemeChar() read past the end of the query,
          // so report the (otherwise identical) error before that happens
          $finalState = $this->getState();
          if ($finalState == self::ST_ESCAPED_CHAR || $finalState == self::ST_ESCAPED_QCHAR) {
              throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
          }

          // Closing white space flushes a lexeme which is still being collected
          $this->process(self::IN_WHITE_SPACE);

          if ($this->getState() != self::ST_WHITE_SPACE) {
              throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
          }

          $this->_queryString = null;

          return $this->_lexemes;
      }

      /*********************************************************************
       * Actions implementation
       *
       * Actions operate on the list of recognized lexemes.
       * They are public because they are registered by method name through
       * Zend_Search_Lucene_FSMAction objects (see the constructor).
       *********************************************************************/

      /**
       * Add query syntax lexeme
       *
       * @throws Zend_Search_Lucene_Search_QueryParserException
       */
      public function addQuerySyntaxLexeme()
      {
          $lexeme = $this->_queryString[$this->_queryStringPosition];

          // Process two char lexemes
          if (strpos(self::QUERY_DOUBLECHARLEXEME_CHARS, $lexeme) !== false) {
              // increase current position in a query string
              $this->_queryStringPosition++;

              // check that the same character is repeated
              if ($this->_queryStringPosition == count($this->_queryString)  ||
                  $this->_queryString[$this->_queryStringPosition] != $lexeme) {
                  throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
              }

              // duplicate character
              $lexeme .= $lexeme;
          }

          $token = new Zend_Search_Lucene_Search_QueryToken(
                                  Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
                                  $lexeme,
                                  $this->_queryStringPosition);

          // Skip this lexeme if it's a field indicator ':' and treat previous as 'field' instead of 'word'
          if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
              // array_pop() gives null when no lexeme has been recognized yet
              $token = array_pop($this->_lexemes);
              if ($token === null  ||  $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
                  throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
              }

              $token->type = Zend_Search_Lucene_Search_QueryToken::TT_FIELD;
          }

          $this->_lexemes[] = $token;
      }

      /**
       * Add lexeme modifier
       */
      public function addLexemeModifier()
      {
          $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                      Zend_Search_Lucene_Search_QueryToken::TC_SYNTAX_ELEMENT,
                                      $this->_queryString[$this->_queryStringPosition],
                                      $this->_queryStringPosition);
      }

      /**
       * Add lexeme
       *
       * Stores the collected characters as a word token and starts a new lexeme.
       */
      public function addLexeme()
      {
          // The current character already belongs to the next token,
          // so the word is reported at the previous position
          $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                      Zend_Search_Lucene_Search_QueryToken::TC_WORD,
                                      $this->_currentLexeme,
                                      $this->_queryStringPosition - 1);

          $this->_currentLexeme = '';
      }

      /**
       * Add quoted lexeme
       *
       * Stores the collected characters as a phrase token and starts a new lexeme.
       */
      public function addQuotedLexeme()
      {
          $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                      Zend_Search_Lucene_Search_QueryToken::TC_PHRASE,
                                      $this->_currentLexeme,
                                      $this->_queryStringPosition);

          $this->_currentLexeme = '';
      }

      /**
       * Add number lexeme
       *
       * Stores the collected characters as a number token and starts a new lexeme.
       */
      public function addNumberLexeme()
      {
          $this->_lexemes[] = new Zend_Search_Lucene_Search_QueryToken(
                                      Zend_Search_Lucene_Search_QueryToken::TC_NUMBER,
                                      $this->_currentLexeme,
                                      $this->_queryStringPosition - 1);

          $this->_currentLexeme = '';
      }

      /**
       * Extend lexeme by one char
       */
      public function addLexemeChar()
      {
          $this->_currentLexeme .= $this->_queryString[$this->_queryStringPosition];
      }

      /**
       * Position message
       *
       * @return string
       */
      private function _positionMsg()
      {
          return 'Position is ' . $this->_queryStringPosition . '.';
      }

      /*********************************************************************
       * Syntax errors actions
       *********************************************************************/

      /**
       * Error action: lexeme modifier is followed by a forbidden character
       *
       * @throws Zend_Search_Lucene_Search_QueryParserException
       */
      public function lexModifierErrException()
      {
          throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier character can be followed only by number, white space or query syntax element. ' . $this->_positionMsg());
      }

      /**
       * Error action: unescaped quote inside of a lexeme
       *
       * @throws Zend_Search_Lucene_Search_QueryParserException
       */
      public function quoteWithinLexemeErrException()
      {
          throw new Zend_Search_Lucene_Search_QueryParserException('Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg());
      }

      /**
       * Error action: malformed number
       *
       * @throws Zend_Search_Lucene_Search_QueryParserException
       */
      public function wrongNumberErrException()
      {
          // The trailing space separates the sentence from the position message
          throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
      }
  }