/library/Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php
PHP | 278 lines | 100 code | 42 blank | 136 comment | 8 complexity | 09a255fb3441b1271728044206d50024 MD5 | raw file
Possible License(s): AGPL-1.0
1<?php
2/**
3 * Zend Framework
4 *
5 * LICENSE
6 *
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
14 *
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @subpackage Search
18 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 * @version $Id: BooleanExpressionRecognizer.php 24594 2012-01-05 21:27:01Z matthew $
21 */
22
23
24/** Zend_Search_Lucene_FSM */
25require_once 'Zend/Search/Lucene/FSM.php';
26
27/**
28 * @category Zend
29 * @package Zend_Search_Lucene
30 * @subpackage Search
31 * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
32 * @license http://framework.zend.com/license/new-bsd New BSD License
33 */
class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_Lucene_FSM
{
    /** State Machine states */
    const ST_START        = 0;
    const ST_LITERAL      = 1;
    const ST_NOT_OPERATOR = 2;
    const ST_AND_OPERATOR = 3;
    const ST_OR_OPERATOR  = 4;

    /** Input symbols */
    const IN_LITERAL      = 0;
    const IN_NOT_OPERATOR = 1;
    const IN_AND_OPERATOR = 2;
    const IN_OR_OPERATOR  = 3;


    /**
     * NOT operator signal
     *
     * Raised by notOperatorAction() and lowered again by literalAction()
     * once the next literal has been recorded.
     *
     * @var boolean
     */
    private $_negativeLiteral = false;

    /**
     * Current literal
     *
     * Stored by processLiteral() and consumed by literalAction().
     *
     * @var mixed
     */
    private $_literal;


    /**
     * Set of already closed boolean query conjunctions
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     *   array(<literal>, <sign>)
     * where <sign> is TRUE for a plain literal and FALSE for a literal
     * which was preceded by the NOT operator.
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * @var array
     */
    private $_conjunctions = array();

    /**
     * Current (still open) conjunction
     *
     * @var array
     */
    private $_currentConjunction = array();


    /**
     * Object constructor
     *
     * Registers the states, the input symbols, the transition rules and
     * the actions of the recognizer.
     */
    public function __construct()
    {
        parent::__construct( array(self::ST_START,
                                   self::ST_LITERAL,
                                   self::ST_NOT_OPERATOR,
                                   self::ST_AND_OPERATOR,
                                   self::ST_OR_OPERATOR),
                             array(self::IN_LITERAL,
                                   self::IN_NOT_OPERATOR,
                                   self::IN_AND_OPERATOR,
                                   self::IN_OR_OPERATOR));

        // Actions used when the operator between two operands is omitted
        $emptyOperatorAction    = new Zend_Search_Lucene_FSMAction($this, 'emptyOperatorAction');
        $emptyNotOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'emptyNotOperatorAction');

        $this->addRules(array( array(self::ST_START,        self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_START,        self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

                               array(self::ST_LITERAL,      self::IN_AND_OPERATOR, self::ST_AND_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_OR_OPERATOR,  self::ST_OR_OPERATOR),
                               array(self::ST_LITERAL,      self::IN_LITERAL,      self::ST_LITERAL,      $emptyOperatorAction),
                               array(self::ST_LITERAL,      self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR, $emptyNotOperatorAction),

                               array(self::ST_NOT_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),

                               array(self::ST_AND_OPERATOR, self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_AND_OPERATOR, self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),

                               array(self::ST_OR_OPERATOR,  self::IN_LITERAL,      self::ST_LITERAL),
                               array(self::ST_OR_OPERATOR,  self::IN_NOT_OPERATOR, self::ST_NOT_OPERATOR),
                             ));

        $notOperatorAction = new Zend_Search_Lucene_FSMAction($this, 'notOperatorAction');
        $orOperatorAction  = new Zend_Search_Lucene_FSMAction($this, 'orOperatorAction');
        $literalAction     = new Zend_Search_Lucene_FSMAction($this, 'literalAction');

        // ST_AND_OPERATOR gets no entry action: AND keeps filling the current conjunction
        $this->addEntryAction(self::ST_NOT_OPERATOR, $notOperatorAction);
        $this->addEntryAction(self::ST_OR_OPERATOR,  $orOperatorAction);
        $this->addEntryAction(self::ST_LITERAL,      $literalAction);
    }


    /**
     * Process next operator.
     *
     * Operators are defined by class constants: IN_AND_OPERATOR, IN_OR_OPERATOR and IN_NOT_OPERATOR
     *
     * @param integer $operator
     */
    public function processOperator($operator)
    {
        $this->process($operator);
    }

    /**
     * Process expression literal.
     *
     * The literal is remembered first, so that the action triggered by
     * the IN_LITERAL input can add it to the current conjunction.
     *
     * @param mixed $literal
     */
    public function processLiteral($literal)
    {
        $this->_literal = $literal;

        $this->process(self::IN_LITERAL);
    }

    /**
     * Finish an expression and return result
     *
     * Result is a set of boolean query conjunctions
     *
     * Each conjunction is an array of conjunction elements.
     * Each conjunction element is a two-element array:
     *   array(<literal>, <sign>)
     * where <sign> is TRUE for a plain literal and FALSE for a literal
     * which was preceded by the NOT operator.
     *
     * So, it has a structure:
     * array( array( array(<literal>, <sign>), // first literal of first conjunction
     *               array(<literal>, <sign>), // second literal of first conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of first conjunction
     *        array( array(<literal>, <sign>), // first literal of second conjunction
     *               array(<literal>, <sign>), // second literal of second conjunction
     *               ...
     *               array(<literal>, <sign>)
     *             ), // end of second conjunction
     *        ...
     *      ) // end of structure
     *
     * The recognizer state is left untouched, so repeated calls return
     * the same result.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception if the expression doesn't end with a literal
     */
    public function finishExpression()
    {
        if ($this->getState() != self::ST_LITERAL) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Literal expected.');
        }

        // Append the open conjunction to a copy: writing it into
        // $this->_conjunctions would duplicate it on a second call
        $conjunctions   = $this->_conjunctions;
        $conjunctions[] = $this->_currentConjunction;

        return $conjunctions;
    }



    /*********************************************************************
     * Actions implementation
     *********************************************************************/

    /**
     * default (omitted) operator processing
     *
     * Used for the literal -> literal rule.
     */
    public function emptyOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process literal
        // NOTE(review): invoked explicitly, presumably because the ST_LITERAL entry
        // action is not fired on a self-transition - confirm in Zend_Search_Lucene_FSM
        $this->literalAction();
    }

    /**
     * default (omitted) + NOT operator processing
     *
     * Used for the literal -> NOT operator rule.
     */
    public function emptyNotOperatorAction()
    {
        $this->_applyDefaultOperator();

        // Process NOT operator
        $this->notOperatorAction();
    }


    /**
     * NOT operator processing
     *
     * Marks the next literal as negative.
     */
    public function notOperatorAction()
    {
        $this->_negativeLiteral = true;
    }

    /**
     * OR operator processing
     * Close current conjunction
     */
    public function orOperatorAction()
    {
        $this->_conjunctions[]     = $this->_currentConjunction;
        $this->_currentConjunction = array();
    }

    /**
     * Literal processing
     */
    public function literalAction()
    {
        // Add literal to the current conjunction.
        // The second element is the sign: true unless the literal was negated by NOT
        $this->_currentConjunction[] = array($this->_literal, !$this->_negativeLiteral);

        // Switch off negative signal
        $this->_negativeLiteral = false;
    }

    /**
     * Apply the query parser default operator in place of an omitted one
     *
     * Default AND leaves the current conjunction open; any other default
     * operator is handled as OR and closes the current conjunction.
     */
    private function _applyDefaultOperator()
    {
        /** Zend_Search_Lucene_Search_QueryParser */
        require_once 'Zend/Search/Lucene/Search/QueryParser.php';

        if (Zend_Search_Lucene_Search_QueryParser::getDefaultOperator() != Zend_Search_Lucene_Search_QueryParser::B_AND) {
            $this->orOperatorAction();
        }
    }
}