/Search/src/query_builder.php

https://github.com/F5/zetacomponents · PHP · 318 lines · 190 code · 24 blank · 104 comment · 20 complexity · 5c97bdafc863dd9faca0f21f4fb4d69d MD5 · raw file

  1. <?php
  2. /**
  3. * File containing the ezcSearchQueryBuilder class.
  4. *
  5. * Licensed to the Apache Software Foundation (ASF) under one
  6. * or more contributor license agreements. See the NOTICE file
  7. * distributed with this work for additional information
  8. * regarding copyright ownership. The ASF licenses this file
  9. * to you under the Apache License, Version 2.0 (the
  10. * "License"); you may not use this file except in compliance
  11. * with the License. You may obtain a copy of the License at
  12. *
  13. * http://www.apache.org/licenses/LICENSE-2.0
  14. *
  15. * Unless required by applicable law or agreed to in writing,
  16. * software distributed under the License is distributed on an
  17. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18. * KIND, either express or implied. See the License for the
  19. * specific language governing permissions and limitations
  20. * under the License.
  21. *
  22. * @package Search
  23. * @version //autogen//
  24. * @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
  25. */
  26. /**
  27. * ezcSearchQueryBuilder provides a method to add a natural language search
  28. * query to an existing query object.
  29. *
  30. * @package Search
  31. * @version //autogen//
  32. * @mainclass
  33. */
  34. class ezcSearchQueryBuilder
  35. {
  36. /**
  37. * Holds the parser's state
  38. *
  39. * @var string
  40. */
  41. private $state;
  42. /**
  43. * Keeps a list of where clauses per nested level
  44. *
  45. * @var array(array(string))
  46. */
  47. private $stack;
  48. /**
  49. * Contains the current stack level
  50. *
  51. * @var int
  52. */
  53. private $stackLevel;
  54. /**
  55. * Contains the current stack elements query type ('default', 'and' or 'or').
  56. *
  57. * @var string
  58. */
  59. private $stackType;
  60. /**
  61. * Contains a prefix for the following clause ('+', '-' or null).
  62. *
  63. * @var mixed
  64. */
  65. private $prefix;
  66. /**
  67. * Parses the $searchQuery and adds the selection clauses to the $query object
  68. *
  69. * @param ezcSearchQuery $query
  70. * @param string $searchQuery
  71. * @param array(string) $searchFields
  72. */
  73. public function parseSearchQuery( ezcSearchQuery $query, $searchQuery, $searchFields )
  74. {
  75. $this->reset();
  76. $tokens = $this->tokenize( $searchQuery );
  77. $this->buildQuery( $query, $tokens, $searchFields );
  78. if ( $this->stackType[0] == 'and' || $this->stackType[0] == 'default' )
  79. {
  80. foreach ( $this->stack[0] as $element )
  81. {
  82. $query->where( $element );
  83. }
  84. }
  85. else
  86. {
  87. $query->where( $query->lOr( $this->stack[0] ) );
  88. }
  89. }
  90. /**
  91. * Resets the parser to its initial state.
  92. */
  93. public function reset()
  94. {
  95. $this->state = 'normal';
  96. $this->stackLevel = 0;
  97. $this->stack = array();
  98. $this->stack[$this->stackLevel] = array();
  99. $this->stackType = array();
  100. $this->stackType[$this->stackLevel] = 'default';
  101. $this->prefix = null;
  102. }
  103. /**
  104. * Tokenizes the search query into tokens
  105. *
  106. * @param string $searchQuery
  107. * @return array(ezcSearchQueryToken)
  108. */
  109. static protected function tokenize( $searchQuery )
  110. {
  111. $map = array(
  112. ' ' => ezcSearchQueryToken::SPACE,
  113. '\t' => ezcSearchQueryToken::SPACE,
  114. '"' => ezcSearchQueryToken::QUOTE,
  115. '+' => ezcSearchQueryToken::PLUS,
  116. '-' => ezcSearchQueryToken::MINUS,
  117. '(' => ezcSearchQueryToken::BRACE_OPEN,
  118. ')' => ezcSearchQueryToken::BRACE_CLOSE,
  119. 'and' => ezcSearchQueryToken::LOGICAL_AND,
  120. 'or' => ezcSearchQueryToken::LOGICAL_OR,
  121. ':' => ezcSearchQueryToken::COLON,
  122. );
  123. $tokens = array();
  124. $tokenArray = preg_split( '@(\s)|(["+():-])@', $searchQuery, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
  125. foreach ( $tokenArray as $token )
  126. {
  127. if ( isset( $map[strtolower( $token )] ) )
  128. {
  129. $tokens[] = new ezcSearchQueryToken( $map[strtolower( $token )], $token );
  130. }
  131. else
  132. {
  133. $tokens[] = new ezcSearchQueryToken( ezcSearchQueryToken::STRING, $token );
  134. }
  135. }
  136. return $tokens;
  137. }
  138. /**
  139. * Applies the current prefix to the clause in $string
  140. *
  141. * @param ezcSearchQuery $q
  142. * @param string $string
  143. *
  144. * @return string
  145. */
  146. private function processPrefix( ezcSearchQuery $q, $string )
  147. {
  148. switch ( $this->prefix )
  149. {
  150. case ezcSearchQueryToken::PLUS:
  151. $string = $q->important( $string );
  152. break;
  153. case ezcSearchQueryToken::MINUS:
  154. $string = $q->not( $string );
  155. break;
  156. }
  157. return $string;
  158. }
  159. /**
  160. * Assembles a query part for a search term for the fields passed in $searchFields
  161. *
  162. * If there is only one search field, it just processes the prefix. In case
  163. * there are multiple fields they are joined together with OR, unless the
  164. * whole clause is negated. In that case they're joined by AND.
  165. *
  166. * @param ezcSearchQuery $q
  167. * @param string $term
  168. * @param array(string) $searchFields
  169. *
  170. * @return string
  171. */
  172. private function constructSearchWhereClause( ezcSearchQuery $q, $term, $searchFields )
  173. {
  174. if ( count( $searchFields ) > 1 )
  175. {
  176. $parts = array();
  177. foreach ( $searchFields as $searchField )
  178. {
  179. $parts[] = $this->processPrefix( $q, $q->eq( $searchField, $term ) );
  180. }
  181. if ( $this->prefix == ezcSearchQueryToken::MINUS )
  182. {
  183. $ret = $q->lAnd( $parts );
  184. }
  185. else
  186. {
  187. $ret = $q->lOr( $parts );
  188. }
  189. }
  190. else
  191. {
  192. $ret = $this->processPrefix( $q, $q->eq( $searchFields[0], $term ) );
  193. }
  194. $this->prefix = null;
  195. return $ret;
  196. }
  197. /**
  198. * Walks over the $tokens and builds the query $q from them and the $searchFields
  199. *
  200. * @param ezcSearchQuery $q
  201. * @param array(ezcSearchQueryToken) $tokens
  202. * @param array(string) $searchFields
  203. *
  204. * @throws ezcSearchBuildQueryException if there is an uneven set of quotes.
  205. */
  206. protected function buildQuery( ezcSearchQuery $q, $tokens, $searchFields )
  207. {
  208. foreach ( $tokens as $token )
  209. {
  210. switch ( $this->state )
  211. {
  212. case 'normal':
  213. switch ( $token->type )
  214. {
  215. case ezcSearchQueryToken::SPACE:
  216. /* ignore */
  217. break;
  218. case ezcSearchQueryToken::STRING:
  219. $this->stack[$this->stackLevel][] = $this->constructSearchWhereClause( $q, $token->token, $searchFields );
  220. break;
  221. case ezcSearchQueryToken::QUOTE:
  222. $this->state = 'in-quotes';
  223. $string = '';
  224. break;
  225. case ezcSearchQueryToken::LOGICAL_OR:
  226. if ( $this->stackType[$this->stackLevel] === 'and' )
  227. {
  228. throw new ezcSearchBuildQueryException( 'You can not mix AND and OR without using "(" and ")".' );
  229. }
  230. else
  231. {
  232. $this->stackType[$this->stackLevel] = 'or';
  233. }
  234. break;
  235. case ezcSearchQueryToken::LOGICAL_AND:
  236. if ( $this->stackType[$this->stackLevel] === 'or' )
  237. {
  238. throw new ezcSearchBuildQueryException( 'You can not mix OR and AND without using "(" and ")".' );
  239. }
  240. else
  241. {
  242. $this->stackType[$this->stackLevel] = 'and';
  243. }
  244. break;
  245. case ezcSearchQueryToken::BRACE_OPEN:
  246. $this->stackLevel++;
  247. $this->stackType[$this->stackLevel] = 'default';
  248. break;
  249. case ezcSearchQueryToken::BRACE_CLOSE:
  250. $this->stackLevel--;
  251. if ( $this->stackType[$this->stackLevel + 1] == 'and' || $this->stackType[$this->stackLevel + 1] == 'default' )
  252. {
  253. $this->stack[$this->stackLevel][] = $q->lAnd( $this->stack[$this->stackLevel + 1] );
  254. }
  255. else
  256. {
  257. $this->stack[$this->stackLevel][] = $q->lOr( $this->stack[$this->stackLevel + 1] );
  258. }
  259. break;
  260. case ezcSearchQueryToken::PLUS:
  261. case ezcSearchQueryToken::MINUS:
  262. $this->prefix = $token->type;
  263. break;
  264. }
  265. break;
  266. case 'in-quotes':
  267. switch ( $token->type )
  268. {
  269. case ezcSearchQueryToken::QUOTE:
  270. $this->stack[$this->stackLevel][] = $this->constructSearchWhereClause( $q, $string, $searchFields );
  271. $this->state = 'normal';
  272. break;
  273. case ezcSearchQueryToken::STRING:
  274. case ezcSearchQueryToken::COLON:
  275. case ezcSearchQueryToken::SPACE:
  276. case ezcSearchQueryToken::LOGICAL_AND:
  277. case ezcSearchQueryToken::LOGICAL_OR:
  278. case ezcSearchQueryToken::PLUS:
  279. case ezcSearchQueryToken::MINUS:
  280. case ezcSearchQueryToken::BRACE_OPEN:
  281. case ezcSearchQueryToken::BRACE_CLOSE:
  282. $string .= $token->token;
  283. break;
  284. }
  285. break;
  286. }
  287. }
  288. if ( $this->state == 'in-quotes' )
  289. {
  290. throw new ezcSearchBuildQueryException( 'Unterminated quotes in query string.' );
  291. }
  292. }
  293. }
  294. ?>