PageRenderTime 40ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/library/Solarium/Core/Query/Helper.php

http://github.com/basdenooijer/solarium
PHP | 506 lines | 192 code | 44 blank | 270 comment | 25 complexity | debe05c4d75e7493464f2ca0e1a01927 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * Copyright 2011 Bas de Nooijer. All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice,
  9. * this list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  19. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. * POSSIBILITY OF SUCH DAMAGE.
  26. *
  27. * The views and conclusions contained in the software and documentation are
  28. * those of the authors and should not be interpreted as representing official
  29. * policies, either expressed or implied, of the copyright holder.
  30. *
  31. * @copyright Copyright 2011 Bas de Nooijer <solarium@raspberry.nl>
  32. * @license http://github.com/basdenooijer/solarium/raw/master/COPYING
  33. * @link http://www.solarium-project.org/
  34. */
  35. /**
  36. * @namespace
  37. */
  38. namespace Solarium\Core\Query;
  39. use Solarium\Exception\InvalidArgumentException;
  40. /**
  41. * Query helper
  42. *
  43. * Generates small snippets for use in queries, filterqueries and sorting
  44. */
  45. class Helper
  46. {
  47. /**
  48. * Placeholder pattern for use in the assemble method
  49. *
  50. * @var string
  51. */
  52. protected $placeHolderPattern = '/%(L|P|T|)([0-9]+)%/i';
  53. /**
  54. * Array of parts to use for assembling a query string
  55. *
  56. * @var array
  57. */
  58. protected $assembleParts;
  59. /**
  60. * Counter to keep dereferenced params unique (within a single query instance)
  61. *
  62. * @var int
  63. */
  64. protected $derefencedParamsLastKey = 0;
  65. /**
  66. * Solarium Query instance, optional.
  67. * Used for dereferenced params.
  68. *
  69. * @var Query
  70. */
  71. protected $query;
  72. /**
  73. * Constructor
  74. *
  75. * @param Query $query
  76. */
  77. public function __construct($query = null)
  78. {
  79. $this->query = $query;
  80. }
  81. /**
  82. * Escape a term
  83. *
  84. * A term is a single word.
  85. * All characters that have a special meaning in a Solr query are escaped.
  86. *
  87. * If you want to use the input as a phrase please use the {@link phrase()}
  88. * method, because a phrase requires much less escaping.\
  89. *
  90. * @link http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
  91. *
  92. * @param string $input
  93. * @return string
  94. */
  95. public function escapeTerm($input)
  96. {
  97. $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\/|\\\)/';
  98. return preg_replace($pattern, '\\\$1', $input);
  99. }
  100. /**
  101. * Escape a phrase
  102. *
  103. * A phrase is a group of words.
  104. * Special characters will be escaped and the phrase will be surrounded by
  105. * double quotes to group the input into a single phrase. So don't put
  106. * quotes around the input.
  107. *
  108. * Do mind that you cannot build a complete query first and then pass it to
  109. * this method, the whole query will be escaped. You need to escape only the
  110. * 'content' of your query.
  111. *
  112. * @param string $input
  113. * @return string
  114. */
  115. public function escapePhrase($input)
  116. {
  117. return '"' . preg_replace('/("|\\\)/', '\\\$1', $input) . '"';
  118. }
  119. /**
  120. * Format a date to the expected formatting used in SOLR
  121. *
  122. * This format was derived to be standards compliant (ISO 8601)
  123. * A date field shall be of the form 1995-12-31T23:59:59Z The trailing "Z" designates UTC time and is mandatory
  124. *
  125. * @see http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
  126. *
  127. * @param int|string|\DateTime $input accepted formats: timestamp, date string or DateTime
  128. * @return string|boolean false is returned in case of invalid input
  129. */
  130. public function formatDate($input)
  131. {
  132. switch (true) {
  133. // input of datetime object
  134. case $input instanceof \DateTime:
  135. // no work needed
  136. break;
  137. // input of timestamp or date/time string
  138. case is_string($input) || is_numeric($input):
  139. // if date/time string: convert to timestamp first
  140. if (is_string($input)) {
  141. $input = strtotime($input);
  142. }
  143. // now try converting the timestamp to a datetime instance, on failure return false
  144. try {
  145. $input = new \DateTime('@' . $input);
  146. } catch (\Exception $e) {
  147. $input = false;
  148. }
  149. break;
  150. // any other input formats can be added in additional cases here...
  151. // case $input instanceof Zend_Date:
  152. // unsupported input format
  153. default:
  154. $input = false;
  155. break;
  156. }
  157. // handle the filtered input
  158. if ($input) {
  159. // when we get here the input is always a datetime object
  160. $input->setTimezone(new \DateTimeZone('UTC'));
  161. $iso8601 = $input->format(\DateTime::ISO8601);
  162. $iso8601 = strstr($iso8601, '+', true); //strip timezone
  163. $iso8601 .= 'Z';
  164. return $iso8601;
  165. } else {
  166. // unsupported input
  167. return false;
  168. }
  169. }
  170. /**
  171. * Render a range query
  172. *
  173. * From and to can be any type of data. For instance int, string or point.
  174. * If they are null, then '*' will be used.
  175. *
  176. * Example: rangeQuery('store', '45,-94', '46,-93')
  177. * Returns: store:[45,-94 TO 46,-93]
  178. *
  179. * Example: rangeQuery('store', '5', '*', false)
  180. * Returns: store:{5 TO *}
  181. *
  182. * @param string $field
  183. * @param string $from
  184. * @param string $to
  185. * @param boolean $inclusive
  186. * @return string
  187. */
  188. public function rangeQuery($field, $from, $to, $inclusive = true)
  189. {
  190. if ($from === null) {
  191. $from = '*';
  192. }
  193. if ($to === null) {
  194. $to = '*';
  195. }
  196. if ($inclusive) {
  197. return $field . ':[' . $from . ' TO ' . $to . ']';
  198. } else {
  199. return $field . ':{' . $from . ' TO ' . $to . '}';
  200. }
  201. }
  202. /**
  203. * Render a geofilt (distance) filter
  204. *
  205. * Find all entries within the distance of a certain point.
  206. *
  207. * @param $field
  208. * @param $pointX
  209. * @param $pointY
  210. * @param $distance
  211. * @param boolean $dereferenced
  212. * @return string
  213. */
  214. public function geofilt($field, $pointX, $pointY, $distance, $dereferenced = false)
  215. {
  216. return $this->qparser(
  217. 'geofilt',
  218. array(
  219. 'pt' => $pointX.','.$pointY,
  220. 'sfield' => $field,
  221. 'd' => $distance
  222. ),
  223. $dereferenced
  224. );
  225. }
  226. /**
  227. * Render a bbox (boundingbox) filter
  228. *
  229. * Exact distance calculations can be somewhat expensive and it can often
  230. * make sense to use a quick approximation instead. The bbox filter is
  231. * guaranteed to encompass all of the points of interest, but it may also
  232. * include other points that are slightly outside of the required distance.
  233. *
  234. * @param string $field
  235. * @param string $pointX
  236. * @param string $pointY
  237. * @param string $distance
  238. * @param boolean $dereferenced
  239. * @return string
  240. */
  241. public function bbox($field, $pointX, $pointY, $distance, $dereferenced = false)
  242. {
  243. return $this->qparser(
  244. 'bbox',
  245. array(
  246. 'pt' => $pointX.','.$pointY,
  247. 'sfield' => $field,
  248. 'd' => $distance
  249. ),
  250. $dereferenced
  251. );
  252. }
  253. /**
  254. * Render a geodist function call
  255. *
  256. * geodist is a function query that yields the calculated distance.
  257. * This gives the flexibility to do a number of interesting things,
  258. * such as sorting by the distance (Solr can sort by any function query),
  259. * or combining the distance with the relevancy score,
  260. * such as boosting by the inverse of the distance.
  261. *
  262. * @param $field
  263. * @param $pointX
  264. * @param $pointY
  265. * @param boolean $dereferenced
  266. * @return string
  267. */
  268. public function geodist($field, $pointX, $pointY, $dereferenced = false)
  269. {
  270. return $this->functionCall(
  271. 'geodist',
  272. array('sfield' => $field, 'pt' => $pointX.','.$pointY),
  273. $dereferenced
  274. );
  275. }
  276. /**
  277. * Render a qparser plugin call
  278. *
  279. * @throws InvalidArgumentException
  280. * @param string $name
  281. * @param array $params
  282. * @param boolean $dereferenced
  283. * @param boolean $forceKeys
  284. * @return string
  285. */
  286. public function qparser($name, $params = array(), $dereferenced = false, $forceKeys = false)
  287. {
  288. if ($dereferenced) {
  289. if (!$this->query) {
  290. throw new InvalidArgumentException(
  291. 'Dereferenced params can only be used in a Solarium query helper instance retrieved from the query '
  292. . 'by using the getHelper() method, this instance was manually created'
  293. );
  294. }
  295. foreach ($params as $paramKey => $paramValue) {
  296. if (is_int($paramKey) || $forceKeys) {
  297. $this->derefencedParamsLastKey++;
  298. $derefKey = 'deref_' . $this->derefencedParamsLastKey;
  299. } else {
  300. $derefKey = $paramKey;
  301. }
  302. $this->query->addParam($derefKey, $paramValue);
  303. $params[$paramKey] = '$'.$derefKey;
  304. }
  305. }
  306. $output = '{!'.$name;
  307. foreach ($params as $key => $value) {
  308. if (!$dereferenced || $forceKeys || is_int($key)) {
  309. $output .= ' ' . $key . '=' . $value;
  310. }
  311. }
  312. $output .= '}';
  313. return $output;
  314. }
  315. /**
  316. * Render a functionCall
  317. *
  318. * @param string $name
  319. * @param array $params
  320. * @param boolean $dereferenced
  321. * @return string
  322. */
  323. public function functionCall($name, $params = array(), $dereferenced = false)
  324. {
  325. if ($dereferenced) {
  326. foreach ($params as $key => $value) {
  327. $this->query->addParam($key, $value);
  328. }
  329. return $name . '()';
  330. } else {
  331. return $name . '(' . implode($params, ',') . ')';
  332. }
  333. }
  334. /**
  335. * Assemble a querystring with placeholders
  336. *
  337. * These placeholder modes are supported:
  338. * %1% = no mode, will default to literal
  339. * %L2% = literal
  340. * %P3% = phrase-escaped
  341. * %T4% = term-escaped
  342. *
  343. * Numbering starts at 1, so number 1 refers to the first entry
  344. * of $parts (which has array key 0)
  345. * You can use the same part multiple times, even in multiple modes.
  346. * The mode letters are not case sensitive.
  347. *
  348. * The mode matching pattern can be customized by overriding the
  349. * value of $this->placeHolderPattern
  350. *
  351. * @since 2.1.0
  352. *
  353. * @param string $query
  354. * @param array $parts Array of strings
  355. * @return string
  356. */
  357. public function assemble($query, $parts)
  358. {
  359. $this->assembleParts = $parts;
  360. return preg_replace_callback(
  361. $this->placeHolderPattern,
  362. array($this, 'renderPlaceHolder'),
  363. $query
  364. );
  365. }
  366. /**
  367. * Render placeholders in a querystring
  368. *
  369. * @throws InvalidArgumentException
  370. * @param array $matches
  371. * @return string
  372. */
  373. protected function renderPlaceHolder($matches)
  374. {
  375. $partNumber = $matches[2];
  376. $partMode = strtoupper($matches[1]);
  377. if (isset($this->assembleParts[$partNumber-1])) {
  378. $value = $this->assembleParts[$partNumber-1];
  379. } else {
  380. throw new InvalidArgumentException('No value supplied for part #' . $partNumber . ' in query assembler');
  381. }
  382. switch ($partMode) {
  383. case 'P':
  384. $value = $this->escapePhrase($value);
  385. break;
  386. case 'T':
  387. $value = $this->escapeTerm($value);
  388. break;
  389. }
  390. return $value;
  391. }
  392. /**
  393. * Render join localparams syntax
  394. *
  395. * @see http://wiki.apache.org/solr/Join
  396. * @since 2.4.0
  397. *
  398. * @param string $from
  399. * @param string $to
  400. * @param boolean $dereferenced
  401. * @return string
  402. */
  403. public function join($from, $to, $dereferenced = false)
  404. {
  405. return $this->qparser('join', array('from' => $from, 'to' => $to), $dereferenced, $dereferenced);
  406. }
  407. /**
  408. * Render term query
  409. *
  410. * Useful for avoiding query parser escaping madness when drilling into facets via fq parameters, example:
  411. * {!term f=weight}1.5
  412. *
  413. * This is a Solr 3.2+ feature.
  414. *
  415. * @see http://wiki.apache.org/solr/SolrQuerySyntax#Other_built-in_useful_query_parsers
  416. *
  417. * @param string $field
  418. * @param float $weight
  419. * @return string
  420. */
  421. public function qparserTerm($field, $weight)
  422. {
  423. return $this->qparser('term', array('f' => $field)) . $weight;
  424. }
  425. /**
  426. * Render cache control param for use in filterquery
  427. *
  428. * This is a Solr 3.4+ feature.
  429. *
  430. * @see http://wiki.apache.org/solr/CommonQueryParameters#Caching_of_filters
  431. *
  432. * @param boolean $useCache
  433. * @param float|null $cost
  434. * @return string
  435. */
  436. public function cacheControl($useCache, $cost = null)
  437. {
  438. if ($useCache === true) {
  439. $cache = 'true';
  440. } else {
  441. $cache = 'false';
  442. }
  443. $result = '{!cache='.$cache;
  444. if (null !== $cost) {
  445. $result .= ' cost='.$cost;
  446. }
  447. $result .= '}';
  448. return $result;
  449. }
  450. /**
  451. * Filters control characters that cause issues with servlet containers.
  452. *
  453. * Mainly useful to filter data before adding it to a document for the update query.
  454. *
  455. * @param $data
  456. * @return mixed
  457. */
  458. public function filterControlCharacters($data)
  459. {
  460. return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $data);
  461. }
  462. }