PageRenderTime 54ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/branch/2.0.2/solr/Apache/Solr/Service.php

http://records-authority.googlecode.com/
PHP | 884 lines | 363 code | 120 blank | 401 comment | 27 complexity | 9d2fec16324bc13966ba0dd49b49b7b9 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. <?php
  2. /**
  3. * @copyright Copyright 2007 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  4. * @license Apache Licence, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
  5. *
  6. * Licensed under the Apache License, Version 2.0 (the "License");
  7. * you may not use this file except in compliance with the License.
  8. * You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. *
  18. * @package Apache
  19. * @subpackage Solr
  20. * @author Donovan Jimenez <djimenez@conduit-it.com>
  21. */
  22. require_once('Apache/Solr/Document.php');
  23. require_once('Apache/Solr/Response.php');
  24. /**
  25. * Starting point for the Solr API. Represents a Solr server resource and has
  26. * methods for pinging, adding, deleting, committing, optimizing and searching.
  27. *
  28. * Example Usage:
  29. * <code>
  30. * ...
  31. * $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr')
  32. *
  33. * if ($solr->ping())
  34. * {
  35. * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :)
  36. *
  37. * $document = new Apache_Solr_Document();
  38. * $document->id = uniqid(); //or something else suitably unique
  39. *
  40. * $document->title = 'Some Title';
  41. * $document->content = 'Some content for this wonderful document. Blah blah blah.';
  42. *
  43. * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments
  44. * //with an array of documents is faster
  45. *
  46. * $solr->commit(); //commit to see the deletes and the document
  47. * $solr->optimize(); //merges multiple segments into one
  48. *
  49. * //and the one we all care about, search!
  50. * //any other common or custom parameters to the request handler can go in the
  51. * //optional 4th array argument.
  52. * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc'));
  53. * }
  54. * ...
  55. * </code>
  56. *
  57. * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
  58. * improvements when dealing with multiple requests by using HTTP's keep alive functionality
  59. */
  60. class Apache_Solr_Service
  61. {
  62. /**
  63. * Response version we support
  64. */
  65. const SOLR_VERSION = '1.2';
  66. /**
  67. * Response writer we support
  68. *
  69. * @todo Solr 1.3 release may change this to SerializedPHP or PHP implementation
  70. */
  71. const SOLR_WRITER = 'json';
  72. /**
  73. * NamedList Treatment constants
  74. */
  75. const NAMED_LIST_FLAT = 'flat';
  76. const NAMED_LIST_MAP = 'map';
  77. /**
  78. * Servlet mappings
  79. */
  80. const PING_SERVLET = 'admin/ping';
  81. const UPDATE_SERVLET = 'update';
  82. const SEARCH_SERVLET = 'select';
  83. const THREADS_SERVLET = 'admin/threads';
  84. /**
  85. * Server identification strings
  86. *
  87. * @var string
  88. */
  89. protected $_host, $_port, $_path;
  90. /**
  91. * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in
  92. * the returned parsed data
  93. *
  94. * @var boolean
  95. */
  96. protected $_createDocuments = true;
  97. /**
  98. * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value
  99. * collapsed to appear as a single value would.
  100. *
  101. * @var boolean
  102. */
  103. protected $_collapseSingleValueArrays = true;
  104. /**
  105. * How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values
  106. * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}.
  107. *
  108. * @var string
  109. */
  110. protected $_namedListTreatment = self::NAMED_LIST_MAP;
  111. /**
  112. * Query delimiters. Someone might want to be able to change
  113. * these (to use &amp; instead of & for example), so I've provided them.
  114. *
  115. * @var string
  116. */
  117. protected $_queryDelimiter = '?', $_queryStringDelimiter = '&';
  118. /**
  119. * Constructed servlet full path URLs
  120. *
  121. * @var string
  122. */
  123. protected $_updateUrl, $_searchUrl, $_threadsUrl;
  124. /**
  125. * Keep track of whether our URLs have been constructed
  126. *
  127. * @var boolean
  128. */
  129. protected $_urlsInited = false;
  130. /**
  131. * Stream context for posting
  132. *
  133. * @var resource
  134. */
  135. protected $_postContext;
  136. /**
  137. * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
  138. *
  139. * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead
  140. *
  141. * @param string $value
  142. * @return string
  143. */
  144. static public function escape($value)
  145. {
  146. //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
  147. $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/';
  148. $replace = '\\\$1';
  149. return preg_replace($pattern, $replace, $value);
  150. }
  151. /**
  152. * Escape a value meant to be contained in a phrase for special query characters
  153. *
  154. * @param string $value
  155. * @return string
  156. */
  157. static public function escapePhrase($value)
  158. {
  159. $pattern = '/("|\\\)/';
  160. $replace = '\\\$1';
  161. return preg_replace($pattern, $replace, $value);
  162. }
  163. /**
  164. * Convenience function for creating phrase syntax from a value
  165. *
  166. * @param string $value
  167. * @return string
  168. */
  169. static public function phrase($value)
  170. {
  171. return '"' . self::escapePhrase($value) . '"';
  172. }
  173. /**
  174. * Constructor. All parameters are optional and will take on default values
  175. * if not specified.
  176. *
  177. * @param string $host
  178. * @param string $port
  179. * @param string $path
  180. */
  181. public function __construct($host = 'localhost', $port = 8180, $path = '/solr/')
  182. {
  183. $this->setHost($host);
  184. $this->setPort($port);
  185. $this->setPath($path);
  186. $this->_initUrls();
  187. //set up the stream context for posting with file_get_contents
  188. $contextOpts = array(
  189. 'http' => array(
  190. 'method' => 'POST',
  191. 'header' => "Content-Type: text/xml; charset=UTF-8\r\n" //php.net example showed \r\n at the end
  192. )
  193. );
  194. $this->_postContext = stream_context_create($contextOpts);
  195. }
  196. /**
  197. * Return a valid http URL given this server's host, port and path and a provided servlet name
  198. *
  199. * @param string $servlet
  200. * @return string
  201. */
  202. protected function _constructUrl($servlet, $params = array())
  203. {
  204. if (count($params))
  205. {
  206. //escape all parameters appropriately for inclusion in the query string
  207. $escapedParams = array();
  208. foreach ($params as $key => $value)
  209. {
  210. $escapedParams[] = urlencode($key) . '=' . urlencode($value);
  211. }
  212. $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams);
  213. }
  214. else
  215. {
  216. $queryString = '';
  217. }
  218. return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . $queryString;
  219. }
  220. /**
  221. * Construct the Full URLs for the three servlets we reference
  222. */
  223. protected function _initUrls()
  224. {
  225. //Initialize our full servlet URLs now that we have server information
  226. $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER ));
  227. $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
  228. $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER ));
  229. $this->_urlsInited = true;
  230. }
  231. /**
  232. * Central method for making a get operation against this Solr Server
  233. *
  234. * @param string $url
  235. * @param float $timeout Read timeout in seconds
  236. * @return Apache_Solr_Response
  237. *
  238. * @todo implement timeout ability
  239. * @throws Exception If a non 200 response status is returned
  240. */
  241. protected function _sendRawGet($url, $timeout = FALSE)
  242. {
  243. //$http_response_header is set by file_get_contents
  244. $response = new Apache_Solr_Response(@file_get_contents($url), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays);
  245. if ($response->getHttpStatus() != 200)
  246. {
  247. throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus());
  248. }
  249. return $response;
  250. }
  251. /**
  252. * Central method for making a post operation against this Solr Server
  253. *
  254. * @param string $url
  255. * @param string $rawPost
  256. * @param float $timeout Read timeout in seconds
  257. * @param string $contentType
  258. * @return Apache_Solr_Response
  259. *
  260. * @throws Exception If a non 200 response status is returned
  261. */
  262. protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8')
  263. {
  264. //ensure content type is correct
  265. stream_context_set_option($this->_postContext, 'http', 'header', 'Content-Type: ' . $contentType);
  266. //set the read timeout if specified
  267. if ($timeout !== FALSE)
  268. {
  269. stream_context_set_option($this->_postContext, 'http', 'timeout', $timeout);
  270. }
  271. //set the content
  272. stream_context_set_option($this->_postContext, 'http', 'content', $rawPost);
  273. //$http_response_header is set by file_get_contents
  274. $response = new Apache_Solr_Response(@file_get_contents($url, false, $this->_postContext), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays);
  275. if ($response->getHttpStatus() != 200)
  276. {
  277. throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus());
  278. }
  279. return $response;
  280. }
  281. /**
  282. * Returns the set host
  283. *
  284. * @return string
  285. */
  286. public function getHost()
  287. {
  288. return $this->_host;
  289. }
  290. /**
  291. * Set the host used. If empty will fallback to constants
  292. *
  293. * @param string $host
  294. */
  295. public function setHost($host)
  296. {
  297. //Use the provided host or use the default
  298. if (empty($host))
  299. {
  300. throw new Exception('Host parameter is empty');
  301. }
  302. else
  303. {
  304. $this->_host = $host;
  305. }
  306. if ($this->_urlsInited)
  307. {
  308. $this->_initUrls();
  309. }
  310. }
  311. /**
  312. * Get the set port
  313. *
  314. * @return integer
  315. */
  316. public function getPort()
  317. {
  318. return $this->_port;
  319. }
  320. /**
  321. * Set the port used. If empty will fallback to constants
  322. *
  323. * @param integer $port
  324. */
  325. public function setPort($port)
  326. {
  327. //Use the provided port or use the default
  328. $port = (int) $port;
  329. if ($port <= 0)
  330. {
  331. throw new Exception('Port is not a valid port number');
  332. }
  333. else
  334. {
  335. $this->_port = $port;
  336. }
  337. if ($this->_urlsInited)
  338. {
  339. $this->_initUrls();
  340. }
  341. }
  342. /**
  343. * Get the set path.
  344. *
  345. * @return string
  346. */
  347. public function getPath()
  348. {
  349. return $this->_path;
  350. }
  351. /**
  352. * Set the path used. If empty will fallback to constants
  353. *
  354. * @param string $path
  355. */
  356. public function setPath($path)
  357. {
  358. $path = trim($path, '/');
  359. $this->_path = '/' . $path . '/';
  360. if ($this->_urlsInited)
  361. {
  362. $this->_initUrls();
  363. }
  364. }
  365. /**
  366. * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will
  367. * parse the response and create {@link Apache_Solr_Document} instances in place.
  368. *
  369. * @param unknown_type $createDocuments
  370. */
  371. public function setCreateDocuments($createDocuments)
  372. {
  373. $this->_createDocuments = (bool) $createDocuments;
  374. }
  375. /**
  376. * Get the current state of teh create documents flag.
  377. *
  378. * @return boolean
  379. */
  380. public function getCreateDocuments()
  381. {
  382. return $this->_createDocuments;
  383. }
  384. /**
  385. * Set the collapse single value arrays flag.
  386. *
  387. * @param boolean $collapseSingleValueArrays
  388. */
  389. public function setCollapseSingleValueArrays($collapseSingleValueArrays)
  390. {
  391. $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays;
  392. }
  393. /**
  394. * Get the current state of the collapse single value arrays flag.
  395. *
  396. * @return boolean
  397. */
  398. public function getCollapseSingleValueArrays()
  399. {
  400. return $this->_collapseSingleValueArrays;
  401. }
  402. /**
  403. * Set how NamedLists should be formatted in the response data. This mainly effects
  404. * the facet counts format.
  405. *
  406. * @param string $namedListTreatment
  407. * @throws Exception If invalid option is set
  408. */
  409. public function setNamedListTreatmet($namedListTreatment)
  410. {
  411. switch ((string) $namedListTreatment)
  412. {
  413. case Apache_Solr_Service::NAMED_LIST_FLAT:
  414. $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT;
  415. break;
  416. case Apache_Solr_Service::NAMED_LIST_MAP:
  417. $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP;
  418. break;
  419. default:
  420. throw new Exception('Not a valid named list treatement option');
  421. }
  422. }
  423. /**
  424. * Get the current setting for named list treatment.
  425. *
  426. * @return string
  427. */
  428. public function getNamedListTreatment()
  429. {
  430. return $this->_namedListTreatment;
  431. }
  432. /**
  433. * Set the string used to separate the path form the query string.
  434. * Defaulted to '?'
  435. *
  436. * @param string $queryDelimiter
  437. */
  438. public function setQueryDelimiter($queryDelimiter)
  439. {
  440. $this->_queryDelimiter = $queryDelimiter;
  441. }
  442. /**
  443. * Set the string used to separate the parameters in thequery string
  444. * Defaulted to '&'
  445. *
  446. * @param string $queryStringDelimiter
  447. */
  448. public function setQueryStringDelimiter($queryStringDelimiter)
  449. {
  450. $this->_queryStringDelimiter = $queryStringDelimiter;
  451. }
  452. /**
  453. * Call the /admin/ping servlet, can be used to quickly tell if a connection to the
  454. * server is able to be made.
  455. *
  456. * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2)
  457. * @return float Actual time taken to ping the server, FALSE if timeout occurs
  458. */
  459. public function ping($timeout = 2)
  460. {
  461. $timeout = (float) $timeout;
  462. if ($timeout <= 0)
  463. {
  464. $timeout = -1;
  465. }
  466. $start = microtime(true);
  467. //to prevent strict errors
  468. $errno = 0;
  469. $errstr = '';
  470. //try to connect to the host with timeout
  471. $fp = fsockopen($this->_host, $this->_port, $errno, $errstr, $timeout);
  472. if ($fp)
  473. {
  474. //If we have a timeout set, then determine the amount of time we have left
  475. //in the request and set the stream timeout for the write operation
  476. if ($timeout > 0)
  477. {
  478. //do the calculation
  479. $writeTimeout = $timeout - (microtime(true) - $start);
  480. //check if we're out of time
  481. if ($writeTimeout <= 0)
  482. {
  483. fclose($fp);
  484. return false;
  485. }
  486. //convert to microseconds and set the stream timeout
  487. $writeTimeoutInMicroseconds = (int) $writeTimeout * 1000000;
  488. stream_set_timeout($fp, 0, $writeTimeoutInMicroseconds);
  489. }
  490. $request = 'HEAD ' . $this->_path . self::PING_SERVLET . ' HTTP/1.1' . "\r\n" .
  491. 'host: ' . $this->_host . "\r\n" .
  492. 'Connection: close' . "\r\n" .
  493. "\r\n";
  494. fwrite($fp, $request);
  495. //check the stream meta data to see if we timed out during the operation
  496. $metaData = stream_get_meta_data($fp);
  497. if (isset($metaData['timeout']) && $metaData['timeout'])
  498. {
  499. fclose($fp);
  500. return false;
  501. }
  502. //if we have a timeout set and have made it this far, determine the amount of time
  503. //still remaining and set the timeout appropriately before the read operation
  504. if ($timeout > 0)
  505. {
  506. //do the calculation
  507. $readTimeout = $timeout - (microtime(true) - $start);
  508. //check if we've run out of time
  509. if ($readTimeout <= 0)
  510. {
  511. fclose($fp);
  512. return false;
  513. }
  514. //convert to microseconds and set the stream timeout
  515. $readTimeoutInMicroseconds = $readTimeout * 1000000;
  516. stream_set_timeout($fp, 0, $readTimeoutInMicroseconds);
  517. }
  518. //at the very least we should get a response header line of
  519. //HTTP/1.1 200 OK
  520. $response = fread($fp, 15);
  521. //check the stream meta data to see if we timed out during the operation
  522. $metaData = stream_get_meta_data($fp);
  523. fclose($fp); //we're done with the connection - ignore the rest
  524. if (isset($metaData['timeout']) && $metaData['timeout'])
  525. {
  526. return false;
  527. }
  528. //finally, check the response header line
  529. if ($response != 'HTTP/1.1 200 OK')
  530. {
  531. return false;
  532. }
  533. //we made it, return the approximate ping time
  534. return microtime(true) - $start;
  535. }
  536. //we weren't able to make a connection
  537. return false;
  538. }
  539. /**
  540. * Call the /admin/threads servlet and retrieve information about all threads in the
  541. * Solr servlet's thread group. Useful for diagnostics.
  542. *
  543. * @return Apache_Solr_Response
  544. *
  545. * @throws Exception If an error occurs during the service call
  546. */
  547. public function threads()
  548. {
  549. return $this->_sendRawGet($this->_threadsUrl);
  550. }
  551. /**
  552. * Raw Add Method. Takes a raw post body and sends it to the update service. Post body
  553. * should be a complete and well formed "add" xml document.
  554. *
  555. * @param string $rawPost
  556. * @return Apache_Solr_Response
  557. *
  558. * @throws Exception If an error occurs during the service call
  559. */
  560. public function add($rawPost)
  561. {
  562. return $this->_sendRawPost($this->_updateUrl, $rawPost);
  563. }
  564. /**
  565. * Add a Solr Document to the index
  566. *
  567. * @param Apache_Solr_Document $document
  568. * @param boolean $allowDups
  569. * @param boolean $overwritePending
  570. * @param boolean $overwriteCommitted
  571. * @return Apache_Solr_Response
  572. *
  573. * @throws Exception If an error occurs during the service call
  574. */
  575. public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true)
  576. {
  577. $dupValue = $allowDups ? 'true' : 'false';
  578. $pendingValue = $overwritePending ? 'true' : 'false';
  579. $committedValue = $overwriteCommitted ? 'true' : 'false';
  580. $rawPost = '<add allowDups="' . $dupValue . '" overwritePending="' . $pendingValue . '" overwriteCommitted="' . $committedValue . '">';
  581. $rawPost .= $this->_documentToXmlFragment($document);
  582. $rawPost .= '</add>';
  583. return $this->add($rawPost);
  584. }
  585. /**
  586. * Add an array of Solr Documents to the index all at once
  587. *
  588. * @param array $documents Should be an array of Apache_Solr_Document instances
  589. * @param boolean $allowDups
  590. * @param boolean $overwritePending
  591. * @param boolean $overwriteCommitted
  592. * @return Apache_Solr_Response
  593. *
  594. * @throws Exception If an error occurs during the service call
  595. */
  596. public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true)
  597. {
  598. $dupValue = $allowDups ? 'true' : 'false';
  599. $pendingValue = $overwritePending ? 'true' : 'false';
  600. $committedValue = $overwriteCommitted ? 'true' : 'false';
  601. $rawPost = '<add allowDups="' . $dupValue . '" overwritePending="' . $pendingValue . '" overwriteCommitted="' . $committedValue . '">';
  602. foreach ($documents as $document)
  603. {
  604. if ($document instanceof Apache_Solr_Document)
  605. {
  606. $rawPost .= $this->_documentToXmlFragment($document);
  607. }
  608. }
  609. $rawPost .= '</add>';
  610. return $this->add($rawPost);
  611. }
  612. /**
  613. * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call
  614. *
  615. * @return string
  616. */
  617. protected function _documentToXmlFragment(Apache_Solr_Document $document)
  618. {
  619. $xml = '<doc>';
  620. foreach ($document as $key => $value)
  621. {
  622. $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');
  623. if (is_array($value))
  624. {
  625. foreach ($value as $multivalue)
  626. {
  627. $multivalue = htmlspecialchars($multivalue, ENT_NOQUOTES, 'UTF-8');
  628. $xml .= '<field name="' . $key . '">' . $multivalue . '</field>';
  629. }
  630. }
  631. else
  632. {
  633. $value = htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8');
  634. $xml .= '<field name="' . $key . '">' . $value . '</field>';
  635. }
  636. }
  637. $xml .= '</doc>';
  638. return $xml;
  639. }
  640. /**
  641. * Send a commit command. Will be synchronous unless both wait parameters are set to false.
  642. *
  643. * @param boolean $optimize Defaults to true
  644. * @param boolean $waitFlush Defaults to true
  645. * @param boolean $waitSearcher Defaults to true
  646. * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour
  647. * @return Apache_Solr_Response
  648. *
  649. * @throws Exception If an error occurs during the service call
  650. */
  651. public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600)
  652. {
  653. $optimizeValue = $optimize ? 'true' : 'false';
  654. $flushValue = $waitFlush ? 'true' : 'false';
  655. $searcherValue = $waitSearcher ? 'true' : 'false';
  656. $rawPost = '<commit optimize="' . $optimizeValue . '" waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />';
  657. return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
  658. }
  659. /**
  660. * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be
  661. * a complete and well formed "delete" xml document
  662. *
  663. * @param string $rawPost Expected to be utf-8 encoded xml document
  664. * @return Apache_Solr_Response
  665. *
  666. * @throws Exception If an error occurs during the service call
  667. */
  668. public function delete($rawPost)
  669. {
  670. return $this->_sendRawPost($this->_updateUrl, $rawPost);
  671. }
  672. /**
  673. * Create a delete document based on document ID
  674. *
  675. * @param string $id Expected to be utf-8 encoded
  676. * @param boolean $fromPending
  677. * @param boolean $fromCommitted
  678. * @return Apache_Solr_Response
  679. *
  680. * @throws Exception If an error occurs during the service call
  681. */
  682. public function deleteById($id, $fromPending = true, $fromCommitted = true)
  683. {
  684. $pendingValue = $fromPending ? 'true' : 'false';
  685. $committedValue = $fromCommitted ? 'true' : 'false';
  686. //escape special xml characters
  687. $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');
  688. $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><id>' . $id . '</id></delete>';
  689. return $this->delete($rawPost);
  690. }
  691. /**
  692. * Create a delete document based on a query and submit it
  693. *
  694. * @param string $rawQuery Expected to be utf-8 encoded
  695. * @param boolean $fromPending
  696. * @param boolean $fromCommitted
  697. * @return Apache_Solr_Response
  698. *
  699. * @throws Exception If an error occurs during the service call
  700. */
  701. public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true)
  702. {
  703. $pendingValue = $fromPending ? 'true' : 'false';
  704. $committedValue = $fromCommitted ? 'true' : 'false';
  705. // escape special xml characters
  706. $rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
  707. $rawPost = '<delete fromPending="' . $pendingValue . '" fromCommitted="' . $committedValue . '"><query>' . $rawQuery . '</query></delete>';
  708. return $this->delete($rawPost);
  709. }
  710. /**
  711. * Send an optimize command. Will be synchronous unless both wait parameters are set
  712. * to false.
  713. *
  714. * @param boolean $waitFlush
  715. * @param boolean $waitSearcher
  716. * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
  717. * @return Apache_Solr_Response
  718. *
  719. * @throws Exception If an error occurs during the service call
  720. */
  721. public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600)
  722. {
  723. $flushValue = $waitFlush ? 'true' : 'false';
  724. $searcherValue = $waitSearcher ? 'true' : 'false';
  725. $rawPost = '<optimize waitFlush="' . $flushValue . '" waitSearcher="' . $searcherValue . '" />';
  726. return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
  727. }
  728. /**
  729. * Simple Search interface
  730. *
  731. * @param string $query The raw query string
  732. * @param int $offset The starting offset for result documents
  733. * @param int $limit The maximum number of result documents to return
  734. * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
  735. * @return Apache_Solr_Response
  736. *
  737. * @throws Exception If an error occurs during the service call
  738. */
  739. public function search($query, $offset = 0, $limit = 10, $params = array())
  740. {
  741. if (!is_array($params))
  742. {
  743. $params = array();
  744. }
  745. // construct our full parameters
  746. // sending the version is important in case the format changes
  747. $params['version'] = self::SOLR_VERSION;
  748. // common parameters in this interface
  749. $params['wt'] = self::SOLR_WRITER;
  750. $params['json.nl'] = $this->_namedListTreatment;
  751. $params['q'] = $query;
  752. $params['start'] = $offset;
  753. $params['rows'] = $limit;
  754. // use http_build_query to encode our arguments because its faster
  755. // than urlencoding all the parts ourselves in a loop
  756. $queryString = http_build_query($params, null, $this->_queryStringDelimiter);
  757. // because http_build_query treats arrays differently than we want to, correct the query
  758. // string by changing foo[#]=bar (# being an actual number) parameter strings to just
  759. // multiple foo=bar strings. This regex should always work since '=' will be urlencoded
  760. // anywhere else the regex isn't expecting it
  761. $queryString = preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
  762. return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString);
  763. }
  764. }