PageRenderTime 45ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/incubator/library/Zym/Search/Lucene/Index.php

https://github.com/robinsk/zym
PHP | 427 lines | 300 code | 22 blank | 105 comment | 2 complexity | e1f2703c6c6f5bc539de315f94e3f172 MD5 | raw file
  1. <?php
  2. /**
  3. * Zym
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. *
  10. * @author Jurrien Stutterheim
  11. * @category Zym
  12. * @package Zym_Search_Lucene
  13. * @copyright Copyright (c) 2008 Zym. (http://www.zym-project.com/)
  14. * @license http://www.zym-project.com/license New BSD License
  15. */
  16. /**
  17. * @see Zend_Paginator
  18. */
  19. require_once 'Zend/Paginator.php';
  20. /**
  21. * @see Zend_Search_Lucene
  22. */
  23. require_once 'Zend/Search/Lucene.php';
  24. /**
  25. * @see Zend_Search_Lucene_Index_Term
  26. */
  27. require_once 'Zend/Search/Lucene/Index/Term.php';
  28. /**
  29. * @see Zend_Registry
  30. */
  31. require_once 'Zend/Registry.php';
  /**
   * Wrapper around a Zend_Search_Lucene index.
   *
   * Adds record-ID based deletion and (re)indexing of
   * Zym_Search_Lucene_IIndexable objects, optional caching of search results,
   * and a factory that shares the underlying Zend_Search_Lucene instances
   * through Zend_Registry.
   *
   * @author Jurrien Stutterheim
   * @category Zym
   * @package Zym_Search_Lucene
   * @copyright Copyright (c) 2008 Zym. (http://www.zym-project.com/)
   * @license http://www.zym-project.com/license New BSD License
   */
  class Zym_Search_Lucene_Index
  {
      /**
       * Registry key prefix
       *
       * Prepended to the index path to build the Zend_Registry key under which
       * an opened index is shared.
       */
      const REGISTRY_PREFIX = 'lucene://';

      /**
       * Default index class
       *
       * Name of the class instantiated by {@link factory()} when no
       * 'indexClass' parameter is given.
       *
       * @var string
       */
      protected static $_defaultIndexClass = 'Zym_Search_Lucene_Index';

      /**
       * Default path for the search index.
       * Useful when the application has just one search index.
       *
       * Only used if set.
       *
       * @var string
       */
      protected static $_defaultIndexPath = null;

      /**
       * Default resultset limit
       *
       * Handed to Zend_Search_Lucene::setResultSetLimit() when search() is
       * called without an explicit limit.
       *
       * @var int
       */
      protected static $_defaultResultSetLimit = 0;

      /**
       * Result cache
       *
       * Shared by all instances; null disables result caching.
       *
       * @var Zend_Cache_Core
       */
      protected static $_resultCache = null;

      /**
       * Default record ID key
       *
       * @var string
       */
      public static $defaultIdKey = 'zsl_record_id';

      /**
       * Record ID key
       *
       * @var string
       */
      protected $_idKey = null;

      /**
       * The search index
       *
       * @var Zend_Search_Lucene_Interface
       */
      protected $_searchIndex = null;

      /**
       * Set the default index class
       *
       * @param string $class Name of the class factory() should instantiate
       */
      public static function setDefaultIndexClass($class)
      {
          self::$_defaultIndexClass = $class;
      }

      /**
       * Set the default index path
       *
       * @param string $path
       */
      public static function setDefaultIndexPath($path)
      {
          self::$_defaultIndexPath = $path;
      }

      /**
       * Set the default resultset limit
       *
       * @param int $limit
       */
      public static function setDefaultResultSetLimit($limit)
      {
          self::$_defaultResultSetLimit = (int) $limit;
      }

      /**
       * Set the result cache
       *
       * @param Zend_Cache_Core $cache
       */
      public static function setResultCache(Zend_Cache_Core $cache)
      {
          self::$_resultCache = $cache;
      }

      /**
       * Get an index wrapper for the index stored at the given path.
       *
       * The underlying Zend_Search_Lucene instance is opened (or created) once
       * per resolved path and shared through Zend_Registry; every call returns
       * a new wrapper object around that shared instance.
       *
       * Supported $params keys:
       *  - 'useDefaultPath'    (bool, default true): resolve $indexPath relative
       *    to the default index path, when one has been set.
       *  - 'createIfNotExists' (bool, default true): create the index when the
       *    resolved path does not exist instead of throwing.
       *  - 'indexClass'        (string): wrapper class to instantiate; defaults
       *    to the configured default index class.
       *
       * @throws Zym_Search_Lucene_Exception When no path can be resolved, or the
       *                                     index is missing and may not be created
       * @param string $indexPath
       * @param array $params
       * @return Zym_Search_Lucene_Index
       */
      public static function factory($indexPath = null, array $params = array())
      {
          $defaultParams = array('useDefaultPath'    => true,
                                 'createIfNotExists' => true,
                                 'indexClass'        => self::$_defaultIndexClass);

          $params = array_merge($defaultParams, $params);

          $useDefaultPath    = $params['useDefaultPath'];
          $createIfNotExists = $params['createIfNotExists'];
          $indexClass        = $params['indexClass'];

          if (!$indexPath && !self::$_defaultIndexPath) {
              /**
               * @see Zym_Search_Lucene_Exception
               */
              require_once 'Zym/Search/Lucene/Exception.php';

              throw new Zym_Search_Lucene_Exception('No index path specified');
          }

          $trimMask = '/\\';

          // Normalise trailing separators so that "foo" and "foo/" resolve to
          // the same registry key (and therefore the same shared index).
          $indexPath = rtrim((string) $indexPath, $trimMask);

          // Only prepend the default path when one is actually configured;
          // otherwise a relative path would silently be turned into "/path".
          if ($useDefaultPath && self::$_defaultIndexPath) {
              $basePath     = rtrim(self::$_defaultIndexPath, $trimMask);
              $relativePath = ltrim($indexPath, $trimMask);

              if ('' === $relativePath) {
                  // No explicit path: the default path itself is the index.
                  $indexPath = $basePath;
              } else {
                  $indexPath = $basePath . DIRECTORY_SEPARATOR . $relativePath;
              }
          }

          // Reached e.g. when 'useDefaultPath' is false and no explicit path
          // was given: there is nothing to open or create.
          if ('' === $indexPath) {
              /**
               * @see Zym_Search_Lucene_Exception
               */
              require_once 'Zym/Search/Lucene/Exception.php';

              throw new Zym_Search_Lucene_Exception('No index path specified');
          }

          $registryKey = self::REGISTRY_PREFIX . $indexPath;

          if (Zend_Registry::isRegistered($registryKey)) {
              $index = Zend_Registry::get($registryKey);
          } else {
              if (file_exists($indexPath)) {
                  $index = Zend_Search_Lucene::open($indexPath);
              } else {
                  if (!$createIfNotExists) {
                      /**
                       * @see Zym_Search_Lucene_Exception
                       */
                      require_once 'Zym/Search/Lucene/Exception.php';

                      throw new Zym_Search_Lucene_Exception('Index "' . $indexPath . '" does not exists');
                  }

                  $index = Zend_Search_Lucene::create($indexPath);
              }

              Zend_Registry::set($registryKey, $index);
          }

          return new $indexClass($index);
      }

      /**
       * Construct the indexer
       *
       * @param Zend_Search_Lucene_Interface $searchIndex
       * @param string $idKey Record ID field name; falls back to the public
       *                      static $defaultIdKey when empty
       */
      public function __construct(Zend_Search_Lucene_Interface $searchIndex, $idKey = null)
      {
          if (!$idKey) {
              $idKey = self::$defaultIdKey;
          }

          $this->_searchIndex = $searchIndex;
          $this->_idKey       = $idKey;
      }

      /**
       * Remove a record from the search index
       *
       * Deletes every document whose $searchField contains the term $value.
       * If no search field is specified, the record ID field is used.
       *
       * @param string $value
       * @param string $searchField
       * @return Zym_Search_Lucene_Index
       */
      public function delete($value, $searchField = null)
      {
          if (!$searchField) {
              $searchField = $this->_idKey;
          }

          $documentIds = $this->getDocumentIds($value, $searchField);

          foreach ($documentIds as $id) {
              $this->_searchIndex->delete($id);
          }

          return $this;
      }

      /**
       * Get the Lucene document IDs by searching the specified search field.
       * If no search field is specified, the record ID field is used.
       *
       * @param string $value
       * @param string $searchField
       * @return array
       */
      public function getDocumentIds($value, $searchField = null)
      {
          if (!$searchField) {
              $searchField = $this->_idKey;
          }

          $term = new Zend_Search_Lucene_Index_Term($value, $searchField);

          return $this->_searchIndex->termDocs($term);
      }

      /**
       * Get the ID key
       *
       * @return string
       */
      public function getIdKey()
      {
          return $this->_idKey;
      }

      /**
       * Set the ID key
       *
       * @param string $idKey
       * @return Zym_Search_Lucene_Index
       */
      public function setIdKey($idKey)
      {
          $this->_idKey = $idKey;

          return $this;
      }

      /**
       * Get the search index
       *
       * @return Zend_Search_Lucene_Interface
       */
      public function getSearchIndex()
      {
          return $this->_searchIndex;
      }

      /**
       * Index one or more Zym_Search_Lucene_IIndexable objects
       *
       * When $update is true, any documents already stored for the indexable's
       * record ID are removed before the new document is added.
       *
       * @throws Zym_Search_Lucene_Exception
       * @param Zym_Search_Lucene_IIndexable|array $indexables
       * @param boolean $update
       * @param string $searchField Field holding the record ID; defaults to the ID key
       * @return Zym_Search_Lucene_Index
       */
      public function index($indexables, $update = true, $searchField = null)
      {
          if (!is_array($indexables)) {
              $indexables = array($indexables);
          }

          if (!$searchField) {
              $searchField = $this->_idKey;
          }

          foreach ($indexables as $indexable) {
              if (!$indexable instanceof Zym_Search_Lucene_IIndexable) {
                  /**
                   * @see Zym_Search_Lucene_Exception
                   */
                  require_once 'Zym/Search/Lucene/Exception.php';

                  // get_class() is only valid for objects; report the scalar
                  // or array type otherwise.
                  $type = is_object($indexable) ? get_class($indexable) : gettype($indexable);

                  throw new Zym_Search_Lucene_Exception('The object of type "' . $type . '" '
                      . 'is not an instance of Zym_Search_Lucene_IIndexable.');
              }

              $recordId = null;

              if ($update) {
                  $recordId = $indexable->getRecordId();

                  if (!$recordId) {
                      /**
                       * @see Zym_Search_Lucene_Exception
                       */
                      require_once 'Zym/Search/Lucene/Exception.php';

                      throw new Zym_Search_Lucene_Exception('You must provide a valid record ID.');
                  }
              }

              $document = $indexable->getSearchDocument();

              if (!$document instanceof Zend_Search_Lucene_Document) {
                  /**
                   * @see Zym_Search_Lucene_Exception
                   */
                  require_once 'Zym/Search/Lucene/Exception.php';

                  throw new Zym_Search_Lucene_Exception('The provided search-document is not '
                      . 'an instance of Zend_Search_Lucene_Document.');
              }

              // Remove the old entry only once the replacement is known to be
              // valid, so a bad document never costs us the existing record.
              if ($update) {
                  $this->delete($recordId, $searchField);
              }

              $this->_searchIndex->addDocument($document);
          }

          return $this;
      }

      /**
       * Execute the query
       *
       * Note that the result set limit is applied through the static
       * Zend_Search_Lucene::setResultSetLimit() and therefore stays in effect
       * after this call returns.
       *
       * When a result cache has been set, results are looked up and stored
       * under an ID derived from the query and the effective limit.
       *
       * @param string|Zym_Search_Lucene_IQuery $query
       * @param int $resultSetLimit Falls back to the default resultset limit when empty
       * @return array
       */
      public function search($query, $resultSetLimit = null)
      {
          // If the query is an instance of Zym_Search_Lucene_IQuery serialize it to a string
          $query = (string) $query;

          if (!$resultSetLimit) {
              $resultSetLimit = self::$_defaultResultSetLimit;
          }

          $resultSetLimit = (int) $resultSetLimit;

          Zend_Search_Lucene::setResultSetLimit($resultSetLimit);

          $cache = self::$_resultCache;

          if (null === $cache) {
              return $this->_executeSearch($query, $resultSetLimit);
          }

          // The limit is part of the ID: the same query with a different limit
          // yields a different result set and must not share a cache entry.
          $cacheId = $this->_getQueryHash($query) . '_' . $resultSetLimit;

          $results = $cache->load($cacheId);

          // Compare against false explicitly so that a cached empty result set
          // counts as a hit instead of triggering the search again.
          if (false === $results) {
              // The limit is passed along as before; the base implementation
              // of _executeSearch() ignores it (it was applied above).
              $results = $this->_executeSearch($query, $resultSetLimit);

              $this->_cacheResults($cache, $results);
          }

          return $results;
      }

      /**
       * Execute the search and return the results
       *
       * @param string|Zym_Search_Lucene_IQuery $query
       * @return array
       */
      protected function _executeSearch($query)
      {
          return $this->_searchIndex->find((string) $query);
      }

      /**
       * Cache the search results
       *
       * Expired entries are cleaned before the new results are stored. No
       * explicit cache ID is passed to save(): Zend_Cache_Core then falls back
       * to the ID used by its most recent load() call, which is the lookup
       * search() performs right before calling this method.
       *
       * @param Zend_Cache_Core $cache
       * @param array $results
       */
      protected function _cacheResults(Zend_Cache_Core $cache, $results)
      {
          $cache->clean(Zend_Cache::CLEANING_MODE_OLD);
          $cache->save($results);
      }

      /**
       * Get the query hash
       *
       * @param string|Zym_Search_Lucene_IQuery $query
       * @return string MD5 hash of the query string
       */
      protected function _getQueryHash($query)
      {
          return md5($query);
      }
  }