PageRenderTime 53ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/inc/lib/Zend/Search/Lucene/Index/Writer.php

https://bitbucket.org/yoander/mtrack
PHP | 841 lines | 451 code | 117 blank | 273 comment | 102 complexity | 0ab8aefff5da8cc22437ddc9f6e27b22 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: Writer.php 23775 2011-03-01 17:25:24Z ralph $
  21. */
  22. /** Zend_Search_Lucene_LockManager */
  23. require_once 'Zend/Search/Lucene/LockManager.php';
  24. /**
  25. * @category Zend
  26. * @package Zend_Search_Lucene
  27. * @subpackage Index
  28. * @copyright Copyright (c) 2005-2011 Zend Technologies USA Inc. (http://www.zend.com)
  29. * @license http://framework.zend.com/license/new-bsd New BSD License
  30. */
  31. class Zend_Search_Lucene_Index_Writer
  32. {
  /**
   * @todo Implement Analyzer substitution
   * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM
   *       and use them for temporary index files
   * @todo Directory lock processing
   */

  /**
   * Buffered documents threshold.
   *
   * addDocument() commits the current in-memory segment as soon as it holds
   * this many documents. The same value is the size of the smallest merge
   * block used by the auto-optimization procedure.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $maxBufferedDocs = 10;

  /**
   * Upper bound for the merge block size used by auto-optimization.
   *
   * Merge planning stops as soon as the block size threshold grows beyond
   * this value. Small values (e.g., less than 10,000) suit interactive
   * indexing because they limit the length of pauses while indexing;
   * larger values suit batched indexing and speedier searches.
   *
   * Default value is PHP_INT_MAX
   *
   * @var integer
   */
  public $maxMergeDocs = PHP_INT_MAX;

  /**
   * Growth factor of the merge block size, i.e. how often segments are merged by addDocument().
   *
   * The block size threshold starts at $maxBufferedDocs and is multiplied by
   * this factor every time a segment is too large for the current block.
   *
   * Larger values (> 10) are best for batch index creation,
   * smaller values (< 10) for indices that are interactively maintained.
   *
   * Default value is 10
   *
   * @var integer
   */
  public $mergeFactor = 10;

  /**
   * Storage directory the index files are read from and written to.
   *
   * @var Zend_Search_Lucene_Storage_Directory
   */
  private $_directory = null;

  /**
   * Number of changes made since the index version was last written.
   *
   * Added to the stored index version (and reset) by _updateSegments().
   *
   * @var integer
   */
  private $_versionUpdate = 0;

  /**
   * Segments created by this writer and not yet listed in the segments file.
   * Array of Zend_Search_Lucene_Index_SegmentInfo objects keyed by segment name.
   *
   * @var array
   */
  private $_newSegments = array();

  /**
   * Names of the segments to be dropped on the next commit (name => name).
   *
   * @var array
   */
  private $_segmentsToDelete = array();

  /**
   * Segment writer currently receiving added documents (null when there is none).
   *
   * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
   */
  private $_currentSegment = null;

  /**
   * Zend_Search_Lucene_Index_SegmentInfo objects of this index, keyed by segment name.
   *
   * Bound by reference to the array handed to the constructor
   * (the corresponding Zend_Search_Lucene::$_segmentInfos array).
   *
   * @var array Zend_Search_Lucene_Index_SegmentInfo
   */
  private $_segmentInfos;

  /**
   * Format version the segments file is written in.
   *
   * @var integer
   */
  private $_targetFormatVersion;

  /**
   * Known index file extensions.
   *
   * Keyed by the extension itself so that membership can be tested with isset().
   *
   * @var array
   */
  private static $_indexExtensions = array(
      '.cfs' => '.cfs',
      '.cfx' => '.cfx',
      '.fnm' => '.fnm',
      '.fdx' => '.fdx',
      '.fdt' => '.fdt',
      '.tis' => '.tis',
      '.tii' => '.tii',
      '.frq' => '.frq',
      '.prx' => '.prx',
      '.tvx' => '.tvx',
      '.tvd' => '.tvd',
      '.tvf' => '.tvf',
      '.del' => '.del',
      '.sti' => '.sti'
  );
  /**
   * Create an empty index in the given directory.
   *
   * A zero $generation produces the pre-2.1 layout: index files already present
   * are removed, then empty 'segments' and 'deletable' files are written.
   * Any other $generation writes 'segments.gen' and the segments file of that
   * generation; existing files are left untouched in this mode.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param integer $generation
   * @param integer $nameCount
   */
  public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
  {
      if ($generation != 0) {
          $genFile = $directory->createFile('segments.gen');
          $genFile->writeInt((int)0xFFFFFFFE);
          // The generation number is stored twice
          $genFile->writeLong($generation);
          $genFile->writeLong($generation);

          $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
          $segmentsFile->writeInt((int)0xFFFFFFFD);
          // index version, initialized by the current time
          $segmentsFile->writeLong(round(microtime(true)));
          // segment name counter
          $segmentsFile->writeInt($nameCount);
          // number of segments
          $segmentsFile->writeInt(0);

          return;
      }

      // Pre-2.1 mode: start by wiping whatever index files are already there
      foreach ($directory->fileList() as $file) {
          $isIndexFile = $file == 'deletable'
                      || $file == 'segments'
                      || isset(self::$_indexExtensions[ substr($file, strlen($file)-4)])
                      // '<segment_name>.f<decimal_number>' file names
                      || preg_match('/\.f\d+$/i', $file);

          if ($isIndexFile) {
              $directory->deleteFile($file);
          }
      }

      $segmentsFile = $directory->createFile('segments');
      $segmentsFile->writeInt((int)0xFFFFFFFF);
      // index version, initialized by the current time
      $segmentsFile->writeLong(round(microtime(true)));
      // segment name counter
      $segmentsFile->writeInt($nameCount);
      // number of segments
      $segmentsFile->writeInt(0);

      $deletableFile = $directory->createFile('deletable');
      // number of deletable entries
      $deletableFile->writeInt(0);
  }
  /**
   * Open the index for writing.
   *
   * $segmentInfos is taken by reference: segments loaded or dropped by the
   * writer show up in the caller's array as well.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param array $segmentInfos
   * @param integer $targetFormatVersion
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
  {
      $this->_targetFormatVersion = $targetFormatVersion;
      $this->_directory           = $directory;
      // Keep the reference, not a copy
      $this->_segmentInfos        = &$segmentInfos;
  }
  /**
   * Adds a document to this index.
   *
   * The document goes into the current in-memory segment (created on demand).
   * Once that segment holds $maxBufferedDocs documents it is committed, and
   * after every addition the auto-optimization check is run.
   *
   * @param Zend_Search_Lucene_Document $document
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
      require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

      if ($this->_currentSegment === null) {
          $this->_currentSegment = new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter(
              $this->_directory,
              $this->_newSegmentName()
          );
      }

      $segmentWriter = $this->_currentSegment;
      $segmentWriter->addDocument($document);

      $bufferIsFull = $segmentWriter->count() >= $this->maxBufferedDocs;
      if ($bufferIsFull) {
          $this->commit();
      }

      $this->_maybeMergeSegments();

      $this->_versionUpdate += 1;
  }
  /**
   * Check if we have anything to merge.
   *
   * Segments are visited in ascending order of size and accumulated into a block.
   * The block size threshold starts at $maxBufferedDocs and is multiplied by
   * $mergeFactor whenever a segment is too large for the current block. A merge
   * is due as soon as an accumulated block reaches its threshold.
   *
   * Returns false when the threshold grows beyond $maxMergeDocs, and also when
   * it cannot grow at all (e.g. $mergeFactor <= 1), which would otherwise keep
   * the inner loop spinning forever.
   *
   * @return boolean
   */
  private function _hasAnythingToMerge()
  {
      $segmentSizes = array();
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          $segmentSizes[$segName] = $segmentInfo->count();
      }
      asort($segmentSizes, SORT_NUMERIC);

      // Only the accumulated size matters here; the segments themselves are not needed
      $poolSize    = 0;
      $sizeToMerge = $this->maxBufferedDocs;

      foreach ($segmentSizes as $size) {
          // Check, if segment comes into a new merging block
          while ($size >= $sizeToMerge) {
              // Previous block is large enough to be merged
              if ($poolSize >= $sizeToMerge) {
                  return true;
              }

              $poolSize = 0;

              $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
              if ($nextSizeToMerge > $this->maxMergeDocs) {
                  return false;
              }
              if ($nextSizeToMerge <= $sizeToMerge) {
                  // Threshold does not grow: this segment can never fit into a block
                  return false;
              }
              $sizeToMerge = $nextSizeToMerge;
          }

          $poolSize += $size;
      }

      return $poolSize >= $sizeToMerge;
  }
  /**
   * Merge segments if necessary (auto-optimization).
   *
   * Does nothing when the optimization lock cannot be obtained. Otherwise the
   * segments are visited in ascending order of size and accumulated into blocks;
   * the block size threshold starts at $maxBufferedDocs and is multiplied by
   * $mergeFactor whenever a segment is too large for the current block. Every
   * block that reaches its threshold is merged into one segment. Planning stops
   * once the threshold exceeds $maxMergeDocs, or cannot grow any more
   * (e.g. $mergeFactor <= 1), which would otherwise loop forever.
   *
   * The optimization lock is released on every exit path, including the case
   * where updating or merging the segments throws.
   */
  private function _maybeMergeSegments()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return;
      }

      try {
          if ($this->_hasAnythingToMerge()) {
              // Update segments list to be sure all segments are not merged yet by another process
              //
              // Segment merging functionality is concentrated in this class and surrounded
              // by optimization lock obtaining/releasing.
              // _updateSegments() refreshes segments list from the latest index generation.
              // So only new segments can be added to the index while we are merging some already existing
              // segments.
              // Newly added segments will be also included into the index by the _updateSegments() call
              // either by another process or by the current process with the commit() call at the end
              // of _mergeSegments() method.
              // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
              $this->_updateSegments();

              // Perform standard auto-optimization procedure
              $segmentSizes = array();
              foreach ($this->_segmentInfos as $segName => $segmentInfo) {
                  $segmentSizes[$segName] = $segmentInfo->count();
              }
              asort($segmentSizes, SORT_NUMERIC);

              $mergePool    = array();
              $poolSize     = 0;
              $sizeToMerge  = $this->maxBufferedDocs;
              $limitReached = false;

              foreach ($segmentSizes as $segName => $size) {
                  // Check, if segment comes into a new merging block
                  while ($size >= $sizeToMerge) {
                      // Merge previous block if it's large enough
                      if ($poolSize >= $sizeToMerge) {
                          $this->_mergeSegments($mergePool);
                      }
                      $mergePool = array();
                      $poolSize  = 0;

                      $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
                      if ($nextSizeToMerge > $this->maxMergeDocs || $nextSizeToMerge <= $sizeToMerge) {
                          // Either the configured limit is reached or the threshold cannot grow
                          $limitReached = true;
                          break 2;
                      }
                      $sizeToMerge = $nextSizeToMerge;
                  }

                  $mergePool[] = $this->_segmentInfos[$segName];
                  $poolSize   += $size;
              }

              if (!$limitReached && $poolSize >= $sizeToMerge) {
                  $this->_mergeSegments($mergePool);
              }
          }
      } catch (Exception $e) {
          // Do not keep the optimization lock when updating/merging fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
  }
  /**
   * Merge specified segments into one new segment and commit the result.
   *
   * $segments is an array of SegmentInfo objects.
   *
   * The source segments are queued for deletion only after the merge itself has
   * succeeded. If the merger throws, nothing is queued, so a later commit does
   * not drop segments that have no merged replacement.
   *
   * @param array $segments
   */
  private function _mergeSegments($segments)
  {
      $newName = $this->_newSegmentName();

      /** Zend_Search_Lucene_Index_SegmentMerger */
      require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
      $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
                                                           $newName);

      $mergedNames = array();
      foreach ($segments as $segmentInfo) {
          $merger->addSource($segmentInfo);
          $mergedNames[] = $segmentInfo->getName();
      }

      $newSegment = $merger->merge();

      // The merge went through: the sources may be dropped on commit now
      foreach ($mergedNames as $mergedName) {
          $this->_segmentsToDelete[$mergedName] = $mergedName;
      }

      if ($newSegment !== null) {
          $this->_newSegments[$newSegment->getName()] = $newSegment;
      }

      $this->commit();
  }
  /**
   * Write the next generation of the segments file and prune the index.
   *
   * Under the exclusive write lock this method:
   *  - writes down pending changes of every known segment;
   *  - copies the current segments file into a new generation, skipping the
   *    segments queued in $_segmentsToDelete and appending those collected
   *    in $_newSegments;
   *  - loads into $_segmentInfos every listed segment that is not known yet;
   *  - records the new generation number in 'segments.gen';
   *  - deletes obsolete files if the read lock can be escalated, otherwise
   *    only purges the files of the merged segments from the directory object;
   *  - removes from $_segmentInfos every segment missing from the new generation.
   *
   * The write lock is released when the preparation, the segments file writing
   * or the directory clean-up fails. A failure while writing the new segments
   * file also restores the previous generation number in 'segments.gen', and
   * that rollback is done before the lock is given up.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _updateSegments()
  {
      // Get an exclusive index lock
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      try {
          // Write down changes for the segments
          foreach ($this->_segmentInfos as $segInfo) {
              $segInfo->writeChanges();
          }

          $generation     = Zend_Search_Lucene::getActualGeneration($this->_directory);
          $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
          $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

          try {
              $genFile = $this->_directory->getFileObject('segments.gen', false);
          } catch (Zend_Search_Lucene_Exception $e) {
              if (strpos($e->getMessage(), 'is not readable') !== false) {
                  $genFile = $this->_directory->createFile('segments.gen');
              } else {
                  throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
              }
          }

          $genFile->writeInt((int)0xFFFFFFFE);
          // Write generation (first copy)
          $genFile->writeLong($generation);
      } catch (Exception $e) {
          // Nothing can be rolled back at this stage, but the write lock must not be kept
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
          throw $e;
      }

      try {
          // Write format marker
          if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
              $newSegmentFile->writeInt((int)0xFFFFFFFD);
          } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
              $newSegmentFile->writeInt((int)0xFFFFFFFC);
          }

          // Read src file format identifier
          $format = $segmentsFile->readInt();
          if ($format == (int)0xFFFFFFFF) {
              $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
          } else if ($format == (int)0xFFFFFFFD) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
          } else if ($format == (int)0xFFFFFFFC) {
              $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
          } else {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
          }

          $version = $segmentsFile->readLong() + $this->_versionUpdate;
          $this->_versionUpdate = 0;
          $newSegmentFile->writeLong($version);

          // Write segment name counter
          $newSegmentFile->writeInt($segmentsFile->readInt());

          // Get number of segments offset
          $numOfSegmentsOffset = $newSegmentFile->tell();
          // Write dummy data (segment counter)
          $newSegmentFile->writeInt(0);

          // Read number of segments
          $segmentsCount = $segmentsFile->readInt();

          $segments = array();
          for ($count = 0; $count < $segmentsCount; $count++) {
              $segName = $segmentsFile->readString();
              $segSize = $segmentsFile->readInt();

              if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                  // pre-2.1 index format
                  $delGen            = 0;
                  $hasSingleNormFile = false;
                  $numField          = (int)0xFFFFFFFF;
                  $isCompoundByte    = 0;
                  $docStoreOptions   = null;
              } else {
                  $delGen = $segmentsFile->readLong();

                  if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                      $docStoreOffset = $segmentsFile->readInt();

                      if ($docStoreOffset != (int)0xFFFFFFFF) {
                          $docStoreSegment        = $segmentsFile->readString();
                          $docStoreIsCompoundFile = $segmentsFile->readByte();

                          $docStoreOptions = array('offset'     => $docStoreOffset,
                                                   'segment'    => $docStoreSegment,
                                                   'isCompound' => ($docStoreIsCompoundFile == 1));
                      } else {
                          $docStoreOptions = null;
                      }
                  } else {
                      $docStoreOptions = null;
                  }

                  $hasSingleNormFile = $segmentsFile->readByte();
                  $numField          = $segmentsFile->readInt();

                  $normGens = array();
                  if ($numField != (int)0xFFFFFFFF) {
                      for ($count1 = 0; $count1 < $numField; $count1++) {
                          $normGens[] = $segmentsFile->readLong();
                      }
                  }
                  $isCompoundByte = $segmentsFile->readByte();
              }

              // $_segmentsToDelete is keyed by segment name, so a key lookup is enough
              if (!isset($this->_segmentsToDelete[$segName])) {
                  // Load segment if necessary
                  if (!isset($this->_segmentInfos[$segName])) {
                      if ($isCompoundByte == 0xFF) {
                          // The segment is not a compound file
                          $isCompound = false;
                      } else if ($isCompoundByte == 0x01) {
                          // The segment is a compound file
                          $isCompound = true;
                      } else {
                          // 0x00: the status is unknown. Any unrecognised marker is treated the
                          // same way instead of reusing the value left by a previous segment.
                          $isCompound = null;
                      }

                      /** Zend_Search_Lucene_Index_SegmentInfo */
                      require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                      $this->_segmentInfos[$segName] =
                                  new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                           $segName,
                                                                           $segSize,
                                                                           $delGen,
                                                                           $docStoreOptions,
                                                                           $hasSingleNormFile,
                                                                           $isCompound);
                  } else {
                      // Retrieve actual deletions file generation number
                      $delGen = $this->_segmentInfos[$segName]->getDelGen();
                  }

                  $newSegmentFile->writeString($segName);
                  $newSegmentFile->writeInt($segSize);
                  $newSegmentFile->writeLong($delGen);
                  if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                      if ($docStoreOptions !== null) {
                          $newSegmentFile->writeInt($docStoreOffset);
                          $newSegmentFile->writeString($docStoreSegment);
                          $newSegmentFile->writeByte($docStoreIsCompoundFile);
                      } else {
                          // Set DocStoreOffset to -1
                          $newSegmentFile->writeInt((int)0xFFFFFFFF);
                      }
                  } else if ($docStoreOptions !== null) {
                      // The write lock is released by the catch block below, after the
                      // generation number has been restored in 'segments.gen'
                      require_once 'Zend/Search/Lucene/Exception.php';
                      throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                  }

                  $newSegmentFile->writeByte($hasSingleNormFile);
                  $newSegmentFile->writeInt($numField);
                  if ($numField != (int)0xFFFFFFFF) {
                      foreach ($normGens as $normGen) {
                          $newSegmentFile->writeLong($normGen);
                      }
                  }
                  $newSegmentFile->writeByte($isCompoundByte);

                  $segments[$segName] = $segSize;
              }
          }
          $segmentsFile->close();

          $segmentsCount = count($segments) + count($this->_newSegments);

          foreach ($this->_newSegments as $segName => $segmentInfo) {
              $newSegmentFile->writeString($segName);
              $newSegmentFile->writeInt($segmentInfo->count());

              // delete file generation: -1 (there is no delete file yet)
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                  // docStoreOffset: -1 (segment doesn't use shared doc store)
                  $newSegmentFile->writeInt((int)0xFFFFFFFF);
              }
              // HasSingleNormFile
              $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
              // NumField
              $newSegmentFile->writeInt((int)0xFFFFFFFF);
              // IsCompoundFile
              $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

              $segments[$segmentInfo->getName()] = $segmentInfo->count();
              $this->_segmentInfos[$segName] = $segmentInfo;
          }
          $this->_newSegments = array();

          $newSegmentFile->seek($numOfSegmentsOffset);
          $newSegmentFile->writeInt($segmentsCount);  // Update segments count
          $newSegmentFile->close();
      } catch (Exception $e) {
          /** Restore previous index generation */
          $generation--;
          $genFile->seek(4, SEEK_SET);
          // Write generation number twice
          $genFile->writeLong($generation);
          $genFile->writeLong($generation);

          // Release index write lock
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

          // Throw the exception
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
      }

      // Write generation (second copy)
      $genFile->writeLong($generation);

      // Check if another update or read process is not running now
      // If yes, skip clean-up procedure
      if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
          try {
              $this->_cleanUpDirectory($generation, $segments);
          } catch (Exception $e) {
              // Give both locks back before passing the failure through
              Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
              Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
              throw $e;
          }

          // Return read lock into the previous state
          Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
      } else {
          // Only release resources if another index reader is running now
          foreach ($this->_segmentsToDelete as $segName) {
              foreach (self::$_indexExtensions as $ext) {
                  $this->_directory->purgeFile($segName . $ext);
              }
          }
      }

      // Clean-up _segmentsToDelete container
      $this->_segmentsToDelete = array();

      // Release index write lock
      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      // Remove unused segments from segments list
      foreach ($this->_segmentInfos as $segName => $segmentInfo) {
          if (!isset($segments[$segName])) {
              unset($this->_segmentInfos[$segName]);
          }
      }
  }

  /**
   * Delete the files which do not belong to the given index generation.
   *
   * Candidates are the 'deletable' and 'segments' files, segments files of other
   * generations, per segment files of segments not listed in $segments (the segment
   * currently being written is spared) and all but the latest '.del' generation
   * of the listed segments. '.cfx' files are never deleted.
   *
   * @param integer $generation  generation of the segments file to keep
   * @param array   $segments    segment name => size map of the segments in use
   * @throws Zend_Search_Lucene_Exception
   */
  private function _cleanUpDirectory($generation, $segments)
  {
      $filesToDelete = array();
      $filesTypes    = array();
      $filesNumbers  = array();

      // list of .del files of currently used segments
      // each segment can have several generations of .del files
      // only last should not be deleted
      $delFiles = array();

      foreach ($this->_directory->fileList() as $file) {
          if ($file == 'deletable') {
              // 'deletable' file
              $filesToDelete[] = $file;
              $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
              $filesNumbers[]  = 0;
          } else if ($file == 'segments') {
              // 'segments' file
              $filesToDelete[] = $file;
              $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
              $filesNumbers[]  = 0;
          } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
              // 'segments_xxx' file
              // Check if it's not a just created generation file
              if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 2; // first group of files for deletions
                  $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
              }
          } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
              // one of per segment files ('<segment_name>.f<decimal_number>')
              // Check if it's not one of the segments in the current segments set
              if (!isset($segments[$matches[1]])) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 3; // second group of files for deletions
                  $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
              }
          } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
              // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
              // Check if it's not one of the segments in the current segments set
              if (!isset($segments[$matches[1]])) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 3; // second group of files for deletions
                  $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
              } else {
                  $segmentNumber = (int)base_convert($matches[2], 36, 10);
                  $delGeneration = (int)base_convert($matches[4], 36, 10);
                  if (!isset($delFiles[$segmentNumber])) {
                      $delFiles[$segmentNumber] = array();
                  }
                  $delFiles[$segmentNumber][$delGeneration] = $file;
              }
          } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
              // one of per segment files ('<segment_name>.<ext>')
              $segmentName = substr($file, 0, strlen($file) - 4);
              // Check if it's not one of the segments in the current segments set
              if (!isset($segments[$segmentName])  &&
                  ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                  $filesToDelete[] = $file;
                  $filesTypes[]    = 3; // second group of files for deletions
                  $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
              }
          }
      }

      $maxGenNumber = 0;
      // process .del files of currently used segments
      foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
          ksort($delFiles[$segmentNumber], SORT_NUMERIC);
          array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

          end($delFiles[$segmentNumber]);
          $lastGenNumber = key($delFiles[$segmentNumber]);
          if ($lastGenNumber > $maxGenNumber) {
              $maxGenNumber = $lastGenNumber;
          }
      }
      foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
          foreach ($segmentDelFiles as $delGeneration => $file) {
              $filesToDelete[] = $file;
              $filesTypes[]    = 4; // third group of files for deletions
              $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
          }
      }

      // Reorder files for deleting
      array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                      $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                      $filesToDelete, SORT_ASC, SORT_STRING);

      foreach ($filesToDelete as $file) {
          try {
              /** Skip shared docstore segments deleting */
              /** @todo Process '.cfx' files to check if them are already unused */
              if (substr($file, strlen($file)-4) != '.cfx') {
                  $this->_directory->deleteFile($file);
              }
          } catch (Zend_Search_Lucene_Exception $e) {
              if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                  // That's not "file is under processing or already deleted" exception
                  // Pass it through
                  throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
              }
          }
      }
  }
  /**
   * Commit current changes.
   *
   * Closes the segment currently being written (if any), registers the segment
   * it produced as a new one and writes a new generation of the segments file.
   */
  public function commit()
  {
      $segmentWriter = $this->_currentSegment;

      if ($segmentWriter !== null) {
          $closedSegment = $segmentWriter->close();

          // close() may yield nothing; only a real segment is registered
          if ($closedSegment !== null) {
              $this->_newSegments[$closedSegment->getName()] = $closedSegment;
          }

          $this->_currentSegment = null;
      }

      $this->_updateSegments();
  }
  /**
   * Merges the provided indexes into this index.
   *
   * Not implemented yet: the method currently does nothing with $readers.
   *
   * @todo implementation
   *
   * @param array $readers
   * @return void
   */
  public function addIndexes($readers)
  {
      // Intentionally empty until the implementation is provided
  }
  /**
   * Merges all segments together into new one.
   *
   * Returns true on success and false if another optimization or auto-optimization process
   * is running now (i.e. the optimization lock could not be obtained).
   *
   * The optimization lock is released even when updating or merging the segments
   * throws; the exception is then passed through unchanged.
   *
   * @return boolean
   */
  public function optimize()
  {
      if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
          return false;
      }

      try {
          // Update segments list to be sure all segments are not merged yet by another process
          //
          // Segment merging functionality is concentrated in this class and surrounded
          // by optimization lock obtaining/releasing.
          // _updateSegments() refreshes segments list from the latest index generation.
          // So only new segments can be added to the index while we are merging some already existing
          // segments.
          // Newly added segments will be also included into the index by the _updateSegments() call
          // either by another process or by the current process with the commit() call at the end
          // of _mergeSegments() method.
          // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
          $this->_updateSegments();

          $this->_mergeSegments($this->_segmentInfos);
      } catch (Exception $e) {
          // Do not keep the optimization lock when the optimization fails
          Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

      return true;
  }
  /**
   * Get name for new segment.
   *
   * Under the write lock, reads the segment name counter stored in the current
   * segments file, writes it back incremented by one and returns the value read
   * as '_<counter in base 36>'.
   *
   * The write lock is released even when accessing the segments file throws;
   * the exception is then passed through unchanged.
   *
   * @return string
   */
  private function _newSegmentName()
  {
      Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

      try {
          $generation   = Zend_Search_Lucene::getActualGeneration($this->_directory);
          $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentNameCounter = $segmentsFile->readInt();

          $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
          $segmentsFile->writeInt($segmentNameCounter + 1);

          // Flush output to guarantee that wrong value will not be loaded between unlock and
          // return (which calls $segmentsFile destructor)
          $segmentsFile->flush();
      } catch (Exception $e) {
          // Do not keep the write lock when the counter cannot be read or updated
          Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
          throw $e;
      }

      Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

      return '_' . base_convert($segmentNameCounter, 10, 36);
  }
  724. }