PageRenderTime 59ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/vendor/Zend/Search/Lucene/Index/SegmentMerger.php

https://bitbucket.org/anycode/sfluceneplugin
PHP | 271 lines | 137 code | 38 blank | 96 comment | 17 complexity | 65fd2414f40f694b794a4febf9446933 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id: SegmentMerger.php 20096 2010-01-06 02:05:09Z bkarwin $
  21. */
  22. /** Zend_Search_Lucene_Index_SegmentInfo */
  23. require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
  24. /**
  25. * @category Zend
  26. * @package Zend_Search_Lucene
  27. * @subpackage Index
  28. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  29. * @license http://framework.zend.com/license/new-bsd New BSD License
  30. */
  class Zend_Search_Lucene_Index_SegmentMerger
  {
      /**
       * Target segment writer
       *
       * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
       */
      private $_writer;

      /**
       * Number of non-deleted documents copied into the new segment
       * (reset and counted by _mergeStoredFields())
       *
       * @var integer
       */
      private $_docCount;

      /**
       * Segments to be merged, keyed by segment name
       *
       * @var array Zend_Search_Lucene_Index_SegmentInfo
       */
      private $_segmentInfos = array();

      /**
       * Flag to signal that the merge is already done
       *
       * @var boolean
       */
      private $_mergeDone = false;

      /**
       * Field map
       * [<segment_name>][<field_number>] => <target_field_number>
       *
       * @var array
       */
      private $_fieldsMap = array();

      /**
       * Object constructor.
       *
       * Creates a new segment merger with $directory as the target to merge
       * segments into and $name as the name of the new segment.
       *
       * @param Zend_Search_Lucene_Storage_Directory $directory
       * @param string $name
       */
      public function __construct($directory, $name)
      {
          /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
          require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';

          $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
      }

      /**
       * Add a segment to the collection of segments to be merged.
       *
       * Segments are stored by name, so adding a segment whose name is already
       * registered replaces the earlier entry.
       *
       * @param Zend_Search_Lucene_Index_SegmentInfo $segmentInfo
       */
      public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
      {
          $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
      }

      /**
       * Do merge.
       *
       * Merges field infos, norms, stored fields and terms of all registered
       * source segments into the target writer, then closes the writer and
       * returns the result of that close() call.
       *
       * @return Zend_Search_Lucene_Index_SegmentInfo
       * @throws Zend_Search_Lucene_Exception if the merge was already performed
       *         or if no source segment has been added
       */
      public function merge()
      {
          if ($this->_mergeDone) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Merge is already done.');
          }

          $segmentCount = count($this->_segmentInfos);
          if ($segmentCount < 1) {
              require_once 'Zend/Search/Lucene/Exception.php';
              throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
                                                   . $segmentCount
                                                   . ').');
          }

          $this->_mergeFields();
          $this->_mergeNorms();
          $this->_mergeStoredFields();
          $this->_mergeTerms();

          $this->_mergeDone = true;

          return $this->_writer->close();
      }

      /**
       * Merge fields information.
       *
       * Registers every field of every source segment with the writer and
       * records the field number the writer assigned in $_fieldsMap.
       */
      private function _mergeFields()
      {
          foreach ($this->_segmentInfos as $segName => $segmentInfo) {
              foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
                  $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
              }
          }
      }

      /**
       * Merge fields' normalization factors.
       *
       * For each indexed field known to the writer, appends the norm vector of
       * each source segment. For segments with deletions the vector is rebuilt
       * with the entries of deleted documents left out.
       */
      private function _mergeNorms()
      {
          foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
              if (!$fieldInfo->isIndexed) {
                  continue;
              }

              foreach ($this->_segmentInfos as $segmentInfo) {
                  if ($segmentInfo->hasDeletions()) {
                      $srcNorm = $segmentInfo->normVector($fieldInfo->name);
                      $norm    = '';
                      $docs    = $segmentInfo->count();

                      // Keep only the norm entries that belong to live documents
                      for ($count = 0; $count < $docs; $count++) {
                          if (!$segmentInfo->isDeleted($count)) {
                              $norm .= $srcNorm[$count];
                          }
                      }

                      $this->_writer->addNorm($fieldInfo->name, $norm);
                  } else {
                      $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
                  }
              }
          }
      }

      /**
       * Merge stored fields.
       *
       * Reads the stored-field record of every document from each source
       * segment's '.fdt' file and passes the records of non-deleted documents
       * to the writer. $_docCount is reset and counts the documents written.
       */
      private function _mergeStoredFields()
      {
          /** Zend_Search_Lucene_Field */
          require_once 'Zend/Search/Lucene/Field.php';

          $this->_docCount = 0;

          foreach ($this->_segmentInfos as $segmentInfo) {
              $fdtFile = $segmentInfo->openCompoundFile('.fdt');
              $docs    = $segmentInfo->count();

              for ($count = 0; $count < $docs; $count++) {
                  // The record is read before the deletion check, so records of
                  // deleted documents are consumed from the file as well.
                  $fieldCount   = $fdtFile->readVInt();
                  $storedFields = array();

                  for ($count2 = 0; $count2 < $fieldCount; $count2++) {
                      $fieldNum  = $fdtFile->readVInt();
                      $bits      = $fdtFile->readByte();
                      $fieldInfo = $segmentInfo->getField($fieldNum);

                      // Bit 2 selects binary vs. text data; bit 1 is forwarded as
                      // the sixth constructor argument (presumably the tokenized
                      // flag - confirm against Zend_Search_Lucene_Field).
                      if (!($bits & 2)) { // Text data
                          $storedFields[] =
                                   new Zend_Search_Lucene_Field($fieldInfo->name,
                                                                $fdtFile->readString(),
                                                                'UTF-8',
                                                                true,
                                                                $fieldInfo->isIndexed,
                                                                $bits & 1);
                      } else {            // Binary data
                          $storedFields[] =
                                   new Zend_Search_Lucene_Field($fieldInfo->name,
                                                                $fdtFile->readBinary(),
                                                                '',
                                                                true,
                                                                $fieldInfo->isIndexed,
                                                                $bits & 1,
                                                                true);
                      }
                  }

                  if (!$segmentInfo->isDeleted($count)) {
                      $this->_docCount++;
                      $this->_writer->addStoredFields($storedFields);
                  }
              }
          }
      }

      /**
       * Merge terms dictionaries and term positions.
       *
       * Walks the term streams of all source segments through a priority queue.
       * Positions of entries with the same term key are collected and written
       * as a single term once the next queued entry has a different key.
       */
      private function _mergeTerms()
      {
          /** Zend_Search_Lucene_Index_TermsPriorityQueue */
          require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

          $segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

          // The value returned by each resetTermsStream() call is used as the
          // start id for the next segment.
          $segmentStartId = 0;
          foreach ($this->_segmentInfos as $segmentInfo) {
              $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);

              // Skip "empty" segments
              if ($segmentInfo->currentTerm() !== null) {
                  $segmentInfoQueue->put($segmentInfo);
              }
          }

          $this->_writer->initializeDictionaryFiles();

          $termDocs = array();
          while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
              // Merge positions array
              $termDocs += $segmentInfo->currentTermPositions();

              if ($segmentInfoQueue->top() === null ||
                  $segmentInfoQueue->top()->currentTerm()->key() !=
                              $segmentInfo->currentTerm()->key()) {
                  // The next queued entry has a different term: flush this one
                  ksort($termDocs, SORT_NUMERIC);

                  // Add term if it's contained in any document
                  if (count($termDocs) > 0) {
                      $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
                  }
                  $termDocs = array();
              }

              $segmentInfo->nextTerm();
              // Check if the segment dictionary is finished
              if ($segmentInfo->currentTerm() !== null) {
                  // Put segment back into the priority queue
                  $segmentInfoQueue->put($segmentInfo);
              }
          }

          $this->_writer->closeDictionaryFiles();
      }
  }