PageRenderTime 30ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Search/Lucene/Index/SegmentInfo.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 1651 lines | 877 code | 251 blank | 523 comment | 234 complexity | 33ea5b6ddeadd90f24aa39ed7c8cacfa MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Index_DictionaryLoader */
  22. require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
  23. /** Zend_Search_Lucene_Exception */
  24. require_once 'Zend/Search/Lucene/Exception.php';
  25. /** Zend_Search_Lucene_LockManager */
  26. require_once 'Zend/Search/Lucene/LockManager.php';
  27. /**
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage Index
  31. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  32. * @license http://framework.zend.com/license/new-bsd New BSD License
  33. */
  34. class Zend_Search_Lucene_Index_SegmentInfo
  35. {
  36. /**
  37. * Number of docs in a segment
  38. *
  39. * @var integer
  40. */
  41. private $_docCount;
  42. /**
  43. * Segment name
  44. *
  45. * @var string
  46. */
  47. private $_name;
  48. /**
  49. * Term Dictionary Index
  50. *
  51. * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
  52. * of performance considerations)
  53. * [0] -> $termFieldNum
  54. * [1] -> $termValue
  55. *
  56. * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  57. *
  58. * @var array
  59. */
  60. private $_termDictionary;
  61. /**
  62. * Term Dictionary Index TermInfos
  63. *
  64. * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
  65. * of performance considerations)
  66. * [0] -> $docFreq
  67. * [1] -> $freqPointer
  68. * [2] -> $proxPointer
  69. * [3] -> $skipOffset
  70. * [4] -> $indexPointer
  71. *
  72. * @var array
  73. */
  74. private $_termDictionaryInfos;
  75. /**
  76. * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
  77. *
  78. * @var array
  79. */
  80. private $_fields;
  81. /**
  82. * Field positions in a dictionary.
  83. * (Term dictionary contains fields ordered by names)
  84. *
  85. * @var array
  86. */
  87. private $_fieldsDicPositions;
  88. /**
  89. * Associative array where the key is the file name and the value is data offset
  90. * in a compound segment file (.cfs).
  91. *
  92. * @var array
  93. */
  94. private $_segFiles;
  95. /**
  96. * Associative array where the key is the file name and the value is file size (.cfs).
  97. *
  98. * @var array
  99. */
  100. private $_segFileSizes;
  101. /**
  102. * Delete file generation number
  103. *
  104. * -2 means autodetect latest delete generation
  105. * -1 means 'there is no delete file'
  106. * 0 means pre-2.1 format delete file
  107. * X specifies used delete file
  108. *
  109. * @var integer
  110. */
  111. private $_delGen;
  112. /**
  113. * Segment has single norms file
  114. *
  115. * If true then one .nrm file is used for all fields
  116. * Otherwise .fN files are used
  117. *
  118. * @var boolean
  119. */
  120. private $_hasSingleNormFile;
  121. /**
  122. * Use compound segment file (*.cfs) to collect all other segment files
  123. * (excluding .del files)
  124. *
  125. * @var boolean
  126. */
  127. private $_isCompound;
  128. /**
  129. * File system adapter.
  130. *
  131. * @var Zend_Search_Lucene_Storage_Directory_Filesystem
  132. */
  133. private $_directory;
  134. /**
  135. * Normalization factors.
  136. * An array fieldName => normVector
  137. * normVector is a binary string.
  138. * Each byte corresponds to an indexed document in a segment and
  139. * encodes normalization factor (float value, encoded by
  140. * Zend_Search_Lucene_Search_Similarity::encodeNorm())
  141. *
  142. * @var array
  143. */
  144. private $_norms = array();
  145. /**
  146. * List of deleted documents.
  147. * bitset if bitset extension is loaded or array otherwise.
  148. *
  149. * @var mixed
  150. */
  151. private $_deleted = null;
  152. /**
  153. * $this->_deleted update flag
  154. *
  155. * @var boolean
  156. */
  157. private $_deletedDirty = false;
  158. /**
  159. * True if segment uses shared doc store
  160. *
  161. * @var boolean
  162. */
  163. private $_usesSharedDocStore;
  164. /*
  165. * Shared doc store options.
  166. * It's an associative array with the following items:
  167. * - 'offset' => $docStoreOffset The starting document in the shared doc store files where this segment's documents begin
  168. * - 'segment' => $docStoreSegment The name of the segment that has the shared doc store files.
  169. * - 'isCompound' => $docStoreIsCompoundFile True, if compound file format is used for the shared doc store files (.cfx file).
  170. */
  171. private $_sharedDocStoreOptions;
  /**
   * Zend_Search_Lucene_Index_SegmentInfo constructor
   *
   * Eagerly reads segment metadata from the directory:
   *  - the shared doc store ('.cfx') file table, if $docStoreOptions marks it as compound;
   *  - the compound segment ('.cfs') file table, if the segment is compound;
   *  - field infos from the '.fnm' file;
   *  - the list of deleted documents from the delete file selected by $delGen.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param string       $name              Segment name
   * @param integer      $docCount          Number of docs in the segment
   * @param integer      $delGen            Delete file generation: -2 autodetect, -1 no delete file,
   *                                        0 pre-2.1 format delete file, otherwise the generation to load
   * @param array|null   $docStoreOptions   Shared doc store options ('offset', 'segment', 'isCompound'),
   *                                        or null if shared doc store is not used
   * @param boolean      $hasSingleNormFile
   * @param boolean|null $isCompound        null means "detect by trying to open the '.cfs' file"
   * @throws Zend_Search_Lucene_Exception
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
  {
    $this->_directory = $directory;
    $this->_name      = $name;
    $this->_docCount  = $docCount;

    if ($docStoreOptions !== null) {
      $this->_usesSharedDocStore    = true;
      $this->_sharedDocStoreOptions = $docStoreOptions;

      if ($docStoreOptions['isCompound']) {
        // Read '.cfx' file table: entries count followed by (data offset, file name) pairs
        $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
        $cfxFilesCount = $cfxFile->readVInt();

        $cfxFiles     = array();
        $cfxFileSizes = array();

        for ($count = 0; $count < $cfxFilesCount; $count++) {
          $dataOffset = $cfxFile->readLong();
          if ($count != 0) {
            // Size of the previous entry is the distance between two consecutive offsets
            $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
          }
          $fileName            = $cfxFile->readString();
          $cfxFiles[$fileName] = $dataOffset;
        }
        if ($count != 0) {
          // The last entry lasts up to the end of the '.cfx' file
          $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
        }

        $this->_sharedDocStoreOptions['files']     = $cfxFiles;
        $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
      }
    }

    $this->_hasSingleNormFile = $hasSingleNormFile;
    $this->_delGen            = $delGen;
    $this->_termDictionary    = null;

    if ($isCompound !== null) {
      $this->_isCompound = $isCompound;
    } else {
      // It's a pre-2.1 segment or isCompound is set to 'unknown'
      // Detect if segment uses compound file
      try {
        // Try to open compound file
        $this->_directory->getFileObject($name . '.cfs');

        // Compound file is found
        $this->_isCompound = true;
      } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
          // Compound file is not found or is not readable
          $this->_isCompound = false;
        } else {
          throw $e;
        }
      }
    }

    $this->_segFiles = array();
    if ($this->_isCompound) {
      // Read '.cfs' file table (same layout as the '.cfx' table above)
      $cfsFile       = $this->_directory->getFileObject($name . '.cfs');
      $segFilesCount = $cfsFile->readVInt();

      for ($count = 0; $count < $segFilesCount; $count++) {
        $dataOffset = $cfsFile->readLong();
        if ($count != 0) {
          $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
        }
        $fileName                   = $cfsFile->readString();
        $this->_segFiles[$fileName] = $dataOffset;
      }
      if ($count != 0) {
        $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
      }
    }

    // Load field infos
    $fnmFile     = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames  = array();
    $fieldNums   = array();
    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
      $fieldName = $fnmFile->readString();
      $fieldBits = $fnmFile->readByte();
      $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                      $fieldBits & 1,
                                                                      $count,
                                                                      $fieldBits & 2 );
      if ($fieldBits & 0x10) {
        // norms are omitted for the indexed field
        $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
      }

      $fieldNums[$count]  = $count;
      $fieldNames[$count] = $fieldName;
    }
    // Sort field numbers by field name to get field positions in the terms dictionary
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    if ($this->_delGen == -2) {
      $this->_detectLatestDelGen();
    }

    if ($this->_delGen == -1) {
      // There is no delete file for this segment
      // Do nothing
    } else if ($this->_delGen == 0) {
      // It's a segment with pre-2.1 format delete file
      // Try to find delete file
      try {
        // '.del' files always stored in a separate file
        // Segment compound is not used
        $delFile = $this->_directory->getFileObject($this->_name . '.del');

        $byteCount = $delFile->readInt();
        $byteCount = ceil($byteCount/8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
          $delBytes = '';
        } else {
          $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
          $this->_deleted = $delBytes;
        } else {
          $this->_deleted = array();
          // Iterate over the bytes actually read: $delBytes is empty if $bitCount is 0,
          // so $byteCount must not be used as a bound for string offsets
          $delBytesLength = strlen($delBytes);
          for ($count = 0; $count < $delBytesLength; $count++) {
            $byte = ord($delBytes[$count]);
            for ($bit = 0; $bit < 8; $bit++) {
              if ($byte & (1<<$bit)) {
                $this->_deleted[$count*8 + $bit] = 1;
              }
            }
          }
        }
      } catch(Zend_Search_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') === false ) {
          throw $e;
        }
        // There is no delete file
        // Do nothing
      }
    } else {
      // It's 2.1+ format delete file
      $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
      $delFile     = $this->_directory->getFileObject($delFileName);

      $format = $delFile->readInt();

      if ($format == (int)0xFFFFFFFF) {
        // Deletions are stored as a sequence of (gap to the next non-zero byte, byte value) pairs
        if (extension_loaded('bitset')) {
          $this->_deleted = bitset_empty();
        } else {
          $this->_deleted = array();
        }

        $byteCount = $delFile->readInt();
        $bitCount  = $delFile->readInt();

        $delFileSize = $this->_directory->fileLength($delFileName);
        $byteNum = 0;

        do {
          $dgap        = $delFile->readVInt();
          $nonZeroByte = $delFile->readByte();

          $byteNum += $dgap;

          for ($bit = 0; $bit < 8; $bit++) {
            if ($nonZeroByte & (1<<$bit)) {
              if (extension_loaded('bitset')) {
                bitset_incl($this->_deleted, $byteNum*8 + $bit);
              } else {
                $this->_deleted[$byteNum*8 + $bit] = 1;
              }
            }
          }
        } while ($delFile->tell() < $delFileSize);
      } else {
        // $format is actually byte count
        $byteCount = ceil($format/8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
          $delBytes = '';
        } else {
          $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
          $this->_deleted = $delBytes;
        } else {
          $this->_deleted = array();
          // See the note for the pre-2.1 format: bound by the length of the data read
          $delBytesLength = strlen($delBytes);
          for ($count = 0; $count < $delBytesLength; $count++) {
            $byte = ord($delBytes[$count]);
            for ($bit = 0; $bit < 8; $bit++) {
              if ($byte & (1<<$bit)) {
                $this->_deleted[$count*8 + $bit] = 1;
              }
            }
          }
        }
      }
    }
  }
  /**
   * Opens index file stored within compound index file
   *
   * '.fdx' and '.fdt' requests are resolved against the shared doc store if the segment
   * uses one; the returned file object is positioned at the start of the data which
   * belongs to this segment. Any other file is taken from the segment itself (either
   * as a separate file or from the '.cfs' compound file) and is positioned at its start.
   *
   * @param string $extension
   * @param boolean $shareHandler
   * @throws Zend_Search_Lucene_Exception
   * @return Zend_Search_Lucene_Storage_File
   */
  public function openCompoundFile($extension, $shareHandler = true)
  {
    if (($extension == '.fdx' || $extension == '.fdt') && $this->_usesSharedDocStore) {
      $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
      $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';

      if (!$this->_sharedDocStoreOptions['isCompound']) {
        // Shared doc store files are stored as separate files
        $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
        // Each document takes 8 bytes within '.fdx' file
        $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

        if ($extension == '.fdx') {
          // '.fdx' file is requested
          return $fdxFile;
        } else {
          // '.fdt' file is requested
          $fdtStartOffset = $fdxFile->readLong();

          $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
          $fdtFile->seek($fdtStartOffset, SEEK_CUR);

          return $fdtFile;
        }
      }

      if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
        throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                             . $fdxFName . ' file.' );
      }
      if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
        throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
                                             . $fdtFName . ' file.' );
      }

      // Open shared docstore segment file
      $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
      // Seek to the start of '.fdx' file within compound file
      $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
      // Seek to the start of current segment documents section
      $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);

      if ($extension == '.fdx') {
        // '.fdx' file is requested
        return $cfxFile;
      } else {
        // '.fdt' file is requested
        $fdtStartOffset = $cfxFile->readLong();

        // Seek to the start of '.fdt' file within compound file
        $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
        // Seek to the start of current segment documents section
        $cfxFile->seek($fdtStartOffset, SEEK_CUR);

        // '.fdt' data lives inside the '.cfx' file, so the positioned compound
        // file object has to be returned (no separate '.fdt' file object exists here)
        return $cfxFile;
      }
    }

    $filename = $this->_name . $extension;

    if (!$this->_isCompound) {
      return $this->_directory->getFileObject($filename, $shareHandler);
    }

    if( !isset($this->_segFiles[$filename]) ) {
      throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                                           . $filename . ' file.' );
    }

    $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
    $file->seek($this->_segFiles[$filename]);
    return $file;
  }
  /**
   * Get length of a segment file, which may be stored within a compound file
   *
   * '.fdx' and '.fdt' lengths are taken from the shared doc store if the segment uses one.
   * For other files a separate file in the directory is preferred over the
   * compound file entry.
   *
   * @param string $extension
   * @throws Zend_Search_Lucene_Exception
   * @return integer
   */
  public function compoundFileLength($extension)
  {
    $isStoredFieldsFile = ($extension == '.fdx' || $extension == '.fdt');

    if ($isStoredFieldsFile && $this->_usesSharedDocStore) {
      $sharedName = $this->_sharedDocStoreOptions['segment'] . $extension;

      if ($this->_sharedDocStoreOptions['isCompound']) {
        if (isset($this->_sharedDocStoreOptions['fileSizes'][$sharedName])) {
          return $this->_sharedDocStoreOptions['fileSizes'][$sharedName];
        }

        throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
                                             . $sharedName . ' file.' );
      }

      return $this->_directory->fileLength($sharedName);
    }

    $segmentFile = $this->_name . $extension;

    // Try to get common file first
    if ($this->_directory->fileExists($segmentFile)) {
      return $this->_directory->fileLength($segmentFile);
    }

    if (isset($this->_segFileSizes[$segmentFile])) {
      return $this->_segFileSizes[$segmentFile];
    }

    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                         . $segmentFile . ' file.' );
  }
  /**
   * Looks up a field by name.
   * Returns the number of the first matching field or -1 if there is no such field
   *
   * @param string $fieldName
   * @return integer
   */
  public function getFieldNum($fieldName)
  {
    $fieldNum = -1;

    foreach ($this->_fields as $fieldInfo) {
      if ($fieldInfo->name == $fieldName) {
        $fieldNum = $fieldInfo->number;
        break;
      }
    }

    return $fieldNum;
  }
  /**
   * Returns field info stored under the specified field number
   *
   * @param integer $fieldNum
   * @return Zend_Search_Lucene_Index_FieldInfo
   */
  public function getField($fieldNum)
  {
    $fieldInfo = $this->_fields[$fieldNum];

    return $fieldInfo;
  }
  /**
   * Returns array of field names (fieldName => fieldName).
   * If $indexed parameter is true, then only indexed fields are returned.
   *
   * @param boolean $indexed
   * @return array
   */
  public function getFields($indexed = false)
  {
    $names = array();

    foreach ($this->_fields as $fieldInfo) {
      if ($indexed && !$fieldInfo->isIndexed) {
        // Only indexed fields are requested
        continue;
      }

      $names[$fieldInfo->name] = $fieldInfo->name;
    }

    return $names;
  }
  /**
   * Returns segment fields as an array of FieldInfo objects.
   *
   * @return array
   */
  public function getFieldInfos()
  {
    $fieldInfos = $this->_fields;

    return $fieldInfos;
  }
  /**
   * Returns delete file generation number currently stored for this segment.
   *
   * @return integer
   */
  public function getDelGen()
  {
    $delGen = $this->_delGen;

    return $delGen;
  }
  /**
   * Returns the number of documents in this segment.
   * Deleted documents are counted too.
   *
   * @return integer
   */
  public function count()
  {
    $docCount = $this->_docCount;

    return $docCount;
  }
  /**
   * Returns number of documents marked as deleted.
   *
   * @return integer
   */
  private function _deletedCount()
  {
    if ($this->_deleted === null) {
      return 0;
    }

    // Deletions are kept as a bitset if bitset extension is loaded or as an array otherwise
    $deletedIds = extension_loaded('bitset') ? bitset_to_array($this->_deleted)
                                             : $this->_deleted;

    return count($deletedIds);
  }
  /**
   * Returns the number of documents in this segment which are not deleted.
   *
   * @return integer
   */
  public function numDocs()
  {
    if (!$this->hasDeletions()) {
      return $this->_docCount;
    }

    return $this->_docCount - $this->_deletedCount();
  }
  /**
   * Translates field number into field position in a fields dictionary
   *
   * @param integer $fieldNum
   * @return integer
   */
  private function _getFieldPosition($fieldNum) {
    if (isset($this->_fieldsDicPositions[$fieldNum])) {
      return $this->_fieldsDicPositions[$fieldNum];
    }

    // Values which are not in a translation table are treated as a 'direct value'
    return $fieldNum;
  }
  /**
   * Returns the name of this segment
   *
   * @return string
   */
  public function getName()
  {
    $segmentName = $this->_name;

    return $segmentName;
  }
  578. /**
  579. * TermInfo cache
  580. *
  581. * Size is 1024.
  582. * Numbers are used instead of class constants because of performance considerations
  583. *
  584. * @var array
  585. */
  586. private $_termInfoCache = array();
  /**
   * Shrinks TermInfo cache.
   *
   * Entries are removed starting from the beginning of the cache array
   * until 768 entries are left.
   */
  private function _cleanUpTermInfoCache()
  {
    foreach (array_keys($this->_termInfoCache) as $cachedKey) {
      unset($this->_termInfoCache[$cachedKey]);

      // Stop as soon as only 768 entries remain in the cache
      if (count($this->_termInfoCache) == 768) {
        break;
      }
    }
  }
  /**
   * Load terms dictionary index
   *
   * Serialized dictionary index ('.sti' file) is used if it exists and contains
   * a valid (term dictionary, term infos) pair. Otherwise the index is loaded from
   * the '.tii' file and the '.sti' file is (re)created.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _loadDictionaryIndex()
  {
    // Check, if index is already serialized
    if ($this->_directory->fileExists($this->_name . '.sti')) {
      // Load serialized dictionary index data
      $stiFile     = $this->_directory->getFileObject($this->_name . '.sti');
      $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));

      // Load dictionary index data.
      // NOTE(review): unserialize() is applied to index directory contents; it must never
      // be fed with data from an untrusted location.
      // '@' is kept intentionally: damaged file is not an error, index is rebuilt below.
      $unserializedData = @unserialize($stiFileData);

      // Accept only well-formed data: a pair of arrays. Anything else (false, scalar,
      // truncated array) would leave the dictionary in an unusable state.
      if (is_array($unserializedData)
          && isset($unserializedData[0], $unserializedData[1])
          && is_array($unserializedData[0])
          && is_array($unserializedData[1])) {
        list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
        return;
      }
    }

    // Load data from .tii file and generate .sti file

    // Prefetch dictionary index data
    $tiiFile     = $this->openCompoundFile('.tii');
    $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

    // Load dictionary index data
    list($this->_termDictionary, $this->_termDictionaryInfos) =
          Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

    $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
    $stiFile     = $this->_directory->createFile($this->_name . '.sti');
    $stiFile->writeBytes($stiFileData);
  }
  /**
   * Scans terms dictionary and returns term info
   *
   * The lookup is performed in three steps:
   *  1. TermInfo cache is checked;
   *  2. terms dictionary index is searched using binary search;
   *  3. if the index has no exact match, then '.tis' file is scanned starting
   *     from the nearest preceding index entry.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @throws Zend_Search_Lucene_Exception
   * @return Zend_Search_Lucene_Index_TermInfo|null  null if the term is not found in the segment
   */
  public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
  {
    $termKey = $term->key();
    if (isset($this->_termInfoCache[$termKey])) {
      $termInfo = $this->_termInfoCache[$termKey];

      // Move termInfo to the end of cache
      unset($this->_termInfoCache[$termKey]);
      $this->_termInfoCache[$termKey] = $termInfo;

      return $termInfo;
    }

    if ($this->_termDictionary === null) {
      $this->_loadDictionaryIndex();
    }

    $searchField = $this->getFieldNum($term->field);

    if ($searchField == -1) {
      return null;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // search for appropriate value in dictionary
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary)-1;
    while ($highIndex >= $lowIndex) {
      $mid     = ($highIndex + $lowIndex) >> 1;
      $midTerm = $this->_termDictionary[$mid];

      // Terms are ordered by field position first and then by term text
      $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
      $delta    = $searchDicField - $fieldNum;
      if ($delta == 0) {
        $delta = strcmp($term->text, $midTerm[1] /* text */);
      }

      if ($delta < 0) {
        $highIndex = $mid-1;
      } elseif ($delta > 0) {
        $lowIndex  = $mid+1;
      } else {
        // We got it!
        $a        = $this->_termDictionaryInfos[$mid];
        $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

        // Put loaded termInfo into cache
        $this->_termInfoCache[$termKey] = $termInfo;

        // The cache is filled on this path too, so its size has to be checked here as well
        if (count($this->_termInfoCache) >= 1024) {
          $this->_cleanUpTermInfoCache();
        }

        return $termInfo;
      }
    }

    if ($highIndex == -1) {
      // Term is out of the dictionary range
      return null;
    }

    // Nearest index entry which precedes the searched term
    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    $tisFile   = $this->openCompoundFile('.tis');
    $tiVersion = $tisFile->readInt();
    if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
        $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
      throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termCount     = $tisFile->readLong();
    $indexInterval = $tisFile->readInt();
    $skipInterval  = $tisFile->readInt();
    if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
      $maxSkipLevels = $tisFile->readInt();
    }

    // Header is already read, so index pointer has to be corrected by the header size
    $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);

    $termValue    = $prevTerm[1] /* text */;
    $termFieldNum = $prevTerm[0] /* field */;
    $freqPointer  = $prevTermInfo[1] /* freqPointer */;
    $proxPointer  = $prevTermInfo[2] /* proxPointer */;

    // Scan terms until the searched term (or a term which follows it) is reached
    for ($count = $prevPosition*$indexInterval + 1;
         $count <= $termCount &&
         ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
          ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
           strcmp($termValue, $term->text) < 0) );
         $count++) {
      $termPrefixLength = $tisFile->readVInt();
      $termSuffix       = $tisFile->readString();
      $termFieldNum     = $tisFile->readVInt();
      $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

      $docFreq      = $tisFile->readVInt();
      $freqPointer += $tisFile->readVInt();
      $proxPointer += $tisFile->readVInt();
      if( $docFreq >= $skipInterval ) {
        $skipOffset = $tisFile->readVInt();
      } else {
        $skipOffset = 0;
      }
    }

    // strcmp() is used instead of '==': loose comparison treats numeric strings
    // as numbers, so different terms like '1e2' and '100' would be reported as equal
    if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
      $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
    } else {
      $termInfo = null;
    }

    // Put loaded termInfo into cache
    $this->_termInfoCache[$termKey] = $termInfo;

    // '>=' guarantees that the cache is cleaned up even if the size has passed 1024
    if (count($this->_termInfoCache) >= 1024) {
      $this->_cleanUpTermInfoCache();
    }

    return $termInfo;
  }
  /**
   * Returns term freqs array.
   * Result array structure: array(docId => freq, ...)
   *
   * An empty array is returned if the term is not found in the segment.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param integer $shift  Value added to each document id to build result keys
   * @return array
   */
  public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0)
  {
    $termInfo = $this->getTermInfo($term);

    if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
      return array();
    }

    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

    $freqs = array();
    $docId = 0;
    for ($docNum = 0; $docNum < $termInfo->docFreq; $docNum++) {
      $docDelta = $frqFile->readVInt();

      // Odd delta means that the term frequency is 1 and it is not stored explicitly
      $isSingleOccurrence = ($docDelta % 2 == 1);

      $docId += ($docDelta - ($isSingleOccurrence ? 1 : 0))/2;
      $freqs[$shift + $docId] = $isSingleOccurrence ? 1 : $frqFile->readVInt();
    }

    return $freqs;
  }
  /**
   * Returns term positions array.
   * Result array structure: array(docId => array(pos1, pos2, ...), ...)
   *
   * An empty array is returned if the term is not found in the segment.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param integer $shift  Value added to each document id to build result keys
   * @return array
   */
  public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0)
  {
    $termInfo = $this->getTermInfo($term);

    if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
      return array();
    }

    // Step 1: collect term frequencies (docId => freq) from the '.frq' file
    $frqFile = $this->openCompoundFile('.frq');
    $frqFile->seek($termInfo->freqPointer,SEEK_CUR);

    $docFreqs = array();
    $docId    = 0;
    for ($docNum = 0; $docNum < $termInfo->docFreq; $docNum++) {
      $docDelta = $frqFile->readVInt();

      // Odd delta means that the term frequency is 1 and it is not stored explicitly
      $isSingleOccurrence = ($docDelta % 2 == 1);

      $docId += ($docDelta - ($isSingleOccurrence ? 1 : 0))/2;
      $docFreqs[$docId] = $isSingleOccurrence ? 1 : $frqFile->readVInt();
    }

    // Step 2: read as many position deltas from the '.prx' file as the frequency says
    $prxFile = $this->openCompoundFile('.prx');
    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

    $docPositions = array();
    foreach ($docFreqs as $id => $freq) {
      $position  = 0;
      $positions = array();

      for ($posNum = 0; $posNum < $freq; $posNum++) {
        // Positions are stored as deltas from the previous position
        $position   += $prxFile->readVInt();
        $positions[] = $position;
      }

      $docPositions[$shift + $id] = $positions;
    }

    return $docPositions;
  }
  /**
   * Load normalization factors from an index file
   *
   * If the segment has a single norms file ('.nrm'), then norms are loaded
   * for all indexed fields at once. Otherwise only '.fN' file of the
   * specified field is read.
   *
   * @param integer $fieldNum
   * @throws Zend_Search_Lucene_Exception
   */
  private function _loadNorm($fieldNum)
  {
    if (!$this->_hasSingleNormFile) {
      $fFile = $this->openCompoundFile('.f' . $fieldNum);
      $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);

      return;
    }

    $nrmFile = $this->openCompoundFile('.nrm');

    $signature     = $nrmFile->readBytes(3);
    $formatVersion = $nrmFile->readByte();

    if (!($signature == 'NRM' && $formatVersion == (int)0xFF)) {
      throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
    }

    // One norm byte per document is read for each indexed field
    foreach ($this->_fields as $fNum => $fieldInfo) {
      if (!$fieldInfo->isIndexed) {
        continue;
      }

      $this->_norms[$fNum] = $nrmFile->readBytes($this->_docCount);
    }
  }
  /**
   * Returns normalization factor for specified document
   *
   * null is returned if the field doesn't exist in the segment or is not indexed.
   *
   * @param integer $id
   * @param string $fieldName
   * @return float|null
   */
  public function norm($id, $fieldName)
  {
    $fieldNum = $this->getFieldNum($fieldName);

    // getFieldNum() returns -1 for unknown fields; $this->_fields has no such entry,
    // so it has to be checked before the field info is accessed
    if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
      return null;
    }

    if (!isset($this->_norms[$fieldNum])) {
      $this->_loadNorm($fieldNum);
    }

    return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
  }
  /**
   * Returns norm vector, encoded in a byte string
   *
   * For a field which is unknown or not indexed the vector is filled with the
   * encoded lengthNorm($fieldName, 0) value of the default similarity.
   *
   * @param string $fieldName
   * @return string
   */
  public function normVector($fieldName)
  {
    $fieldNum = $this->getFieldNum($fieldName);

    $hasStoredNorms = ($fieldNum != -1) && $this->_fields[$fieldNum]->isIndexed;

    if (!$hasStoredNorms) {
      $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
      $defaultNorm = chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));

      return str_repeat($defaultNorm, $this->_docCount);
    }

    if (!isset($this->_norms[$fieldNum])) {
      $this->_loadNorm($fieldNum);
    }

    return $this->_norms[$fieldNum];
  }
  /**
   * Returns true if the segment has a list of deleted documents
   * (it's either loaded from a delete file or created by delete() method).
   *
   * @return boolean
   */
  public function hasDeletions()
  {
    $hasDeletedList = ($this->_deleted !== null);

    return $hasDeletedList;
  }
  /**
   * Returns true if one norms file is used for all fields of the segment.
   *
   * @return boolean
   */
  public function hasSingleNormFile()
  {
    // Stored flag is converted to a strict boolean value
    return (bool) $this->_hasSingleNormFile;
  }
  /**
   * Returns true if compound segment file is used to store segment files.
   *
   * @return boolean
   */
  public function isCompound()
  {
    $isCompound = $this->_isCompound;

    return $isCompound;
  }
  /**
   * Marks a document as deleted within the index segment.
   * $id is an internal document id
   *
   * The change is kept in memory and the segment is flagged as modified.
   *
   * @param integer $id
   */
  public function delete($id)
  {
    $this->_deletedDirty = true;

    $useBitset = extension_loaded('bitset');

    // Create the list of deleted documents on the first deletion
    if ($this->_deleted === null) {
      $this->_deleted = $useBitset ? bitset_empty($id) : array();
    }

    if ($useBitset) {
      bitset_incl($this->_deleted, $id);
    } else {
      $this->_deleted[$id] = 1;
    }
  }
  /**
   * Checks whether a document is marked as deleted.
   *
   * @param integer $id  internal document id
   * @return boolean
   */
  public function isDeleted($id)
  {
      // No deletions container means nothing has been deleted
      if ($this->_deleted === null) {
          return false;
      }

      return extension_loaded('bitset')
           ? bitset_in($this->_deleted, $id)
           : isset($this->_deleted[$id]);
  }
  /**
   * Detect latest delete generation
   *
   * Scans the segment directory for deletions files and stores the highest
   * generation found in $this->_delGen:
   *   -1  - there is no deletions file for this segment;
   *    0  - only the non-generational '<segment_name>.del' file exists;
   *    N  - generation decoded from the base-36 suffix of '<segment_name>_N.del'.
   *
   * Is actually used from writeChanges() method or from the constructor if it's invoked from
   * Index writer. In both cases index write lock is already obtained, so we shouldn't care
   * about it
   */
  private function _detectLatestDelGen()
  {
      $plainDelFile = $this->_name . '.del';

      // The segment name is quoted so it is always matched literally,
      // whatever characters it contains
      $generationPattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

      // -1 means "no deletions file for the current segment in the directory"
      $latestGeneration = -1;

      foreach ($this->_directory->fileList() as $file) {
          if ($file === $plainDelFile) {
              // Matches <segment_name>.del file name
              $generation = 0;
          } else if (preg_match($generationPattern, $file, $matches)) {
              // Matches <segment_name>_NNN.del file names
              $generation = (int)base_convert($matches[1], 36, 10);
          } else {
              continue;
          }

          if ($generation > $latestGeneration) {
              $latestGeneration = $generation;
          }
      }

      $this->_delGen = $latestGeneration;
  }
  /**
   * Store pending deletions into a new deletions file, if there are any.
   *
   * Nothing is done when no document has been deleted since the last write.
   * Otherwise a file of the next delete generation is created. It contains
   * the segment document count, the number of deleted documents and the
   * deletions bit vector.
   *
   * This method must be invoked only from the Writer _updateSegments() method,
   * so index Write lock has to be already obtained.
   *
   * @internal
   */
  public function writeChanges()
  {
      if (!$this->_deletedDirty) {
          return;
      }

      if (extension_loaded('bitset')) {
          $delBytes = $this->_deleted;
          $bitCount = count(bitset_to_array($delBytes));
      } else {
          // Pack the array of deleted ids into a bit vector, lowest bit first
          $byteCount = floor($this->_docCount/8)+1;
          $delBytes  = '';
          for ($byteNum = 0; $byteNum < $byteCount; $byteNum++) {
              $packed   = 0;
              $firstDoc = $byteNum*8;
              for ($shift = 0; $shift < 8; $shift++) {
                  if (isset($this->_deleted[$firstDoc + $shift])) {
                      $packed |= (1<<$shift);
                  }
              }
              $delBytes .= chr($packed);
          }
          $bitCount = count($this->_deleted);
      }

      // New generation is 1 if there is no deletions file yet,
      // the latest generation increased by 1 otherwise
      $this->_detectLatestDelGen();
      $this->_delGen = ($this->_delGen == -1) ? 1 : $this->_delGen + 1;

      $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
      $delFile     = $this->_directory->createFile($delFileName);
      $delFile->writeInt($this->_docCount);
      $delFile->writeInt($bitCount);
      $delFile->writeBytes($delBytes);

      $this->_deletedDirty = false;
  }
  /**
   * Term Dictionary (.tis) file object used for stream-like terms reading.
   * It is null while the terms stream is closed.
   *
   * @var Zend_Search_Lucene_Storage_File
   */
  private $_tisFile = null;

  /**
   * Position of the .tis data start, as reported by the file object
   * right after it is opened by reset()
   *
   * @var integer
   */
  private $_tisFileOffset;

  /**
   * Frequencies (.frq) file object used for stream-like terms reading
   *
   * @var Zend_Search_Lucene_Storage_File
   */
  private $_frqFile = null;

  /**
   * Position of the .frq data start, as reported by the file object
   * right after it is opened by reset()
   *
   * @var integer
   */
  private $_frqFileOffset;

  /**
   * Positions (.prx) file object used for stream-like terms reading
   *
   * @var Zend_Search_Lucene_Storage_File
   */
  private $_prxFile = null;

  /**
   * Position of the .prx data start, as reported by the file object
   * right after it is opened by reset()
   *
   * @var integer
   */
  private $_prxFileOffset;

  /**
   * Number of terms which are still to be read from the terms stream
   *
   * @var integer
   */
  private $_termCount = 0;

  /**
   * Overall number of terms in the terms stream
   *
   * @var integer
   */
  private $_termNum = 0;

  /**
   * Segment index interval (read from the .tis file header)
   *
   * @var integer
   */
  private $_indexInterval;

  /**
   * Segment skip interval (read from the .tis file header)
   *
   * @var integer
   */
  private $_skipInterval;

  /**
   * TermInfo of the current term of the terms stream
   *
   * @var Zend_Search_Lucene_Index_TermInfo
   */
  private $_lastTermInfo = null;

  /**
   * Current term of the terms stream
   *
   * @var Zend_Search_Lucene_Index_Term
   */
  private $_lastTerm = null;

  /**
   * Map of the document IDs: segment document id => resulting document id.
   * Deleted documents have no entry in the map.
   * It's not very efficient from the memory usage point of view,
   * but it's much faster than other methods
   *
   * @var array|null
   */
  private $_docMap = null;

  /**
   * An array of all positions of the current term in the documents.
   * Array structure: array( docId => array( pos1, pos2, ...), ...)
   *
   * Is set to null if term positions loading has to be skipped
   *
   * @var array|null
   */
  private $_lastTermPositions = null;

  /**
   * Terms scan mode
   *
   * Values:
   *
   * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
   * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
   * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved,
   *                       document numbers are compacted (shifted if segment has deleted documents)
   *
   * @var integer
   */
  private $_termsScanMode;

  /** Scan modes */
  const SM_TERMS_ONLY = 0; // terms are scanned, no additional info is retrieved
  const SM_FULL_INFO  = 1; // terms are scanned, frequency and position info is retrieved
  const SM_MERGE_INFO = 2; // terms are scanned, frequency and position info is retrieved,
                           // document numbers are compacted (shifted if segment contains deleted documents)
  /**
   * Reset terms stream
   *
   * Opens the term dictionary, validates its header and positions the stream
   * at the first term. In the SM_FULL_INFO and SM_MERGE_INFO modes frequency
   * and position files are opened as well and the document id map is built:
   * non-deleted documents are mapped to $startId + <segment document id>
   * (SM_FULL_INFO) or to consecutive ids beginning at $startId (SM_MERGE_INFO).
   *
   * Returns start document id for the next segment
   *
   * @param integer $startId  id for the first document
   * @param integer $mode     one of the self::SM_* scan modes
   * @throws Zend_Search_Lucene_Exception
   * @return integer
   */
  public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
  {
      // Release previously opened dictionary file before it's opened again
      $this->_tisFile = null;

      $this->_tisFile = $this->openCompoundFile('.tis', false);
      $this->_tisFileOffset = $this->_tisFile->tell();

      $tiVersion = $this->_tisFile->readInt();
      if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
          $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
          throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
      }

      $this->_termCount =
            $this->_termNum = $this->_tisFile->readLong(); // Read terms count
      $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
      $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
      if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
          // One more header integer (max skip levels) follows in this format.
          // The value is not used here, but it has to be consumed.
          $this->_tisFile->readInt();
      }

      $this->_frqFile = null;
      $this->_prxFile = null;

      $this->_docMap = array();

      $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
      $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
      $this->_lastTermPositions = null;

      $this->_termsScanMode = $mode;

      switch ($mode) {
          case self::SM_TERMS_ONLY:
              // Do nothing
              break;

          case self::SM_FULL_INFO:
              // break intentionally omitted
          case self::SM_MERGE_INFO:
              $this->_frqFile = $this->openCompoundFile('.frq', false);
              $this->_frqFileOffset = $this->_frqFile->tell();

              $this->_prxFile = $this->openCompoundFile('.prx', false);
              $this->_prxFileOffset = $this->_prxFile->tell();

              for ($count = 0; $count < $this->_docCount; $count++) {
                  if (!$this->isDeleted($count)) {
                      $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
                  }
              }
              break;

          default:
              throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
      }

      // The result has to be calculated before nextTerm() is invoked:
      // nextTerm() sets the document map to null if the segment has no terms,
      // so counting the map afterwards would be wrong (and is an error on PHP 8).
      $nextStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);

      $this->nextTerm();

      return $nextStartId;
  }
  /**
   * Skip terms stream up to the specified term prefix.
   *
   * Prefix contains fully specified field info and portion of searched term.
   *
   * The dictionary index is searched for the latest entry which is not
   * greater than the prefix, the stream is positioned at that entry and then
   * scanned forward until the current term is equal to or greater than the
   * prefix. The stream is moved to its end (current term is null) if the
   * field is not present in the segment or the prefix precedes the whole
   * dictionary index.
   *
   * @param Zend_Search_Lucene_Index_Term $prefix
   * @throws Zend_Search_Lucene_Exception
   */
  public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
  {
      if ($this->_termDictionary === null) {
          $this->_loadDictionaryIndex();
      }

      $searchField = $this->getFieldNum($prefix->field);

      if ($searchField == -1) {
          /**
           * Field is not presented in this segment
           * Go to the end of dictionary
           */
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;

          $this->_lastTerm          = null;
          $this->_lastTermInfo      = null;
          $this->_lastTermPositions = null;

          return;
      }
      $searchDicField = $this->_getFieldPosition($searchField);

      // search for appropriate value in dictionary
      $lowIndex  = 0;
      $highIndex = count($this->_termDictionary)-1;
      while ($highIndex >= $lowIndex) {
          $mid     = ($highIndex + $lowIndex) >> 1;
          $midTerm = $this->_termDictionary[$mid];

          $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
          $delta    = $searchDicField - $fieldNum;
          if ($delta == 0) {
              $delta = strcmp($prefix->text, $midTerm[1] /* text */);
          }

          if ($delta < 0) {
              $highIndex = $mid-1;
          } elseif ($delta > 0) {
              $lowIndex  = $mid+1;
          } else {
              // We have reached term we are looking for.
              // $highIndex is used as the start position below, so it has to
              // point to the matched entry. Otherwise the stream would start
              // at a later index entry and terms in between would be skipped.
              $highIndex = $mid;
              break;
          }
      }

      if ($highIndex == -1) {
          // Term is out of the dictionary range
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;

          $this->_lastTerm          = null;
          $this->_lastTermInfo      = null;
          $this->_lastTermPositions = null;

          return;
      }

      $prevPosition = $highIndex;
      $prevTerm     = $this->_termDictionary[$prevPosition];
      $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

      if ($this->_tisFile === null) {
          // The end of terms stream is reached and terms dictionary file is closed
          // Perform mini-reset operation
          $this->_tisFile = $this->openCompoundFile('.tis', false);

          if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) {
              $this->_frqFile = $this->openCompoundFile('.frq', false);
              $this->_prxFile = $this->openCompoundFile('.prx', false);
          }
      }
      $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);

      $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                               ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
      $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                   $prevTermInfo[1] /* freqPointer */,
                                                                   $prevTermInfo[2] /* proxPointer */,
                                                                   $prevTermInfo[3] /* skipOffset */);
      $this->_termCount    = $this->_termNum - $prevPosition*$this->_indexInterval;

      if ($highIndex == 0) {
          // skip start entry
          $this->nextTerm();
      } else if ($prefix->field == $this->_lastTerm->field && $prefix->text == $this->_lastTerm->text) {
          // We got exact match in the dictionary index

          if ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO) {
              $this->_lastTermPositions = array();

              // Read document ids (stored as deltas) and term frequencies
              $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
              $freqs = array();   $docId = 0;
              for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
                  $docDelta = $this->_frqFile->readVInt();
                  if( $docDelta % 2 == 1 ) {
                      // Odd value: frequency is 1 and is not stored separately
                      $docId += ($docDelta-1)/2;
                      $freqs[ $docId ] = 1;
                  } else {
                      $docId += $docDelta/2;
                      $freqs[ $docId ] = $this->_frqFile->readVInt();
                  }
              }

              // Read term positions (stored as deltas) for each document.
              // Positions are always read to keep the file pointer in sync,
              // but are stored only for documents present in the document map.
              $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
              foreach ($freqs as $docId => $freq) {
                  $termPosition = 0;  $positions = array();

                  for ($count = 0; $count < $freq; $count++ ) {
                      $termPosition += $this->_prxFile->readVInt();
                      $positions[] = $termPosition;
                  }

                  if (isset($this->_docMap[$docId])) {
                      $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                  }
              }
          }

          return;
      }

      // Search term matching specified prefix
      while ($this->_lastTerm !== null) {
          if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
               ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
              // Current term matches or is greater than the pattern
              return;
          }

          $this->nextTerm();
      }
  }
  /**
   * Scans terms dictionary and returns next term.
   *
   * Null is returned (and the stream state is cleared) if the stream is
   * closed or there are no more terms. In the SM_FULL_INFO and SM_MERGE_INFO
   * modes positions of the term are loaded as well and are available through
   * currentTermPositions().
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function nextTerm()
  {
      if ($this->_tisFile === null  ||  $this->_termCount == 0) {
          $this->_lastTerm = $this->_lastTermInfo = $this->_lastTermPositions = $this->_docMap = null;

          // may be necessary for "empty" segment
          $this->_tisFile = $this->_frqFile = $this->_prxFile = null;

          return null;
      }

      // Term text is stored as a length of the prefix shared with the previous term plus a suffix
      $sharedLength = $this->_tisFile->readVInt();
      $suffix       = $this->_tisFile->readString();
      $fieldNum     = $this->_tisFile->readVInt();
      $text         = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedLength) . $suffix;

      $this->_lastTerm = new Zend_Search_Lucene_Index_Term($text, $this->_fields[$fieldNum]->name);

      // Pointers are stored as deltas relative to the previous term info
      $docFreq     = $this->_tisFile->readVInt();
      $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
      $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
      $skipOffset  = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

      $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

      $loadPositions = ($this->_termsScanMode == self::SM_FULL_INFO || $this->_termsScanMode == self::SM_MERGE_INFO);
      if ($loadPositions) {
          $this->_lastTermPositions = array();

          // Document ids (stored as deltas) and term frequencies
          $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
          $docFreqs = array();
          $docNum   = 0;
          for ($entry = 0; $entry < $this->_lastTermInfo->docFreq; $entry++) {
              $docDelta = $this->_frqFile->readVInt();

              // Odd value: frequency is 1 and is not stored separately
              $singleOccurrence = ($docDelta % 2 == 1);

              $docNum += ($docDelta - ($singleOccurrence ? 1 : 0))/2;
              $docFreqs[$docNum] = $singleOccurrence ? 1 : $this->_frqFile->readVInt();
          }

          // Term positions (stored as deltas). They are always read, but are
          // stored only for documents which are present in the document map.
          $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
          foreach ($docFreqs as $docNum => $occurrences) {
              $position     = 0;
              $docPositions = array();

              for ($entry = 0; $entry < $occurrences; $entry++) {
                  $position      += $this->_prxFile->readVInt();
                  $docPositions[] = $position;
              }

              if (isset($this->_docMap[$docNum])) {
                  $this->_lastTermPositions[$this->_docMap[$docNum]] = $docPositions;
              }
          }
      }

      // Release the files as soon as the last term is read
      $this->_termCount--;
      if ($this->_termCount == 0) {
          $this->_tisFile = $this->_frqFile = $this->_prxFile = null;
      }

      return $this->_lastTerm;
  }
  /**
   * Close terms stream
   *
   * Releases the stream files and clears the current term, its info and
   * positions, and the document map.
   * Should be used for resources clean up if stream is not read up to the end
   */
  public function closeTermsStream()
  {
      // Stream files
      $this->_tisFile = $this->_frqFile = $this->_prxFile = null;

      // Current position data
      $this->_lastTerm = $this->_lastTermInfo = $this->_lastTermPositions = null;

      $this->_docMap = null;
  }
  /**
   * Returns the term at the current position of the terms stream.
   * It is null if the stream is closed or is read up to the end.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function currentTerm()
  {
      return $this->_lastTerm;
  }
  /**
   * Returns an array of all positions of the current term in the documents.
   * Return array structure: array( docId => array( pos1, pos2, ...), ...)
   *
   * Null is returned if positions are not loaded for the current term.
   *
   * @return array|null
   */
  public function currentTermPositions()
  {
      return $this->_lastTermPositions;
  }
  1400. }