PageRenderTime 57ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/ZendFramework/library/Zend/Search/Lucene/Index/SegmentInfo.php

https://github.com/michaeljoyce/pkp-lib
PHP | 1533 lines | 806 code | 227 blank | 500 comment | 206 complexity | 0cf7c207db45e1ec8cee1919ed57491e MD5 | raw file
Possible License(s): LGPL-2.1, BSD-3-Clause
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Index
  18. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /** Zend_Search_Lucene_Index_DictionaryLoader */
  22. require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
  23. /** Zend_Search_Lucene_Exception */
  24. require_once 'Zend/Search/Lucene/Exception.php';
  25. /** Zend_Search_Lucene_LockManager */
  26. require_once 'Zend/Search/Lucene/LockManager.php';
  27. /**
  28. * @category Zend
  29. * @package Zend_Search_Lucene
  30. * @subpackage Index
  31. * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  32. * @license http://framework.zend.com/license/new-bsd New BSD License
  33. */
  34. class Zend_Search_Lucene_Index_SegmentInfo
  35. {
  36. /**
  37. * Number of docs in a segment
  38. *
  39. * @var integer
  40. */
  41. private $_docCount;
  42. /**
  43. * Segment name
  44. *
  45. * @var string
  46. */
  47. private $_name;
  48. /**
  49. * Term Dictionary Index
  50. *
  51. * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
  52. * of performance considerations)
  53. * [0] -> $termFieldNum
  54. * [1] -> $termValue
  55. *
  56. * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  57. *
  58. * @var array
  59. */
  60. private $_termDictionary;
  61. /**
  62. * Term Dictionary Index TermInfos
  63. *
  64. * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
  65. * of performance considerations)
  66. * [0] -> $docFreq
  67. * [1] -> $freqPointer
  68. * [2] -> $proxPointer
  69. * [3] -> $skipOffset
  70. * [4] -> $indexPointer
  71. *
  72. * @var array
  73. */
  74. private $_termDictionaryInfos;
  75. /**
  76. * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
  77. *
  78. * @var array
  79. */
  80. private $_fields;
  81. /**
  82. * Field positions in a dictionary.
  83. * (Term dictionary contains fields ordered by names)
  84. *
  85. * @var array
  86. */
  87. private $_fieldsDicPositions;
  88. /**
  89. * Associative array where the key is the file name and the value is data offset
  90. * in a compound segment file (.cfs).
  91. *
  92. * @var array
  93. */
  94. private $_segFiles;
  95. /**
  96. * Associative array where the key is the file name and the value is file size (.cfs).
  97. *
  98. * @var array
  99. */
  100. private $_segFileSizes;
  101. /**
  102. * Delete file generation number
  103. *
  104. * -2 means autodetect latest delete generation
  105. * -1 means 'there is no delete file'
  106. * 0 means pre-2.1 format delete file
  107. * X specifies used delete file
  108. *
  109. * @var integer
  110. */
  111. private $_delGen;
  112. /**
  113. * Segment has single norms file
  114. *
  115. * If true then one .nrm file is used for all fields
  116. * Otherwise .fN files are used
  117. *
  118. * @var boolean
  119. */
  120. private $_hasSingleNormFile;
  121. /**
  122. * Use compound segment file (*.cfs) to collect all other segment files
  123. * (excluding .del files)
  124. *
  125. * @var boolean
  126. */
  127. private $_isCompound;
  128. /**
  129. * File system adapter.
  130. *
  131. * @var Zend_Search_Lucene_Storage_Directory_Filesystem
  132. */
  133. private $_directory;
  134. /**
  135. * Normalization factors.
  136. * An array fieldName => normVector
  137. * normVector is a binary string.
  138. * Each byte corresponds to an indexed document in a segment and
  139. * encodes normalization factor (float value, encoded by
  140. * Zend_Search_Lucene_Search_Similarity::encodeNorm())
  141. *
  142. * @var array
  143. */
  144. private $_norms = array();
  145. /**
  146. * List of deleted documents.
  147. * bitset if bitset extension is loaded or array otherwise.
  148. *
  149. * @var mixed
  150. */
  151. private $_deleted = null;
  152. /**
  153. * $this->_deleted update flag
  154. *
  155. * @var boolean
  156. */
  157. private $_deletedDirty = false;
  /**
   * Zend_Search_Lucene_Index_SegmentInfo constructor
   *
   * Determines whether the segment uses a compound file, reads the compound
   * file table (when present), loads the field infos from the .fnm file and
   * loads the deletions file selected by $delGen.
   *
   * @param Zend_Search_Lucene_Storage_Directory $directory
   * @param string       $name              Segment name
   * @param integer      $docCount          Number of docs in the segment
   * @param integer      $delGen            Delete file generation: -2 autodetect, -1 no delete file,
   *                                        0 pre-2.1 format delete file, X generation to use
   * @param boolean      $hasSingleNormFile True if one .nrm file is used for all fields
   * @param boolean|null $isCompound        Null means "unknown": detect by probing for the .cfs file
   * @throws Zend_Search_Lucene_Exception
   */
  public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $hasSingleNormFile = false, $isCompound = null)
  {
      $this->_directory         = $directory;
      $this->_name              = $name;
      $this->_docCount          = $docCount;
      $this->_hasSingleNormFile = $hasSingleNormFile;
      $this->_delGen            = $delGen;
      $this->_termDictionary    = null;

      if ($isCompound !== null) {
          $this->_isCompound = $isCompound;
      } else {
          // It's a pre-2.1 segment or isCompound is set to 'unknown'
          // Detect if segment uses compound file
          try {
              // Try to open compound file
              $this->_directory->getFileObject($name . '.cfs');

              // Compound file is found
              $this->_isCompound = true;
          } catch (Zend_Search_Lucene_Exception $e) {
              if (strpos($e->getMessage(), 'is not readable') !== false) {
                  // Compound file is not found or is not readable
                  $this->_isCompound = false;
              } else {
                  throw $e;
              }
          }
      }

      $this->_segFiles = array();
      if ($this->_isCompound) {
          $cfsFile = $this->_directory->getFileObject($name . '.cfs');
          $segFilesCount = $cfsFile->readVInt();

          for ($count = 0; $count < $segFilesCount; $count++) {
              $dataOffset = $cfsFile->readLong();
              if ($count != 0) {
                  // Size of the previous entry is the distance between consecutive data offsets
                  $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
              }
              $fileName = $cfsFile->readString();
              $this->_segFiles[$fileName] = $dataOffset;
          }
          if ($count != 0) {
              // The last entry extends to the end of the compound file
              $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
          }
      }

      $fnmFile = $this->openCompoundFile('.fnm');
      $fieldsCount = $fnmFile->readVInt();
      $fieldNames = array();
      $fieldNums  = array();
      $this->_fields = array();
      for ($count = 0; $count < $fieldsCount; $count++) {
          $fieldName = $fnmFile->readString();
          $fieldBits = $fnmFile->readByte();
          $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                          $fieldBits & 1,
                                                                          $count,
                                                                          $fieldBits & 2);
          if ($fieldBits & 0x10) {
              // norms are omitted for the indexed field
              $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
          }

          $fieldNums[$count]  = $count;
          $fieldNames[$count] = $fieldName;
      }
      // Term dictionary is ordered by field name; build fieldNum => dictionary position map
      array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
      $this->_fieldsDicPositions = array_flip($fieldNums);

      if ($this->_delGen == -2) {
          $this->_detectLatestDelGen();
      }

      $useBitset = extension_loaded('bitset');

      if ($this->_delGen == -1) {
          // There is no delete file for this segment
          // Do nothing
      } else if ($this->_delGen == 0) {
          // It's a segment with pre-2.1 format delete file
          // Try to find delete file
          try {
              // '.del' files always stored in a separate file
              // Segment compound is not used
              $delFile = $this->_directory->getFileObject($this->_name . '.del');

              $byteCount = $delFile->readInt();
              $byteCount = (int) ceil($byteCount/8);
              $bitCount  = $delFile->readInt();

              if ($bitCount == 0) {
                  $delBytes = '';
              } else {
                  $delBytes = $delFile->readBytes($byteCount);
              }

              if ($useBitset) {
                  $this->_deleted = $delBytes;
              } else {
                  $this->_deleted = $this->_unpackDeletedBits($delBytes);
              }
          } catch(Zend_Search_Exception $e) {
              if (strpos($e->getMessage(), 'is not readable') === false ) {
                  throw $e;
              }
              // There is no delete file
              // Do nothing
          }
      } else {
          // It's 2.1+ format delete file
          $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
          $delFile = $this->_directory->getFileObject($delFileName);

          $format = $delFile->readInt();

          if ($format == (int)0xFFFFFFFF) {
              // Sparse format: a sequence of (byte-number gap, non-zero byte) pairs
              if ($useBitset) {
                  $this->_deleted = bitset_empty();
              } else {
                  $this->_deleted = array();
              }

              // Both header values are read only to advance the file position
              $byteCount = $delFile->readInt();
              $bitCount  = $delFile->readInt();

              $delFileSize = $this->_directory->fileLength($delFileName);
              $byteNum = 0;

              do {
                  $dgap = $delFile->readVInt();
                  $nonZeroByte = $delFile->readByte();

                  $byteNum += $dgap;

                  for ($bit = 0; $bit < 8; $bit++) {
                      if ($nonZeroByte & (1<<$bit)) {
                          if ($useBitset) {
                              bitset_incl($this->_deleted, $byteNum*8 + $bit);
                          } else {
                              $this->_deleted[$byteNum*8 + $bit] = 1;
                          }
                      }
                  }
              } while ($delFile->tell() < $delFileSize);
          } else {
              // $format is actually byte count
              $byteCount = (int) ceil($format/8);
              $bitCount  = $delFile->readInt();

              if ($bitCount == 0) {
                  $delBytes = '';
              } else {
                  $delBytes = $delFile->readBytes($byteCount);
              }

              if ($useBitset) {
                  $this->_deleted = $delBytes;
              } else {
                  $this->_deleted = $this->_unpackDeletedBits($delBytes);
              }
          }
      }
  }

  /**
   * Expands a packed deletions bit vector into the array representation
   * used when the bitset extension is not loaded.
   *
   * Only the bytes actually present in $delBytes are inspected, so an empty
   * string yields an empty array without out-of-range string reads.
   *
   * @param string $delBytes
   * @return array array(docId => 1, ...)
   */
  private function _unpackDeletedBits($delBytes)
  {
      $deleted = array();
      $length  = strlen($delBytes);

      for ($byteNum = 0; $byteNum < $length; $byteNum++) {
          $byte = ord($delBytes[$byteNum]);
          for ($bit = 0; $bit < 8; $bit++) {
              if ($byte & (1<<$bit)) {
                  $deleted[$byteNum*8 + $bit] = 1;
              }
          }
      }

      return $deleted;
  }
  /**
   * Opens an index file of this segment.
   *
   * For a non-compound segment the file is opened directly from the directory.
   * For a compound segment the .cfs file object is returned, positioned at the
   * data offset recorded for the requested file.
   *
   * @param string  $extension
   * @param boolean $shareHandler
   * @throws Zend_Search_Lucene_Exception
   * @return Zend_Search_Lucene_Storage_File
   */
  public function openCompoundFile($extension, $shareHandler = true)
  {
      $segmentFile = $this->_name . $extension;

      if ($this->_isCompound) {
          if (!isset($this->_segFiles[$segmentFile])) {
              throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
                                                   . $segmentFile . ' file.');
          }

          $cfsFile = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
          $cfsFile->seek($this->_segFiles[$segmentFile]);

          return $cfsFile;
      }

      return $this->_directory->getFileObject($segmentFile, $shareHandler);
  }
  /**
   * Get length of a segment file.
   *
   * A standalone file in the directory takes precedence; otherwise the size
   * recorded for the file while reading the compound file table is used.
   *
   * @param string $extension
   * @throws Zend_Search_Lucene_Exception
   * @return integer
   */
  public function compoundFileLength($extension)
  {
      $segmentFile = $this->_name . $extension;

      // Standalone file wins over a compound file entry
      if ($this->_directory->fileExists($segmentFile)) {
          return $this->_directory->fileLength($segmentFile);
      }

      if (isset($this->_segFileSizes[$segmentFile])) {
          return $this->_segFileSizes[$segmentFile];
      }

      throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                           . $segmentFile . ' file.');
  }
  /**
   * Looks up a field number by field name.
   *
   * @param string $fieldName
   * @return integer Field number, or -1 if the segment has no such field
   */
  public function getFieldNum($fieldName)
  {
      $fieldNum = -1;

      foreach ($this->_fields as $fieldInfo) {
          if ($fieldInfo->name == $fieldName) {
              $fieldNum = $fieldInfo->number;
              break;
          }
      }

      return $fieldNum;
  }
  /**
   * Returns the field info stored under the given field number.
   *
   * No bounds check is performed on $fieldNum.
   *
   * @param integer $fieldNum
   * @return Zend_Search_Lucene_Index_FieldInfo
   */
  public function getField($fieldNum)
  {
      return $this->_fields[$fieldNum];
  }
  /**
   * Returns field names of this segment as array(name => name).
   * If $indexed is true, only indexed fields are included.
   *
   * @param boolean $indexed
   * @return array
   */
  public function getFields($indexed = false)
  {
      $names = array();

      foreach ($this->_fields as $fieldInfo) {
          if ($indexed && !$fieldInfo->isIndexed) {
              // Only indexed fields were requested
              continue;
          }
          $names[$fieldInfo->name] = $fieldInfo->name;
      }

      return $names;
  }
  /**
   * Returns the segment's FieldInfo objects, keyed by field number.
   *
   * @return array
   */
  public function getFieldInfos()
  {
      return $this->_fields;
  }
  /**
   * Returns the deletions file generation number currently held by this object.
   *
   * @return integer
   */
  public function getDelGen()
  {
      return $this->_delGen;
  }
  /**
   * Returns the document count of this segment, deleted documents included.
   *
   * @return integer
   */
  public function count()
  {
      return $this->_docCount;
  }
  /**
   * Counts deleted documents.
   *
   * @return integer 0 when no deletions data is loaded
   */
  private function _deletedCount()
  {
      if ($this->_deleted === null) {
          return 0;
      }

      // With the bitset extension the deletions are a bitset string and
      // must be expanded before counting
      $deletedIds = extension_loaded('bitset') ? bitset_to_array($this->_deleted)
                                               : $this->_deleted;

      return count($deletedIds);
  }
  /**
   * Returns the number of documents in this segment that are not deleted.
   *
   * @return integer
   */
  public function numDocs()
  {
      if (!$this->hasDeletions()) {
          return $this->_docCount;
      }

      return $this->_docCount - $this->_deletedCount();
  }
  /**
   * Translates a field number into its position in the fields dictionary.
   *
   * @param integer $fieldNum
   * @return integer
   */
  private function _getFieldPosition($fieldNum) {
      if (isset($this->_fieldsDicPositions[$fieldNum])) {
          return $this->_fieldsDicPositions[$fieldNum];
      }

      // Values missing from the translation table are used as a 'direct value'
      return $fieldNum;
  }
  /**
   * Returns the name of this segment.
   *
   * @return string
   */
  public function getName()
  {
      return $this->_name;
  }
  484. /**
  485. * TermInfo cache
  486. *
  487. * Size is 1024.
  488. * Numbers are used instead of class constants because of performance considerations
  489. *
  490. * @var array
  491. */
  492. private $_termInfoCache = array();
  /**
   * Evicts entries from the front of the TermInfo cache (the least recently
   * used ones) and stops as soon as 768 entries are left.
   */
  private function _cleanUpTermInfoCache()
  {
      $remaining = count($this->_termInfoCache);

      foreach (array_keys($this->_termInfoCache) as $oldestKey) {
          unset($this->_termInfoCache[$oldestKey]);
          $remaining--;

          // leave 768 last used term infos
          if ($remaining == 768) {
              break;
          }
      }
  }
  /**
   * Load terms dictionary index
   *
   * Uses the serialized copy (.sti) when it exists and has the expected shape
   * (a two-element array of arrays); otherwise the index is rebuilt from the
   * .tii file and a fresh .sti file is written.
   *
   * @throws Zend_Search_Lucene_Exception
   */
  private function _loadDictionaryIndex()
  {
      $stiFileName = $this->_name . '.sti';

      // Check, if index is already serialized
      if ($this->_directory->fileExists($stiFileName)) {
          // Load serialized dictionary index data
          $stiFile = $this->_directory->getFileObject($stiFileName);
          $stiFileData = $stiFile->readBytes($this->_directory->fileLength($stiFileName));

          // NOTE(review): unserialize() runs on data read from the index directory;
          // confirm that directory is never writable by untrusted parties.
          // '@' is kept deliberately: corrupt data must fall through to the .tii rebuild.
          $unserializedData = @unserialize($stiFileData);

          // Accept the cached copy only if it has the shape serialize() produced below
          if (is_array($unserializedData)
              && isset($unserializedData[0], $unserializedData[1])
              && is_array($unserializedData[0])
              && is_array($unserializedData[1])) {
              list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
              return;
          }
      }

      // Load data from .tii file and generate .sti file

      // Prefetch dictionary index data
      $tiiFile = $this->openCompoundFile('.tii');
      $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));

      // Load dictionary index data
      list($this->_termDictionary, $this->_termDictionaryInfos) =
                  Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);

      $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
      $stiFile = $this->_directory->createFile($stiFileName);
      $stiFile->writeBytes($stiFileData);
  }
  /**
   * Scans terms dictionary and returns term info
   *
   * Lookup order: TermInfo cache, then binary search over the in-memory
   * dictionary index, then a sequential scan of the .tis file starting at the
   * closest preceding index entry.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @throws Zend_Search_Lucene_Exception
   * @return Zend_Search_Lucene_Index_TermInfo|null Null if the term is not in this segment
   */
  public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
  {
      $termKey = $term->key();

      // array_key_exists() (not isset()) so that cached "term not found" (null) results also hit
      if (array_key_exists($termKey, $this->_termInfoCache)) {
          $termInfo = $this->_termInfoCache[$termKey];

          // Move termInfo to the end of cache
          unset($this->_termInfoCache[$termKey]);
          $this->_termInfoCache[$termKey] = $termInfo;

          return $termInfo;
      }

      if ($this->_termDictionary === null) {
          $this->_loadDictionaryIndex();
      }

      $searchField = $this->getFieldNum($term->field);

      if ($searchField == -1) {
          return null;
      }
      $searchDicField = $this->_getFieldPosition($searchField);

      // search for appropriate value in dictionary
      $lowIndex  = 0;
      $highIndex = count($this->_termDictionary)-1;
      while ($highIndex >= $lowIndex) {
          $mid = ($highIndex + $lowIndex) >> 1;
          $midTerm = $this->_termDictionary[$mid];

          $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
          $delta = $searchDicField - $fieldNum;
          if ($delta == 0) {
              $delta = strcmp($term->text, $midTerm[1] /* text */);
          }

          if ($delta < 0) {
              $highIndex = $mid-1;
          } elseif ($delta > 0) {
              $lowIndex  = $mid+1;
          } else {
              // We got it!
              $a = $this->_termDictionaryInfos[$mid];
              $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);

              // Put loaded termInfo into cache, keeping the cache bounded on this path too
              $this->_termInfoCache[$termKey] = $termInfo;
              if (count($this->_termInfoCache) >= 1024) {
                  $this->_cleanUpTermInfoCache();
              }

              return $termInfo;
          }
      }

      if ($highIndex == -1) {
          // Term is out of the dictionary range
          return null;
      }

      $prevPosition = $highIndex;
      $prevTerm     = $this->_termDictionary[$prevPosition];
      $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

      $tisFile = $this->openCompoundFile('.tis');
      $tiVersion = $tisFile->readInt();
      if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
          $tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
          throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
      }

      $termCount     = $tisFile->readLong();
      $indexInterval = $tisFile->readInt();
      $skipInterval  = $tisFile->readInt();
      if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
          // Value itself is not needed; the read advances past the header field
          $maxSkipLevels = $tisFile->readInt();
      }

      $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);

      $termValue    = $prevTerm[1] /* text */;
      $termFieldNum = $prevTerm[0] /* field */;
      $freqPointer  = $prevTermInfo[1] /* freqPointer */;
      $proxPointer  = $prevTermInfo[2] /* proxPointer */;

      // Defined up front so they exist even if the scan loop does not execute
      $docFreq    = 0;
      $skipOffset = 0;

      for ($count = $prevPosition*$indexInterval + 1;
           $count <= $termCount &&
           ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
            ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
             strcmp($termValue, $term->text) < 0) );
           $count++) {
          $termPrefixLength = $tisFile->readVInt();
          $termSuffix       = $tisFile->readString();
          $termFieldNum     = $tisFile->readVInt();
          $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;

          $docFreq      = $tisFile->readVInt();
          $freqPointer += $tisFile->readVInt();
          $proxPointer += $tisFile->readVInt();
          if( $docFreq >= $skipInterval ) {
              $skipOffset = $tisFile->readVInt();
          } else {
              $skipOffset = 0;
          }
      }

      // Byte-wise comparison, consistent with the scan above: loose '==' would treat
      // numeric-looking terms such as "1.0" and "1.00" as equal
      if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
          $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
      } else {
          $termInfo = null;
      }

      // Put loaded termInfo into cache
      $this->_termInfoCache[$termKey] = $termInfo;

      // '>=' because the cache may already have passed 1024 entries
      if (count($this->_termInfoCache) >= 1024) {
          $this->_cleanUpTermInfoCache();
      }

      return $termInfo;
  }
  /**
   * Returns term freqs array.
   * Result array structure: array(docId => freq, ...)
   *
   * Each entry of the .frq stream is a VInt whose lowest bit set means
   * "frequency is 1"; otherwise the frequency follows as a separate VInt.
   * The remaining value is the delta to the previous document id.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param integer $shift Value added to every document id in the result
   * @return array Empty array if the term is not present in this segment
   */
  public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0)
  {
      $termInfo = $this->getTermInfo($term);
      if (!($termInfo instanceof Zend_Search_Lucene_Index_TermInfo)) {
          return array();
      }

      $frqFile = $this->openCompoundFile('.frq');
      $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

      $freqs = array();
      $docId = 0;
      for ($docNum = 0; $docNum < $termInfo->docFreq; $docNum++) {
          $docDelta   = $frqFile->readVInt();
          $singleFreq = ($docDelta % 2 == 1);

          $docId += ($singleFreq ? $docDelta - 1 : $docDelta) / 2;

          if ($singleFreq) {
              $freqs[$shift + $docId] = 1;
          } else {
              $freqs[$shift + $docId] = $frqFile->readVInt();
          }
      }

      return $freqs;
  }
  /**
   * Returns term positions array.
   * Result array structure: array(docId => array(pos1, pos2, ...), ...)
   *
   * Frequencies are read from the .frq stream first; the .prx stream is then
   * read as delta-encoded positions, one run of $freq values per document.
   *
   * @param Zend_Search_Lucene_Index_Term $term
   * @param integer $shift Value added to every document id in the result
   * @return array Empty array if the term is not present in this segment
   */
  public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0)
  {
      $termInfo = $this->getTermInfo($term);
      if (!($termInfo instanceof Zend_Search_Lucene_Index_TermInfo)) {
          return array();
      }

      $frqFile = $this->openCompoundFile('.frq');
      $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

      $docFreqs = array();
      $docId    = 0;
      for ($docNum = 0; $docNum < $termInfo->docFreq; $docNum++) {
          $docDelta   = $frqFile->readVInt();
          $singleFreq = ($docDelta % 2 == 1);

          $docId += ($singleFreq ? $docDelta - 1 : $docDelta) / 2;

          if ($singleFreq) {
              $docFreqs[$docId] = 1;
          } else {
              $docFreqs[$docId] = $frqFile->readVInt();
          }
      }

      $prxFile = $this->openCompoundFile('.prx');
      $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

      $positionsByDoc = array();
      foreach ($docFreqs as $docId => $freq) {
          $position  = 0;
          $positions = array();

          for ($occurrence = 0; $occurrence < $freq; $occurrence++) {
              $position   += $prxFile->readVInt();
              $positions[] = $position;
          }

          $positionsByDoc[$shift + $docId] = $positions;
      }

      return $positionsByDoc;
  }
  /**
   * Load normalization factors from an index file
   *
   * With a single norms file (.nrm) the vectors of all indexed fields are
   * read in one pass; otherwise only the '.f<fieldNum>' file is read.
   *
   * @param integer $fieldNum
   * @throws Zend_Search_Lucene_Exception
   */
  private function _loadNorm($fieldNum)
  {
      if (!$this->_hasSingleNormFile) {
          $fFile = $this->openCompoundFile('.f' . $fieldNum);
          $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
          return;
      }

      $normfFile = $this->openCompoundFile('.nrm');

      $header              = $normfFile->readBytes(3);
      $headerFormatVersion = $normfFile->readByte();

      if (!($header == 'NRM' && $headerFormatVersion == (int)0xFF)) {
          throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
      }

      foreach ($this->_fields as $num => $fieldInfo) {
          if ($fieldInfo->isIndexed) {
              $this->_norms[$num] = $normfFile->readBytes($this->_docCount);
          }
      }
  }
  /**
   * Returns normalization factor for specified document
   *
   * @param integer $id        Internal document id within this segment
   * @param string  $fieldName
   * @return float|null Null if the field is unknown to this segment or is not indexed
   */
  public function norm($id, $fieldName)
  {
      $fieldNum = $this->getFieldNum($fieldName);

      // getFieldNum() returns -1 for unknown fields; $this->_fields has no such entry
      if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
          return null;
      }

      if (!isset($this->_norms[$fieldNum])) {
          $this->_loadNorm($fieldNum);
      }

      // One encoded norm byte per document
      return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
  }
  /**
   * Returns norm vector, encoded in a byte string
   *
   * For a field that is unknown or not indexed, a vector filled with the
   * default similarity's encoded lengthNorm($fieldName, 0) is returned.
   *
   * @param string $fieldName
   * @return string One byte per document of the segment
   */
  public function normVector($fieldName)
  {
      $fieldNum       = $this->getFieldNum($fieldName);
      $isIndexedField = ($fieldNum != -1) && $this->_fields[$fieldNum]->isIndexed;

      if (!$isIndexedField) {
          $similarity  = Zend_Search_Lucene_Search_Similarity::getDefault();
          $defaultNorm = $similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) );

          return str_repeat(chr($defaultNorm), $this->_docCount);
      }

      if (!isset($this->_norms[$fieldNum])) {
          $this->_loadNorm($fieldNum);
      }

      return $this->_norms[$fieldNum];
  }
  /**
   * Tells whether deletions data is present for this segment
   * (i.e. the deleted documents list is not null).
   *
   * @return boolean
   */
  public function hasDeletions()
  {
      return $this->_deleted !== null;
  }
  /**
   * Reports whether the segment has a single norms file.
   *
   * @return integer 1 if a single norms file is used, 0 otherwise
   */
  public function hasSingleNormFile()
  {
      if ($this->_hasSingleNormFile) {
          return 1;
      }

      return 0;
  }
  /**
   * Reports whether the segment is stored in a compound segment file.
   *
   * @return boolean
   */
  public function isCompound()
  {
      return $this->_isCompound;
  }
  /**
   * Marks a document as deleted in this segment and flags the deletions
   * data as modified.
   *
   * @param integer $id Internal document id
   */
  public function delete($id)
  {
      $this->_deletedDirty = true;

      if (!extension_loaded('bitset')) {
          // Array representation: array(docId => 1, ...)
          if ($this->_deleted === null) {
              $this->_deleted = array();
          }
          $this->_deleted[$id] = 1;
          return;
      }

      if ($this->_deleted === null) {
          $this->_deleted = bitset_empty($id);
      }
      bitset_incl($this->_deleted, $id);
  }
  /**
   * Checks whether a document is marked as deleted.
   *
   * @param integer $id Internal document id
   * @return boolean
   */
  public function isDeleted($id)
  {
      if ($this->_deleted === null) {
          return false;
      }

      if (!extension_loaded('bitset')) {
          return isset($this->_deleted[$id]);
      }

      return bitset_in($this->_deleted, $id);
  }
  /**
   * Detect latest delete generation
   *
   * Is actually used from writeChanges() method or from the constructor if it's invoked from
   * Index writer. In both cases index write lock is already obtained, so we shouldn't care
   * about it
   *
   * Sets $this->_delGen to -1 when the directory holds no deletions file for this
   * segment, otherwise to the highest generation found ('<name>.del' counts as 0).
   */
  private function _detectLatestDelGen()
  {
      // Segment name is matched literally, so escape any regex metacharacters in it
      $delFilePattern = '/^' . preg_quote($this->_name, '/') . '_([a-zA-Z0-9]+)\.del$/i';

      $delFileList = array();
      foreach ($this->_directory->fileList() as $file) {
          if ($file == $this->_name . '.del') {
              // Matches <segment_name>.del file name
              $delFileList[] = 0;
          } else if (preg_match($delFilePattern, $file, $matches)) {
              // Matches <segment_name>_NNN.del file names (NNN is a base-36 generation number)
              $delFileList[] = (int)base_convert($matches[1], 36, 10);
          }
      }

      if (count($delFileList) == 0) {
          // There is no deletions file for current segment in the directory
          // Set deletions file generation number to -1 ('there is no delete file')
          $this->_delGen = -1;
      } else {
          // There are some deletions files for current segment in the directory
          // Set deletions file generation number to the highest number
          $this->_delGen = max($delFileList);
      }
  }
  /**
   * Write changes if it's necessary.
   *
   * Writes the current deletions as a new '<name>_<generation>.del' file, where
   * the generation is one above the latest found in the directory (or 1 if none
   * exists). Does nothing when no document was deleted since loading or the last write.
   *
   * This method must be invoked only from the Writer _updateSegments() method,
   * so index Write lock has to be already obtained.
   *
   * @internal
   */
  public function writeChanges()
  {
      if (!$this->_deletedDirty) {
          return;
      }

      if (extension_loaded('bitset')) {
          $delBytes = $this->_deleted;
          $bitCount = count(bitset_to_array($delBytes));
      } else {
          // Pack array(docId => 1, ...) into a bit vector, lowest bit first within each byte
          $byteCount = (int) floor($this->_docCount/8) + 1;
          $delBytes  = '';
          for ($count = 0; $count < $byteCount; $count++) {
              $byte = 0;
              for ($bit = 0; $bit < 8; $bit++) {
                  if (isset($this->_deleted[$count*8 + $bit])) {
                      $byte |= (1<<$bit);
                  }
              }
              $delBytes .= chr($byte);
          }
          $bitCount = count($this->_deleted);
      }

      // Get new generation number
      $this->_detectLatestDelGen();

      if ($this->_delGen == -1) {
          // Set delete file generation number to 1
          $this->_delGen = 1;
      } else {
          // Increase delete file generation number by 1
          $this->_delGen++;
      }

      $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
      $delFile->writeInt($this->_docCount);
      $delFile->writeInt($bitCount);
      $delFile->writeBytes($delBytes);

      $this->_deletedDirty = false;
  }
  910. /**
  911. * Term Dictionary File object for stream like terms reading
  912. *
  913. * @var Zend_Search_Lucene_Storage_File
  914. */
  915. private $_tisFile = null;
  916. /**
  917. * Actual offset of the .tis file data
  918. *
  919. * @var integer
  920. */
  921. private $_tisFileOffset;
  922. /**
  923. * Frequencies File object for stream like terms reading
  924. *
  925. * @var Zend_Search_Lucene_Storage_File
  926. */
  927. private $_frqFile = null;
  928. /**
  929. * Actual offset of the .frq file data
  930. *
  931. * @var integer
  932. */
  933. private $_frqFileOffset;
  934. /**
  935. * Positions File object for stream like terms reading
  936. *
  937. * @var Zend_Search_Lucene_Storage_File
  938. */
  939. private $_prxFile = null;
  940. /**
  941. * Actual offset of the .prx file in the compound file
  942. *
  943. * @var integer
  944. */
  945. private $_prxFileOffset;
  946. /**
  947. * Actual number of terms in term stream
  948. *
  949. * @var integer
  950. */
  951. private $_termCount = 0;
  952. /**
  953. * Overall number of terms in term stream
  954. *
  955. * @var integer
  956. */
  957. private $_termNum = 0;
  958. /**
  959. * Segment index interval
  960. *
  961. * @var integer
  962. */
  963. private $_indexInterval;
  964. /**
  965. * Segment skip interval
  966. *
  967. * @var integer
  968. */
  969. private $_skipInterval;
  970. /**
  971. * Last TermInfo in a terms stream
  972. *
  973. * @var Zend_Search_Lucene_Index_TermInfo
  974. */
  975. private $_lastTermInfo = null;
  976. /**
  977. * Last Term in a terms stream
  978. *
  979. * @var Zend_Search_Lucene_Index_Term
  980. */
  981. private $_lastTerm = null;
  982. /**
  983. * Map of the document IDs
  984. * Used to get new docID after removing deleted documents.
  985. * It is not very efficient in terms of memory usage,
  986. * but it is much faster than other methods
  987. *
  988. * @var array|null
  989. */
  990. private $_docMap = null;
  991. /**
  992. * An array of all term positions in the documents.
  993. * Array structure: array( docId => array( pos1, pos2, ...), ...)
  994. *
  995. * Is set to null if term positions loading has to be skipped
  996. *
  997. * @var array|null
  998. */
  999. private $_lastTermPositions;
  1000. /**
  1001. * Terms scan mode
  1002. *
  1003. * Values:
  1004. *
  1005. * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
  1006. * self::SM_FULL_INFO - terms are scanned, frequency and position info is retrieved
  1007. * self::SM_MERGE_INFO - same as SM_FULL_INFO, but document numbers are compacted (shifted if segment has deleted documents)
  1008. *
  1009. * @var integer
  1010. */
  1011. private $_termsScanMode;
  1012. /** Scan modes */
  1013. const SM_TERMS_ONLY = 0; // terms are scanned, no additional info is retrieved
  1014. const SM_FULL_INFO = 1; // terms are scanned, frequency and position info is retrieved
  1015. const SM_MERGE_INFO = 2; // terms are scanned, frequency and position info is retrieved
  1016. // document numbers are compacted (shifted if segment contains deleted documents)
  /**
   * Reset terms stream
   *
   * (Re)opens the term dictionary (.tis) file, reads its header and moves the
   * stream to the first term by calling nextTerm().
   *
   * For self::SM_FULL_INFO and self::SM_MERGE_INFO the frequencies (.frq) and
   * positions (.prx) files are opened as well, and the document id map is
   * built for every document which is not deleted:
   *   - SM_FULL_INFO:  new id is $startId + <id within the segment>
   *   - SM_MERGE_INFO: new ids are compacted (deleted documents leave no gaps)
   *
   * Returns start document id for the next segment
   *
   * @param integer $startId  id for the first document of this segment
   * @param integer $mode     terms scan mode, one of the self::SM_* constants
   * @throws Zend_Search_Lucene_Exception  if the .tis file format or the scan mode is not recognized
   * @return integer
   */
  public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
  {
      // Drop the previous dictionary file object (if any) before reopening it
      $this->_tisFile = null;
      $this->_tisFile = $this->openCompoundFile('.tis', false);
      $this->_tisFileOffset = $this->_tisFile->tell();

      $tiVersion = $this->_tisFile->readInt();
      if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
          $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
          throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
      }

      $this->_termCount     =
      $this->_termNum       = $this->_tisFile->readLong(); // Read terms count
      $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
      $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
      if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
          // maxSkipLevels value is not used here, but it has to be consumed
          // to keep the file pointer at the first term entry
          $this->_tisFile->readInt();
      }

      $this->_frqFile = null;
      $this->_prxFile = null;

      $this->_docMap = array();

      $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
      $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
      $this->_lastTermPositions = null;

      $this->_termsScanMode = $mode;

      // Only the merge mode renumbers documents without gaps
      $compactIds = ($mode == self::SM_MERGE_INFO);

      switch ($mode) {
          case self::SM_TERMS_ONLY:
              // Do nothing
              break;

          case self::SM_FULL_INFO:
              // break intentionally omitted
          case self::SM_MERGE_INFO:
              $this->_frqFile = $this->openCompoundFile('.frq', false);
              $this->_frqFileOffset = $this->_frqFile->tell();

              $this->_prxFile = $this->openCompoundFile('.prx', false);
              $this->_prxFileOffset = $this->_prxFile->tell();

              for ($docId = 0; $docId < $this->_docCount; $docId++) {
                  if (!$this->isDeleted($docId)) {
                      // count() is taken before the new entry is added, so compacted ids start at $startId
                      $this->_docMap[$docId] = $startId + ($compactIds ? count($this->_docMap) : $docId);
                  }
              }
              break;

          default:
              throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
      }

      $this->nextTerm();

      return $startId + ($compactIds ? count($this->_docMap) : $this->_docCount);
  }
  /**
   * Skip terms stream up to the specified term prefix.
   *
   * Prefix contains fully specified field info and portion of searched term.
   *
   * The dictionary index is binary-searched for the closest entry which is
   * not greater than the prefix, the .tis file is positioned at that entry
   * and the stream is then scanned forward with nextTerm().
   *
   * After the call the current term is the first term which is equal to or
   * greater than the prefix. It is null (and the stream files are released)
   * if the field is not present in the segment or no such term exists.
   *
   * @param Zend_Search_Lucene_Index_Term $prefix
   * @throws Zend_Search_Lucene_Exception
   */
  public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
  {
      if ($this->_termDictionary === null) {
          $this->_loadDictionaryIndex();
      }

      $searchField = $this->getFieldNum($prefix->field);

      if ($searchField == -1) {
          /**
           * Field is not presented in this segment
           * Go to the end of dictionary
           */
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;

          $this->_lastTerm          = null;
          $this->_lastTermInfo      = null;
          $this->_lastTermPositions = null;

          return;
      }
      $searchDicField = $this->_getFieldPosition($searchField);

      // search for appropriate value in dictionary
      $lowIndex  = 0;
      $highIndex = count($this->_termDictionary) - 1;
      while ($highIndex >= $lowIndex) {
          $mid     = ($highIndex + $lowIndex) >> 1;
          $midTerm = $this->_termDictionary[$mid];

          $delta = $searchDicField - $this->_getFieldPosition($midTerm[0] /* field */);
          if ($delta == 0) {
              $delta = strcmp($prefix->text, $midTerm[1] /* text */);
          }

          if ($delta < 0) {
              $highIndex = $mid - 1;
          } elseif ($delta > 0) {
              $lowIndex  = $mid + 1;
          } else {
              // We have reached term we are looking for.
              // $highIndex is used as the result below, so it has to point to
              // the matched entry; otherwise the stream would be positioned at
              // a later index entry and terms in between would be skipped.
              $highIndex = $mid;
              break;
          }
      }

      if ($highIndex == -1) {
          // Term is out of the dictionary range
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;

          $this->_lastTerm          = null;
          $this->_lastTermInfo      = null;
          $this->_lastTermPositions = null;

          return;
      }

      $prevPosition = $highIndex;
      $prevTerm     = $this->_termDictionary[$prevPosition];
      $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

      $withPositions = ($this->_termsScanMode == self::SM_FULL_INFO ||
                        $this->_termsScanMode == self::SM_MERGE_INFO);

      if ($this->_tisFile === null) {
          // The end of terms stream is reached and terms dictionary file is closed
          // Perform mini-reset operation
          // NOTE(review): $this->_docMap is not rebuilt here. nextTerm() sets it to null
          // at the end of the stream, in which case no positions get mapped below - confirm
          // this is intended.
          $this->_tisFile = $this->openCompoundFile('.tis', false);

          if ($withPositions) {
              $this->_frqFile = $this->openCompoundFile('.frq', false);
              $this->_prxFile = $this->openCompoundFile('.prx', false);
          }
      }
      $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4] /* indexPointer */, SEEK_SET);

      $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
                                                               ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
      $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
                                                                   $prevTermInfo[1] /* freqPointer */,
                                                                   $prevTermInfo[2] /* proxPointer */,
                                                                   $prevTermInfo[3] /* skipOffset */);
      // Number of terms which are still ahead in the stream
      $this->_termCount = $this->_termNum - $prevPosition*$this->_indexInterval;

      if ($highIndex == 0) {
          // skip start entry
          $this->nextTerm();
      } else if (strcmp($prefix->field, $this->_lastTerm->field) == 0 &&
                 strcmp($prefix->text,  $this->_lastTerm->text)  == 0) {
          // We got exact match in the dictionary index.
          // strcmp() is used instead of '==' because loose comparison treats
          // numeric-looking strings (e.g. '1e3' and '1000') as equal.

          if ($withPositions) {
              $this->_lastTermPositions = array();

              // Decode document ids and term frequencies
              $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
              $freqs = array();   $docId = 0;
              for ($count = 0; $count < $this->_lastTermInfo->docFreq; $count++) {
                  $docDelta = $this->_frqFile->readVInt();
                  if ($docDelta % 2 == 1) {
                      // Odd delta: frequency is 1 and is not stored separately
                      $docId += ($docDelta-1)/2;
                      $freqs[$docId] = 1;
                  } else {
                      $docId += $docDelta/2;
                      $freqs[$docId] = $this->_frqFile->readVInt();
                  }
              }

              // Decode delta-encoded term positions for each document
              $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
              foreach ($freqs as $docId => $freq) {
                  $termPosition = 0;  $positions = array();

                  for ($count = 0; $count < $freq; $count++) {
                      $termPosition += $this->_prxFile->readVInt();
                      $positions[] = $termPosition;
                  }

                  // Documents missing from the map are left out
                  if (isset($this->_docMap[$docId])) {
                      $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
                  }
              }
          }

          return;
      }

      // Search term matching specified prefix
      while ($this->_lastTerm !== null) {
          $fieldOrder = strcmp($this->_lastTerm->field, $prefix->field);

          if ($fieldOrder > 0 ||
              ($fieldOrder == 0 && strcmp($this->_lastTerm->text, $prefix->text) >= 0)) {
              // Current term matches or is greater than the pattern
              return;
          }

          $this->nextTerm();
      }
  }
  /**
   * Scans terms dictionary and returns next term
   *
   * Reads the next entry of the .tis file, rebuilds the term text from the
   * shared prefix of the previous term plus the stored suffix, and updates
   * the current term info. In SM_FULL_INFO and SM_MERGE_INFO scan modes the
   * term positions are loaded as well, keyed by the mapped document id.
   *
   * When the stream is closed or has no terms left, the stream state is
   * cleared and null is returned.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function nextTerm()
  {
      $hasMoreTerms = ($this->_tisFile !== null && $this->_termCount != 0);

      if (!$hasMoreTerms) {
          $this->_lastTerm          = null;
          $this->_lastTermInfo      = null;
          $this->_lastTermPositions = null;
          $this->_docMap            = null;

          // may be necessary for "empty" segment
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;

          return null;
      }

      // Term entry: <shared prefix length> <suffix> <field number>
      $sharedLength = $this->_tisFile->readVInt();
      $suffix       = $this->_tisFile->readString();
      $fieldNumber  = $this->_tisFile->readVInt();

      $sharedPart = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $sharedLength);
      $this->_lastTerm = new Zend_Search_Lucene_Index_Term($sharedPart . $suffix,
                                                           $this->_fields[$fieldNumber]->name);

      // Term info: pointers are stored as deltas against the previous term info
      $docFreq     = $this->_tisFile->readVInt();
      $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
      $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
      // Skip offset is only read when docFreq reaches the skip interval
      $skipOffset  = ($docFreq >= $this->_skipInterval) ? $this->_tisFile->readVInt() : 0;

      $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);

      if (in_array($this->_termsScanMode, array(self::SM_FULL_INFO, self::SM_MERGE_INFO))) {
          $this->_lastTermPositions = array();

          // Collect <document id> => <term frequency> pairs from the .frq file
          $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
          $docFrequencies = array();
          $currentDoc     = 0;
          for ($i = 0; $i < $this->_lastTermInfo->docFreq; $i++) {
              $encodedDelta = $this->_frqFile->readVInt();

              // Upper bits hold the document id delta, the lowest bit flags frequency == 1
              $currentDoc += $encodedDelta >> 1;
              $docFrequencies[$currentDoc] = ($encodedDelta & 1) ? 1 : $this->_frqFile->readVInt();
          }

          // Collect delta-encoded positions from the .prx file
          $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
          foreach ($docFrequencies as $segmentDocId => $frequency) {
              $position     = 0;
              $docPositions = array();

              for ($i = 0; $i < $frequency; $i++) {
                  $position      += $this->_prxFile->readVInt();
                  $docPositions[] = $position;
              }

              // Documents missing from the id map are left out
              if (isset($this->_docMap[$segmentDocId])) {
                  $this->_lastTermPositions[$this->_docMap[$segmentDocId]] = $docPositions;
              }
          }
      }

      $this->_termCount--;
      if ($this->_termCount == 0) {
          // Last term has been read, the files are not needed any more
          $this->_tisFile = null;
          $this->_frqFile = null;
          $this->_prxFile = null;
      }

      return $this->_lastTerm;
  }
  /**
   * Close terms stream
   *
   * Releases the stream files and clears the current term state and the
   * document id map. Should be used for resources clean up if stream is
   * not read up to the end.
   */
  public function closeTermsStream()
  {
      $streamState = array(
          // stream files
          '_tisFile',
          '_frqFile',
          '_prxFile',
          // current term data
          '_lastTerm',
          '_lastTermInfo',
          '_lastTermPositions',
          // document id map
          '_docMap',
      );

      foreach ($streamState as $property) {
          $this->$property = null;
      }
  }
  /**
   * Returns term in current position
   *
   * The value is null once the terms stream is exhausted or closed.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function currentTerm()
  {
      $term = $this->_lastTerm;

      return $term;
  }
  /**
   * Returns an array of all positions of the current term in the documents.
   * Return array structure: array( docId => array( pos1, pos2, ...), ...)
   *
   * The value is null if term positions have not been loaded.
   *
   * @return array|null
   */
  public function currentTermPositions()
  {
      $positions = $this->_lastTermPositions;

      return $positions;
  }
  1306. }