PageRenderTime 215ms CodeModel.GetById 61ms app.highlight 83ms RepoModel.GetById 57ms app.codeStats 1ms

/library/Zend/Search/Lucene/Index/SegmentInfo.php

https://bitbucket.org/baruffaldi/website-2008-computer-shopping-3
PHP | 1651 lines | 877 code | 251 blank | 523 comment | 234 complexity | 33ea5b6ddeadd90f24aa39ed7c8cacfa MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2/**
   3 * Zend Framework
   4 *
   5 * LICENSE
   6 *
   7 * This source file is subject to the new BSD license that is bundled
   8 * with this package in the file LICENSE.txt.
   9 * It is also available through the world-wide-web at this URL:
  10 * http://framework.zend.com/license/new-bsd
  11 * If you did not receive a copy of the license and are unable to
  12 * obtain it through the world-wide-web, please send an email
  13 * to license@zend.com so we can send you a copy immediately.
  14 *
  15 * @category   Zend
  16 * @package    Zend_Search_Lucene
  17 * @subpackage Index
  18 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  19 * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20 */
  21
  22/** Zend_Search_Lucene_Index_DictionaryLoader */
  23require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
  24
  25
  26/** Zend_Search_Lucene_Exception */
  27require_once 'Zend/Search/Lucene/Exception.php';
  28
  29/** Zend_Search_Lucene_LockManager */
  30require_once 'Zend/Search/Lucene/LockManager.php';
  31
  32
  33/**
  34 * @category   Zend
  35 * @package    Zend_Search_Lucene
  36 * @subpackage Index
  37 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
  38 * @license    http://framework.zend.com/license/new-bsd     New BSD License
  39 */
  40class Zend_Search_Lucene_Index_SegmentInfo
  41{
  42    /**
  43     * Number of docs in a segment
  44     *
  45     * @var integer
  46     */
  47    private $_docCount;
  48
  49    /**
  50     * Segment name
  51     *
  52     * @var string
  53     */
  54    private $_name;
  55
  56    /**
  57     * Term Dictionary Index
  58     *
  59     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
  60     * of performance considerations)
  61     * [0] -> $termValue
  62     * [1] -> $termFieldNum
  63     *
  64     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  65     *
  66     * @var array
  67     */
  68    private $_termDictionary;
  69
  70    /**
  71     * Term Dictionary Index TermInfos
  72     *
  73     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
  74     * of performance considerations)
  75     * [0] -> $docFreq
  76     * [1] -> $freqPointer
  77     * [2] -> $proxPointer
  78     * [3] -> $skipOffset
  79     * [4] -> $indexPointer
  80     *
  81     * @var array
  82     */
  83    private $_termDictionaryInfos;
  84
  85    /**
  86     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
  87     *
  88     * @var array
  89     */
  90    private $_fields;
  91
  92    /**
  93     * Field positions in a dictionary.
   94     * (Term dictionary contains fields ordered by names)
  95     *
  96     * @var array
  97     */
  98    private $_fieldsDicPositions;
  99
 100
 101    /**
 102     * Associative array where the key is the file name and the value is data offset
  103     * in a compound segment file (.cfs).
 104     *
 105     * @var array
 106     */
 107    private $_segFiles;
 108
 109    /**
  110     * Associative array where the key is the file name and the value is the file size within the compound segment file (.cfs).
 111     *
 112     * @var array
 113     */
 114    private $_segFileSizes;
 115
 116    /**
 117     * Delete file generation number
 118     *
 119     * -2 means autodetect latest delete generation
 120     * -1 means 'there is no delete file'
 121     *  0 means pre-2.1 format delete file
 122     *  X specifies used delete file
 123     *
 124     * @var integer
 125     */
 126    private $_delGen;
 127
 128    /**
 129     * Segment has single norms file
 130     *
 131     * If true then one .nrm file is used for all fields
 132     * Otherwise .fN files are used
 133     *
 134     * @var boolean
 135     */
 136    private $_hasSingleNormFile;
 137
 138    /**
 139     * Use compound segment file (*.cfs) to collect all other segment files
 140     * (excluding .del files)
 141     *
 142     * @var boolean
 143     */
 144    private $_isCompound;
 145
 146
 147    /**
 148     * File system adapter.
 149     *
 150     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
 151     */
 152    private $_directory;
 153
 154    /**
 155     * Normalization factors.
 156     * An array fieldName => normVector
 157     * normVector is a binary string.
 158     * Each byte corresponds to an indexed document in a segment and
 159     * encodes normalization factor (float value, encoded by
 160     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
 161     *
 162     * @var array
 163     */
 164    private $_norms = array();
 165
 166    /**
 167     * List of deleted documents.
 168     * bitset if bitset extension is loaded or array otherwise.
 169     *
 170     * @var mixed
 171     */
 172    private $_deleted = null;
 173
 174    /**
 175     * $this->_deleted update flag
 176     *
 177     * @var boolean
 178     */
 179    private $_deletedDirty = false;
 180
 181    /**
 182     * True if segment uses shared doc store
 183     *
 184     * @var boolean
 185     */
 186    private $_usesSharedDocStore;
 187
 188    /*
 189     * Shared doc store options.
  190     * It's an associative array with the following items:
 191     * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
 192     * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
 193     * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
 194     */
 195    private $_sharedDocStoreOptions;
 196
 197
 198    /**
 199     * Zend_Search_Lucene_Index_SegmentInfo constructor
 200     *
 201     * @param Zend_Search_Lucene_Storage_Directory $directory
 202     * @param string     $name
 203     * @param integer    $docCount
 204     * @param integer    $delGen
 205     * @param array|null $docStoreOptions
 206     * @param boolean    $hasSingleNormFile
 207     * @param boolean    $isCompound
 208     */
 209    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
 210    {
 211        $this->_directory = $directory;
 212        $this->_name      = $name;
 213        $this->_docCount  = $docCount;
 214
 215        if ($docStoreOptions !== null) {
 216        	$this->_usesSharedDocStore    = true;
 217        	$this->_sharedDocStoreOptions = $docStoreOptions;
 218
 219        	if ($docStoreOptions['isCompound']) {
 220        		$cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
 221                $cfxFilesCount = $cfxFile->readVInt();
 222
 223                $cfxFiles     = array();
 224                $cfxFileSizes = array();
 225
 226                for ($count = 0; $count < $cfxFilesCount; $count++) {
 227                    $dataOffset = $cfxFile->readLong();
 228                    if ($count != 0) {
 229                        $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
 230                    }
 231                    $fileName            = $cfxFile->readString();
 232                    $cfxFiles[$fileName] = $dataOffset;
 233                }
 234                if ($count != 0) {
 235                    $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
 236                }
 237
 238                $this->_sharedDocStoreOptions['files']     = $cfxFiles;
 239                $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
 240        	}
 241        }
 242
 243        $this->_hasSingleNormFile = $hasSingleNormFile;
 244        $this->_delGen            = $delGen;
 245        $this->_termDictionary    = null;
 246
 247
 248        if ($isCompound !== null) {
 249            $this->_isCompound    = $isCompound;
 250        } else {
 251            // It's a pre-2.1 segment or isCompound is set to 'unknown'
 252            // Detect if segment uses compound file
 253            try {
 254                // Try to open compound file
 255                $this->_directory->getFileObject($name . '.cfs');
 256
 257                // Compound file is found
 258                $this->_isCompound = true;
 259            } catch (Zend_Search_Lucene_Exception $e) {
 260                if (strpos($e->getMessage(), 'is not readable') !== false) {
 261                    // Compound file is not found or is not readable
 262                    $this->_isCompound = false;
 263                } else {
 264                    throw $e;
 265                }
 266            }
 267        }
 268
 269        $this->_segFiles = array();
 270        if ($this->_isCompound) {
 271            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
 272            $segFilesCount = $cfsFile->readVInt();
 273
 274            for ($count = 0; $count < $segFilesCount; $count++) {
 275                $dataOffset = $cfsFile->readLong();
 276                if ($count != 0) {
 277                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
 278                }
 279                $fileName = $cfsFile->readString();
 280                $this->_segFiles[$fileName] = $dataOffset;
 281            }
 282            if ($count != 0) {
 283                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
 284            }
 285        }
 286
 287        $fnmFile = $this->openCompoundFile('.fnm');
 288        $fieldsCount = $fnmFile->readVInt();
 289        $fieldNames = array();
 290        $fieldNums  = array();
 291        $this->_fields = array();
 292        for ($count=0; $count < $fieldsCount; $count++) {
 293            $fieldName = $fnmFile->readString();
 294            $fieldBits = $fnmFile->readByte();
 295            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
 296                                                                            $fieldBits & 1,
 297                                                                            $count,
 298                                                                            $fieldBits & 2 );
 299            if ($fieldBits & 0x10) {
 300                // norms are omitted for the indexed field
 301                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
 302            }
 303
 304            $fieldNums[$count]  = $count;
 305            $fieldNames[$count] = $fieldName;
 306        }
 307        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
 308        $this->_fieldsDicPositions = array_flip($fieldNums);
 309
 310        if ($this->_delGen == -2) {
 311        	$this->_detectLatestDelGen();
 312        }
 313
 314        if ($this->_delGen == -1) {
 315            // There is no delete file for this segment
 316            // Do nothing
 317        } else if ($this->_delGen == 0) {
 318            // It's a segment with pre-2.1 format delete file
 319            // Try to find delete file
 320            try {
 321                // '.del' files always stored in a separate file
 322                // Segment compound is not used
 323                $delFile = $this->_directory->getFileObject($this->_name . '.del');
 324
 325                $byteCount = $delFile->readInt();
 326                $byteCount = ceil($byteCount/8);
 327                $bitCount  = $delFile->readInt();
 328
 329                if ($bitCount == 0) {
 330                    $delBytes = '';
 331                } else {
 332                    $delBytes = $delFile->readBytes($byteCount);
 333                }
 334
 335                if (extension_loaded('bitset')) {
 336                    $this->_deleted = $delBytes;
 337                } else {
 338                    $this->_deleted = array();
 339                    for ($count = 0; $count < $byteCount; $count++) {
 340                        $byte = ord($delBytes[$count]);
 341                        for ($bit = 0; $bit < 8; $bit++) {
 342                            if ($byte & (1<<$bit)) {
 343                                $this->_deleted[$count*8 + $bit] = 1;
 344                            }
 345                        }
 346                    }
 347                }
 348            } catch(Zend_Search_Exception $e) {
 349                if (strpos($e->getMessage(), 'is not readable') === false ) {
 350                    throw $e;
 351                }
 352                // There is no delete file
 353                // Do nothing
 354            }
 355        } else {
 356            // It's 2.1+ format delete file
 357            $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
 358
 359            $format = $delFile->readInt();
 360
 361            if ($format == (int)0xFFFFFFFF) {
 362                if (extension_loaded('bitset')) {
 363                    $this->_deleted = bitset_empty();
 364                } else {
 365                    $this->_deleted = array();
 366                }
 367
 368                $byteCount = $delFile->readInt();
 369                $bitCount  = $delFile->readInt();
 370
 371                $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
 372                $byteNum = 0;
 373
 374                do {
 375                    $dgap = $delFile->readVInt();
 376                    $nonZeroByte = $delFile->readByte();
 377
 378                    $byteNum += $dgap;
 379
 380                    for ($bit = 0; $bit < 8; $bit++) {
 381                        if ($nonZeroByte & (1<<$bit)) {
 382                            if (extension_loaded('bitset')) {
 383                                bitset_incl($this->_deleted, $byteNum*8 + $bit);
 384                            } else {
 385                                $this->_deleted[$byteNum*8 + $bit] = 1;
 386                            }
 387                        }
 388                    }
 389                } while ($delFile->tell() < $delFileSize);
 390
 391            } else {
 392                // $format is actually byte count
 393                $byteCount = ceil($format/8);
 394                $bitCount  = $delFile->readInt();
 395
 396                if ($bitCount == 0) {
 397                    $delBytes = '';
 398                } else {
 399                    $delBytes = $delFile->readBytes($byteCount);
 400                }
 401
 402                if (extension_loaded('bitset')) {
 403                    $this->_deleted = $delBytes;
 404                } else {
 405                    $this->_deleted = array();
 406                    for ($count = 0; $count < $byteCount; $count++) {
 407                        $byte = ord($delBytes[$count]);
 408                        for ($bit = 0; $bit < 8; $bit++) {
 409                            if ($byte & (1<<$bit)) {
 410                                $this->_deleted[$count*8 + $bit] = 1;
 411                            }
 412                        }
 413                    }
 414                }
 415            }
 416        }
 417    }
 418
 419    /**
 420     * Opens index file stoted within compound index file
 421     *
 422     * @param string $extension
 423     * @param boolean $shareHandler
 424     * @throws Zend_Search_Lucene_Exception
 425     * @return Zend_Search_Lucene_Storage_File
 426     */
 427    public function openCompoundFile($extension, $shareHandler = true)
 428    {
 429        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
 430        	$fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
 431            $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';
 432
 433            if (!$this->_sharedDocStoreOptions['isCompound']) {
 434            	$fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
 435            	$fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
 436
 437            	if ($extension == '.fdx') {
 438            		// '.fdx' file is requested
 439            		return $fdxFile;
 440            	} else {
 441            		// '.fdt' file is requested
 442            		$fdtStartOffset = $fdxFile->readLong();
 443
 444                    $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
 445                    $fdtFile->seek($fdtStartOffset, SEEK_CUR);
 446
 447                    return $fdtFile;
 448            	}
 449            }
 450
 451            if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
 452                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
 453                                       . $fdxFName . ' file.' );
 454            }
 455            if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
 456                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
 457                                       . $fdtFName . ' file.' );
 458            }
 459
 460            // Open shared docstore segment file
 461            $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
 462            // Seek to the start of '.fdx' file within compound file
 463            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
 464            // Seek to the start of current segment documents section
 465            $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
 466
 467            if ($extension == '.fdx') {
 468                // '.fdx' file is requested
 469                return $cfxFile;
 470            } else {
 471                // '.fdt' file is requested
 472                $fdtStartOffset = $cfxFile->readLong();
 473
 474                // Seek to the start of '.fdt' file within compound file
 475                $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
 476                // Seek to the start of current segment documents section
 477                $cfxFile->seek($fdtStartOffset, SEEK_CUR);
 478
 479                return $fdtFile;
 480            }
 481        }
 482
 483        $filename = $this->_name . $extension;
 484
 485        if (!$this->_isCompound) {
 486            return $this->_directory->getFileObject($filename, $shareHandler);
 487        }
 488
 489        if( !isset($this->_segFiles[$filename]) ) {
 490            throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
 491                                       . $filename . ' file.' );
 492        }
 493
 494        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
 495        $file->seek($this->_segFiles[$filename]);
 496        return $file;
 497    }
 498
 499    /**
 500     * Get compound file length
 501     *
 502     * @param string $extension
 503     * @return integer
 504     */
 505    public function compoundFileLength($extension)
 506    {
 507        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
 508        	$filename = $this->_sharedDocStoreOptions['segment'] . $extension;
 509
 510            if (!$this->_sharedDocStoreOptions['isCompound']) {
 511            	return $this->_directory->fileLength($filename);
 512            }
 513
 514            if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
 515                throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
 516                                           . $filename . ' file.' );
 517            }
 518
 519            return $this->_sharedDocStoreOptions['fileSizes'][$filename];
 520        }
 521
 522
 523        $filename = $this->_name . $extension;
 524
 525        // Try to get common file first
 526        if ($this->_directory->fileExists($filename)) {
 527            return $this->_directory->fileLength($filename);
 528        }
 529
 530        if( !isset($this->_segFileSizes[$filename]) ) {
 531            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
 532                                       . $filename . ' file.' );
 533        }
 534
 535        return $this->_segFileSizes[$filename];
 536    }
 537
 538    /**
 539     * Returns field index or -1 if field is not found
 540     *
 541     * @param string $fieldName
 542     * @return integer
 543     */
 544    public function getFieldNum($fieldName)
 545    {
 546        foreach( $this->_fields as $field ) {
 547            if( $field->name == $fieldName ) {
 548                return $field->number;
 549            }
 550        }
 551
 552        return -1;
 553    }
 554
 555    /**
 556     * Returns field info for specified field
 557     *
 558     * @param integer $fieldNum
 559     * @return Zend_Search_Lucene_Index_FieldInfo
 560     */
 561    public function getField($fieldNum)
 562    {
 563        return $this->_fields[$fieldNum];
 564    }
 565
 566    /**
 567     * Returns array of fields.
 568     * if $indexed parameter is true, then returns only indexed fields.
 569     *
 570     * @param boolean $indexed
 571     * @return array
 572     */
 573    public function getFields($indexed = false)
 574    {
 575        $result = array();
 576        foreach( $this->_fields as $field ) {
 577            if( (!$indexed) || $field->isIndexed ) {
 578                $result[ $field->name ] = $field->name;
 579            }
 580        }
 581        return $result;
 582    }
 583
 584    /**
 585     * Returns array of FieldInfo objects.
 586     *
 587     * @return array
 588     */
 589    public function getFieldInfos()
 590    {
 591        return $this->_fields;
 592    }
 593
 594    /**
 595     * Returns actual deletions file generation number.
 596     *
 597     * @return integer
 598     */
 599    public function getDelGen()
 600    {
 601        return $this->_delGen;
 602    }
 603
 604    /**
 605     * Returns the total number of documents in this segment (including deleted documents).
 606     *
 607     * @return integer
 608     */
 609    public function count()
 610    {
 611        return $this->_docCount;
 612    }
 613
 614    /**
 615     * Returns number of deleted documents.
 616     *
 617     * @return integer
 618     */
 619    private function _deletedCount()
 620    {
 621        if ($this->_deleted === null) {
 622            return 0;
 623        }
 624
 625        if (extension_loaded('bitset')) {
 626            return count(bitset_to_array($this->_deleted));
 627        } else {
 628            return count($this->_deleted);
 629        }
 630    }
 631
 632    /**
 633     * Returns the total number of non-deleted documents in this segment.
 634     *
 635     * @return integer
 636     */
 637    public function numDocs()
 638    {
 639        if ($this->hasDeletions()) {
 640            return $this->_docCount - $this->_deletedCount();
 641        } else {
 642            return $this->_docCount;
 643        }
 644    }
 645
 646    /**
 647     * Get field position in a fields dictionary
 648     *
 649     * @param integer $fieldNum
 650     * @return integer
 651     */
 652    private function _getFieldPosition($fieldNum) {
 653        // Treat values which are not in a translation table as a 'direct value'
 654        return isset($this->_fieldsDicPositions[$fieldNum]) ?
 655                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
 656    }
 657
 658    /**
 659     * Return segment name
 660     *
 661     * @return string
 662     */
 663    public function getName()
 664    {
 665        return $this->_name;
 666    }
 667
 668
 669    /**
 670     * TermInfo cache
 671     *
 672     * Size is 1024.
 673     * Numbers are used instead of class constants because of performance considerations
 674     *
 675     * @var array
 676     */
 677    private $_termInfoCache = array();
 678
 679    private function _cleanUpTermInfoCache()
 680    {
 681        // Clean 256 term infos
 682        foreach ($this->_termInfoCache as $key => $termInfo) {
 683            unset($this->_termInfoCache[$key]);
 684
 685            // leave 768 last used term infos
 686            if (count($this->_termInfoCache) == 768) {
 687                break;
 688            }
 689        }
 690    }
 691
 692    /**
 693     * Load terms dictionary index
 694     *
 695     * @throws Zend_Search_Lucene_Exception
 696     */
 697    private function _loadDictionaryIndex()
 698    {
 699        // Check, if index is already serialized
 700        if ($this->_directory->fileExists($this->_name . '.sti')) {
 701            // Load serialized dictionary index data
 702            $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
 703            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
 704
 705            // Load dictionary index data
 706            if (($unserializedData = @unserialize($stiFileData)) !== false) {
 707                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
 708                return;
 709            }
 710        }
 711
 712        // Load data from .tii file and generate .sti file
 713
 714        // Prefetch dictionary index data
 715        $tiiFile = $this->openCompoundFile('.tii');
 716        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
 717
 718        // Load dictionary index data
 719        list($this->_termDictionary, $this->_termDictionaryInfos) =
 720                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
 721
 722        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
 723        $stiFile = $this->_directory->createFile($this->_name . '.sti');
 724        $stiFile->writeBytes($stiFileData);
 725    }
 726
 727    /**
 728     * Scans terms dictionary and returns term info
 729     *
 730     * @param Zend_Search_Lucene_Index_Term $term
 731     * @return Zend_Search_Lucene_Index_TermInfo
 732     */
 733    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
 734    {
 735        $termKey = $term->key();
 736        if (isset($this->_termInfoCache[$termKey])) {
 737            $termInfo = $this->_termInfoCache[$termKey];
 738
 739            // Move termInfo to the end of cache
 740            unset($this->_termInfoCache[$termKey]);
 741            $this->_termInfoCache[$termKey] = $termInfo;
 742
 743            return $termInfo;
 744        }
 745
 746
 747        if ($this->_termDictionary === null) {
 748            $this->_loadDictionaryIndex();
 749        }
 750
 751        $searchField = $this->getFieldNum($term->field);
 752
 753        if ($searchField == -1) {
 754            return null;
 755        }
 756        $searchDicField = $this->_getFieldPosition($searchField);
 757
 758        // search for appropriate value in dictionary
 759        $lowIndex = 0;
 760        $highIndex = count($this->_termDictionary)-1;
 761        while ($highIndex >= $lowIndex) {
 762            // $mid = ($highIndex - $lowIndex)/2;
 763            $mid = ($highIndex + $lowIndex) >> 1;
 764            $midTerm = $this->_termDictionary[$mid];
 765
 766            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
 767            $delta = $searchDicField - $fieldNum;
 768            if ($delta == 0) {
 769                $delta = strcmp($term->text, $midTerm[1] /* text */);
 770            }
 771
 772            if ($delta < 0) {
 773                $highIndex = $mid-1;
 774            } elseif ($delta > 0) {
 775                $lowIndex  = $mid+1;
 776            } else {
 777                // return $this->_termDictionaryInfos[$mid]; // We got it!
 778                $a = $this->_termDictionaryInfos[$mid];
 779                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
 780
 781                // Put loaded termInfo into cache
 782                $this->_termInfoCache[$termKey] = $termInfo;
 783
 784                return $termInfo;
 785            }
 786        }
 787
 788        if ($highIndex == -1) {
 789            // Term is out of the dictionary range
 790            return null;
 791        }
 792
 793        $prevPosition = $highIndex;
 794        $prevTerm = $this->_termDictionary[$prevPosition];
 795        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
 796
 797        $tisFile = $this->openCompoundFile('.tis');
 798        $tiVersion = $tisFile->readInt();
 799        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
 800            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
 801            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
 802        }
 803
 804        $termCount     = $tisFile->readLong();
 805        $indexInterval = $tisFile->readInt();
 806        $skipInterval  = $tisFile->readInt();
 807        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
 808            $maxSkipLevels = $tisFile->readInt();
 809        }
 810
 811        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
 812
 813        $termValue    = $prevTerm[1] /* text */;
 814        $termFieldNum = $prevTerm[0] /* field */;
 815        $freqPointer = $prevTermInfo[1] /* freqPointer */;
 816        $proxPointer = $prevTermInfo[2] /* proxPointer */;
 817        for ($count = $prevPosition*$indexInterval + 1;
 818             $count <= $termCount &&
 819             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
 820              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
 821               strcmp($termValue, $term->text) < 0) );
 822             $count++) {
 823            $termPrefixLength = $tisFile->readVInt();
 824            $termSuffix       = $tisFile->readString();
 825            $termFieldNum     = $tisFile->readVInt();
 826            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
 827
 828            $docFreq      = $tisFile->readVInt();
 829            $freqPointer += $tisFile->readVInt();
 830            $proxPointer += $tisFile->readVInt();
 831            if( $docFreq >= $skipInterval ) {
 832                $skipOffset = $tisFile->readVInt();
 833            } else {
 834                $skipOffset = 0;
 835            }
 836        }
 837
 838        if ($termFieldNum == $searchField && $termValue == $term->text) {
 839            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
 840        } else {
 841            $termInfo = null;
 842        }
 843
 844        // Put loaded termInfo into cache
 845        $this->_termInfoCache[$termKey] = $termInfo;
 846
 847        if (count($this->_termInfoCache) == 1024) {
 848            $this->_cleanUpTermInfoCache();
 849        }
 850
 851        return $termInfo;
 852    }
 853
 854    /**
 855     * Returns term freqs array.
 856     * Result array structure: array(docId => freq, ...)
 857     *
 858     * @param Zend_Search_Lucene_Index_Term $term
 859     * @param integer $shift
 860     * @return Zend_Search_Lucene_Index_TermInfo
 861     */
 862    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0)
 863    {
 864        $termInfo = $this->getTermInfo($term);
 865
 866        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
 867            return array();
 868        }
 869
 870        $frqFile = $this->openCompoundFile('.frq');
 871        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
 872        $result = array();
 873        $docId = 0;
 874
 875        for ($count = 0; $count < $termInfo->docFreq; $count++) {
 876            $docDelta = $frqFile->readVInt();
 877            if ($docDelta % 2 == 1) {
 878                $docId += ($docDelta-1)/2;
 879                $result[$shift + $docId] = 1;
 880            } else {
 881                $docId += $docDelta/2;
 882                $result[$shift + $docId] = $frqFile->readVInt();
 883            }
 884        }
 885
 886        return $result;
 887    }
 888
 889    /**
 890     * Returns term positions array.
 891     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
 892     *
 893     * @param Zend_Search_Lucene_Index_Term $term
 894     * @param integer $shift
 895     * @return Zend_Search_Lucene_Index_TermInfo
 896     */
 897    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0)
 898    {
 899        $termInfo = $this->getTermInfo($term);
 900
 901        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
 902            return array();
 903        }
 904
 905        $frqFile = $this->openCompoundFile('.frq');
 906        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
 907        $freqs = array();
 908        $docId = 0;
 909
 910        for ($count = 0; $count < $termInfo->docFreq; $count++) {
 911            $docDelta = $frqFile->readVInt();
 912            if ($docDelta % 2 == 1) {
 913                $docId += ($docDelta-1)/2;
 914                $freqs[$docId] = 1;
 915            } else {
 916                $docId += $docDelta/2;
 917                $freqs[$docId] = $frqFile->readVInt();
 918            }
 919        }
 920
 921        $result = array();
 922        $prxFile = $this->openCompoundFile('.prx');
 923        $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
 924        foreach ($freqs as $docId => $freq) {
 925            $termPosition = 0;
 926            $positions = array();
 927
 928            for ($count = 0; $count < $freq; $count++ ) {
 929                $termPosition += $prxFile->readVInt();
 930                $positions[] = $termPosition;
 931            }
 932
 933            $result[$shift + $docId] = $positions;
 934        }
 935
 936        return $result;
 937    }
 938
 939    /**
 940     * Load normalizatin factors from an index file
 941     *
 942     * @param integer $fieldNum
 943     * @throws Zend_Search_Lucene_Exception
 944     */
 945    private function _loadNorm($fieldNum)
 946    {
 947        if ($this->_hasSingleNormFile) {
 948            $normfFile = $this->openCompoundFile('.nrm');
 949
 950            $header              = $normfFile->readBytes(3);
 951            $headerFormatVersion = $normfFile->readByte();
 952
 953            if ($header != 'NRM'  ||  $headerFormatVersion != (int)0xFF) {
 954                throw new  Zend_Search_Lucene_Exception('Wrong norms file format.');
 955            }
 956
 957            foreach ($this->_fields as $fNum => $fieldInfo) {
 958                if ($fieldInfo->isIndexed) {
 959                    $this->_norms[$fNum] = $normfFile->readBytes($this->_docCount);
 960                }
 961            }
 962        } else {
 963            $fFile = $this->openCompoundFile('.f' . $fieldNum);
 964            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
 965        }
 966    }
 967
 968    /**
 969     * Returns normalization factor for specified documents
 970     *
 971     * @param integer $id
 972     * @param string $fieldName
 973     * @return float
 974     */
 975    public function norm($id, $fieldName)
 976    {
 977        $fieldNum = $this->getFieldNum($fieldName);
 978
 979        if ( !($this->_fields[$fieldNum]->isIndexed) ) {
 980            return null;
 981        }
 982
 983        if (!isset($this->_norms[$fieldNum])) {
 984            $this->_loadNorm($fieldNum);
 985        }
 986
 987        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
 988    }
 989
 990    /**
 991     * Returns norm vector, encoded in a byte string
 992     *
 993     * @param string $fieldName
 994     * @return string
 995     */
 996    public function normVector($fieldName)
 997    {
 998        $fieldNum = $this->getFieldNum($fieldName);
 999
1000        if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
1001            $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
1002
1003            return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
1004                              $this->_docCount);
1005        }
1006
1007        if (!isset($this->_norms[$fieldNum])) {
1008            $this->_loadNorm($fieldNum);
1009        }
1010
1011        return $this->_norms[$fieldNum];
1012    }
1013
1014
1015    /**
1016     * Returns true if any documents have been deleted from this index segment.
1017     *
1018     * @return boolean
1019     */
1020    public function hasDeletions()
1021    {
1022        return $this->_deleted !== null;
1023    }
1024
1025
1026    /**
1027     * Returns true if segment has single norms file.
1028     *
1029     * @return boolean
1030     */
1031    public function hasSingleNormFile()
1032    {
1033        return $this->_hasSingleNormFile ? true : false;
1034    }
1035
    /**
     * Returns true if segment is stored using compound segment file.
     *
     * The internal flag is returned as is (without conversion to boolean).
     *
     * @return boolean
     */
    public function isCompound()
    {
        return $this->_isCompound;
    }
1045
1046    /**
1047     * Deletes a document from the index segment.
1048     * $id is an internal document id
1049     *
1050     * @param integer
1051     */
1052    public function delete($id)
1053    {
1054        $this->_deletedDirty = true;
1055
1056        if (extension_loaded('bitset')) {
1057            if ($this->_deleted === null) {
1058                $this->_deleted = bitset_empty($id);
1059            }
1060            bitset_incl($this->_deleted, $id);
1061        } else {
1062            if ($this->_deleted === null) {
1063                $this->_deleted = array();
1064            }
1065
1066            $this->_deleted[$id] = 1;
1067        }
1068    }
1069
1070    /**
1071     * Checks, that document is deleted
1072     *
1073     * @param integer
1074     * @return boolean
1075     */
1076    public function isDeleted($id)
1077    {
1078        if ($this->_deleted === null) {
1079            return false;
1080        }
1081
1082        if (extension_loaded('bitset')) {
1083            return bitset_in($this->_deleted, $id);
1084        } else {
1085            return isset($this->_deleted[$id]);
1086        }
1087    }
1088
1089
1090    /**
1091     * Detect latest delete generation
1092     *
1093     * Is actualy used from writeChanges() method or from the constructor if it's invoked from
1094     * Index writer. In both cases index write lock is already obtained, so we shouldn't care
1095     * about it
1096     */
1097    private function _detectLatestDelGen()
1098    {
1099        $delFileList = array();
1100        foreach ($this->_directory->fileList() as $file) {
1101            if ($file == $this->_name . '.del') {
1102                // Matches <segment_name>.del file name
1103                $delFileList[] = 0;
1104            } else if (preg_match('/^' . $this->_name . '_([a-zA-Z0-9]+)\.del$/i', $file, $matches)) {
1105                // Matches <segment_name>_NNN.del file names
1106                $delFileList[] = (int)base_convert($matches[1], 36, 10);
1107            }
1108        }
1109
1110        if (count($delFileList) == 0) {
1111            // There is no deletions file for current segment in the directory
1112            // Set detetions file generation number to 1
1113            $this->_delGen = -1;
1114        } else {
1115            // There are some deletions files for current segment in the directory
1116            // Set deletions file generation number to the highest nuber
1117            $this->_delGen = max($delFileList);
1118        }
1119    }
1120
1121    /**
1122     * Write changes if it's necessary.
1123     *
1124     * This method must be invoked only from the Writer _updateSegments() method,
1125     * so index Write lock has to be already obtained.
1126     *
1127     * @internal
1128     */
1129    public function writeChanges()
1130    {
1131        if (!$this->_deletedDirty) {
1132            return;
1133        }
1134
1135        if (extension_loaded('bitset')) {
1136            $delBytes = $this->_deleted;
1137            $bitCount = count(bitset_to_array($delBytes));
1138        } else {
1139            $byteCount = floor($this->_docCount/8)+1;
1140            $delBytes = str_repeat(chr(0), $byteCount);
1141            for ($count = 0; $count < $byteCount; $count++) {
1142                $byte = 0;
1143                for ($bit = 0; $bit < 8; $bit++) {
1144                    if (isset($this->_deleted[$count*8 + $bit])) {
1145                        $byte |= (1<<$bit);
1146                    }
1147                }
1148                $delBytes[$count] = chr($byte);
1149            }
1150            $bitCount = count($this->_deleted);
1151        }
1152
1153
1154        // Get new generation number
1155        $this->_detectLatestDelGen();
1156
1157        if ($this->_delGen == -1) {
1158        	// Set delete file generation number to 1
1159        	$this->_delGen = 1;
1160        } else {
1161        	// Increase delete file generation number by 1
1162        	$this->_delGen++;
1163        }
1164
1165        $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
1166        $delFile->writeInt($this->_docCount);
1167        $delFile->writeInt($bitCount);
1168        $delFile->writeBytes($delBytes);
1169
1170        $this->_deletedDirty = false;
1171    }
1172
1173
1174
    /**
     * Term Dictionary File object for stream like terms reading
     *
     * Null if terms stream is not started or is already finished.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_tisFile = null;

    /**
     * Start offset of the .tis file data
     *
     * It's a file position reported by tell() right after the file is opened in reset().
     * Used as a base for absolute seeks within the file.
     *
     * @var integer
     */
    private $_tisFileOffset;

    /**
     * Frequencies File object for stream like terms reading
     *
     * Is opened only for SM_FULL_INFO and SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_frqFile = null;

    /**
     * Start offset of the .frq file data
     *
     * It's a file position reported by tell() right after the file is opened in reset().
     * Used as a base for absolute seeks within the file.
     *
     * @var integer
     */
    private $_frqFileOffset;

    /**
     * Positions File object for stream like terms reading
     *
     * Is opened only for SM_FULL_INFO and SM_MERGE_INFO scan modes.
     *
     * @var Zend_Search_Lucene_Storage_File
     */
    private $_prxFile = null;

    /**
     * Start offset of the .prx file data
     *
     * It's a file position reported by tell() right after the file is opened in reset().
     * Used as a base for absolute seeks within the file.
     *
     * @var integer
     */
    private $_prxFileOffset;


    /**
     * Number of terms which are still available in the terms stream
     *
     * reset() initializes it with the overall number of terms, skipTo() recalculates it
     * after stream repositioning, nextTerm() treats zero value as the end of the stream.
     *
     * @var integer
     */
    private $_termCount = 0;

    /**
     * Overall number of terms in term stream
     *
     * Is read from the .tis file header by reset().
     *
     * @var integer
     */
    private $_termNum = 0;

    /**
     * Segment index interval
     *
     * Is read from the .tis file header by reset().
     * skipTo() uses it to translate a dictionary index position into a term number.
     *
     * @var integer
     */
    private $_indexInterval;

    /**
     * Segment skip interval
     *
     * Is read from the .tis file header by reset().
     *
     * @var integer
     */
    private $_skipInterval;

    /**
     * Last TermInfo in a terms stream
     *
     * Null if the stream is finished.
     *
     * @var Zend_Search_Lucene_Index_TermInfo
     */
    private $_lastTermInfo = null;

    /**
     * Last Term in a terms stream
     *
     * Null if the stream is finished.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_lastTerm = null;

    /**
     * Map of the document IDs: array(segmentDocId => resultDocId, ...)
     * Used to get new docID after removing deleted documents
     * (deleted documents have no entries in the map).
     * It's not very efficient from memory usage point of view,
     * but it's much faster than other methods.
     *
     * Is filled by reset() for SM_FULL_INFO and SM_MERGE_INFO scan modes
     * and is set to null when the end of terms stream is reached.
     *
     * @var array|null
     */
    private $_docMap = null;

    /**
     * An array of all positions of the last term in the documents.
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
     * Document ids are already translated using the document IDs map.
     *
     * Is set to null if term positions loading has to be skipped
     *
     * @var array|null
     */
    private $_lastTermPositions;


    /**
     * Terms scan mode
     *
     * Values:
     *
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
     *                       document numbers are compacted (shifted if segment has deleted documents)
     *
     * @var integer
     */
    private $_termsScanMode;

    /** Scan modes */
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved,
                                // document numbers are only shifted by the start id
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
                                // document numbers are compacted (shifted if segment contains deleted documents)
1299
1300    /**
1301     * Reset terms stream
1302     *
1303     * $startId - id for the fist document
1304     * $compact - remove deleted documents
1305     *
1306     * Returns start document id for the next segment
1307     *
1308     * @param integer $startId
1309     * @param integer $mode
1310     * @throws Zend_Search_Lucene_Exception
1311     * @return integer
1312     */
1313    public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
1314    {
1315        if ($this->_tisFile !== null) {
1316            $this->_tisFile = null;
1317        }
1318
1319        $this->_tisFile = $this->openCompoundFile('.tis', false);
1320        $this->_tisFileOffset = $this->_tisFile->tell();
1321
1322        $tiVersion = $this->_tisFile->readInt();
1323        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
1324            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
1325            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
1326        }
1327
1328        $this->_termCount     =
1329              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
1330        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
1331        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
1332        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
1333            $maxSkipLevels = $this->_tisFile->readInt();
1334        }
1335
1336        if ($this->_frqFile !== null) {
1337            $this->_frqFile = null;
1338        }
1339        if ($this->_prxFile !== null) {
1340            $this->_prxFile = null;
1341        }
1342        $this->_docMap = array();
1343
1344        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
1345        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
1346        $this->_lastTermPositions = null;
1347
1348        $this->_termsScanMode = $mode;
1349
1350        switch ($mode) {
1351            case self::SM_TERMS_ONLY:
1352                // Do nothing
1353                break;
1354
1355            case self::SM_FULL_INFO:
1356                // break intentionally omitted
1357            case self::SM_MERGE_INFO:
1358                $this->_frqFile = $this->openCompoundFile('.frq', false);
1359                $this->_frqFileOffset = $this->_frqFile->tell();
1360
1361                $this->_prxFile = $this->openCompoundFile('.prx', false);
1362                $this->_prxFileOffset = $this->_prxFile->tell();
1363
1364                for ($count = 0; $count < $this->_docCount; $count++) {
1365                    if (!$this->isDeleted($count)) {
1366                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
1367                    }
1368                }
1369                break;
1370
1371            default:
1372                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
1373                break;
1374        }
1375
1376
1377        $this->nextTerm();
1378        return $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
1379    }
1380
1381
1382    /**
1383     * Skip terms stream up to specified term preffix.
1384     *
1385     * Prefix contains fully specified field info and portion of searched term
1386     *
1387     * @param Zend_Search_Lucene_Index_Term $prefix
1388     * @throws Zend_Search_Lucene_Exception
1389     */
1390    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
1391    {
1392        if ($this->_termDictionary === null) {
1393            $this->_loadDictionaryIndex();
1394        }
1395
1396        $searchField = $this->getFieldNum($prefix->field);
1397
1398        if ($searchField == -1) {
1399            /**
1400             * Field is not presented in this segment
1401             * Go to the end of dictionary
1402             */
1403            $this->_tisFile = null;
1404            $this->_frqFile = null;
1405            $this->_prxFile = null;
1406
1407            $this->_lastTerm          = null;
1408            $this->_lastTermInfo      = null;
1409            $this->_lastTermPositions = null;
1410
1411            return;
1412        }
1413        $searchDicField = $this->_getFieldPosition($searchField);
1414
1415        // search for appropriate value in dictionary
1416        $lowIndex = 0;
1417        $highIndex = count($this->_termDictionary)-1;
1418        while ($highIndex >= $lowIndex) {
1419            // $mid = ($highIndex - $lowIndex)/2;
1420            $mid = ($highIndex + $lowIndex) >> 1;
1421            $midTerm = $this->_termDictionary[$mid];
1422
1423            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
1424            $delta = $searchDicField - $fieldNum;
1425            if ($delta == 0) {
1426                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
1427            }
1428
1429            if ($delta < 0) {
1430                $highIndex = $mid-1;
1431            } elseif ($delta > 0) {
1432                $lowIndex  = $mid+1;
1433            } else {
1434                // We have reached term we are looking for
1435                break;
1436            }
1437        }
1438
1439        if ($highIndex == -1) {
1440            // Term is out of the dictionary range
1441            $this->_tisFile = null;
1442            $this->_frqFile = null;
1443            $this->_prxFile = null;
1444
1445            $this->_lastTerm          = null;
1446            $this->_lastTermInfo      = null;
1447            $this->_lastTermPositions = null;
1448
1449            return;
1450        }
1451
1452        $prevPosition = $highIndex;
1453        $prevTerm = $this->_termDictionary[$prevPosition];
1454        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
1455
1456        if ($this->_tisFile === null) {
1457            // The end of terms stream is reached and terms dictionary file is closed
1458            // Perform mini-reset operation
1459            $this->_tisFile = $this->openCompoundFile('.tis', false);
1460
1461            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1462                $this->_frqFile = $this->openCompoundFile('.frq', false);
1463                $this->_prxFile = $this->openCompoundFile('.prx', false);
1464            }
1465        }
1466        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);
1467
1468        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
1469                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
1470        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
1471                                                                     $prevTermInfo[1] /* freqPointer */,
1472                                                                     $prevTermInfo[2] /* proxPointer */,
1473                                                                     $prevTermInfo[3] /* skipOffset */);
1474        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;
1475
1476        if ($highIndex == 0) {
1477            // skip start entry
1478            $this->nextTerm();
1479        } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
1480            // We got exact match in the dictionary index
1481
1482            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
1483                $this->_lastTermPositions = array();
1484
1485                $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
1486                $freqs = array();   $docId = 0;
1487                for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
1488                    $docDelta = $this->_frqFile->readVInt();
1489                    if( $docDelta % 2 == 1 ) {
1490                        $docId += ($docDelta-1)/2;
1491                        $freqs[ $docId ] = 1;
1492                    } else {
1493                        $docId += $docDelta/2;
1494                        $freqs[ $docId ] = $this->_frqFile->readVInt();
1495                    }
1496                }
1497
1498                $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
1499                foreach ($freqs as $docId => $freq) {
1500                    $termPosition = 0;  $positions = array();
1501
1502                    for ($count = 0; $count < $freq; $count++ ) {
1503                        $termPosition += $this->_prxFile->readVInt();
1504                        $positions[] = $termPosition;
1505                    }
1506
1507                    if (isset($this->_docMap[$docId])) {
1508                        $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
1509                    }
1510                }
1511            }
1512
1513            return;
1514        }
1515
1516        // Search term matching specified prefix
1517        while ($this->_lastTerm !== null) {
1518            if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
1519                 ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
1520                    // Current term matches or greate than the pattern
1521                    return;
1522            }
1523
1524            $this->nextTerm();
1525        }
1526    }
1527
1528
1529    /**
1530     * Scans terms dictionary and returns next term
1531     *
1532     * @return Zend_Search_Lucene_Index_Term|null
1533     */
1534    public function nextTerm()
1535    {
1536        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
1537            $this->_lastTerm          = null;
1538            $this->_lastTermInfo      = null;
1539            $this->_lastTermPositions = null;
1540            $this->_docMap            = null;
1541
1542            // may be necessary for "empty" segment
1543            $this->_tisFile = null;
1544            $this->_frqFile = null;
1545            $this->_prxFile = null;
1546
1547            return null;
1548        }
1549
1550        $termPrefixLength = $this->_tisFile->readVInt();
1551        $termSuffix       = $this->_tisFile->readString();
1552        $termFieldNum     = $this->

Large files files are truncated, but you can click here to view the full file