PageRenderTime 196ms CodeModel.GetById 80ms app.highlight 70ms RepoModel.GetById 31ms app.codeStats 1ms

/Search/Lucene/Index/SegmentInfo.php

https://bitbucket.org/gkawka/zend-framework
PHP | 2132 lines | 1207 code | 309 blank | 616 comment | 337 complexity | c0b75a51479e8add6ab2b71dfb82aa67 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2/**
   3 * Zend Framework
   4 *
   5 * LICENSE
   6 *
   7 * This source file is subject to the new BSD license that is bundled
   8 * with this package in the file LICENSE.txt.
   9 * It is also available through the world-wide-web at this URL:
  10 * http://framework.zend.com/license/new-bsd
  11 * If you did not receive a copy of the license and are unable to
  12 * obtain it through the world-wide-web, please send an email
  13 * to license@zend.com so we can send you a copy immediately.
  14 *
  15 * @category   Zend
  16 * @package    Zend_Search_Lucene
  17 * @subpackage Index
  18 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  19 * @license    http://framework.zend.com/license/new-bsd     New BSD License
  20 * @version    $Id: SegmentInfo.php 24593 2012-01-05 20:35:02Z matthew $
  21 */
  22
  23/** Zend_Search_Lucene_Index_TermsStream_Interface */
  24require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
  25
  26
  27/** Zend_Search_Lucene_Search_Similarity */
  28require_once 'Zend/Search/Lucene/Search/Similarity.php';
  29
  30/** Zend_Search_Lucene_Index_FieldInfo */
  31require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
  32
  33/** Zend_Search_Lucene_Index_Term */
  34require_once 'Zend/Search/Lucene/Index/Term.php';
  35
  36/** Zend_Search_Lucene_Index_TermInfo */
  37require_once 'Zend/Search/Lucene/Index/TermInfo.php';
  38
  39/**
  40 * @category   Zend
  41 * @package    Zend_Search_Lucene
  42 * @subpackage Index
  43 * @copyright  Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  44 * @license    http://framework.zend.com/license/new-bsd     New BSD License
  45 */
  46class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
  47{
  48    /**
  49     * "Full scan vs fetch" boundary.
  50     *
  51     * If filter selectivity is less than this value, then full scan is performed
  52     * (since term entries fetching has some additional overhead).
  53     */
  54    const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
  55
  56    /**
  57     * Number of docs in a segment
  58     *
  59     * @var integer
  60     */
  61    private $_docCount;
  62
  63    /**
  64     * Segment name
  65     *
  66     * @var string
  67     */
  68    private $_name;
  69
  70    /**
  71     * Term Dictionary Index
  72     *
  73     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
  74     * of performance considerations)
  75     * [0] -> $termValue
  76     * [1] -> $termFieldNum
  77     *
  78     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  79     *
  80     * @var array
  81     */
  82    private $_termDictionary;
  83
  84    /**
  85     * Term Dictionary Index TermInfos
  86     *
  87     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
  88     * of performance considerations)
  89     * [0] -> $docFreq
  90     * [1] -> $freqPointer
  91     * [2] -> $proxPointer
  92     * [3] -> $skipOffset
  93     * [4] -> $indexPointer
  94     *
  95     * @var array
  96     */
  97    private $_termDictionaryInfos;
  98
  99    /**
 100     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
 101     *
 102     * @var array
 103     */
 104    private $_fields;
 105
 106    /**
 107     * Field positions in a dictionary.
  108     * (Term dictionary contains fields ordered by names)
 109     *
 110     * @var array
 111     */
 112    private $_fieldsDicPositions;
 113
 114
 115    /**
 116     * Associative array where the key is the file name and the value is data offset
  117     * in a compound segment file (.cfs).
 118     *
 119     * @var array
 120     */
 121    private $_segFiles;
 122
 123    /**
  124     * Associative array where the key is the file name and the value is file size (.cfs).
 125     *
 126     * @var array
 127     */
 128    private $_segFileSizes;
 129
 130    /**
 131     * Delete file generation number
 132     *
 133     * -2 means autodetect latest delete generation
 134     * -1 means 'there is no delete file'
 135     *  0 means pre-2.1 format delete file
 136     *  X specifies used delete file
 137     *
 138     * @var integer
 139     */
 140    private $_delGen;
 141
 142    /**
 143     * Segment has single norms file
 144     *
 145     * If true then one .nrm file is used for all fields
 146     * Otherwise .fN files are used
 147     *
 148     * @var boolean
 149     */
 150    private $_hasSingleNormFile;
 151
 152    /**
 153     * Use compound segment file (*.cfs) to collect all other segment files
 154     * (excluding .del files)
 155     *
 156     * @var boolean
 157     */
 158    private $_isCompound;
 159
 160
 161    /**
 162     * File system adapter.
 163     *
 164     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
 165     */
 166    private $_directory;
 167
 168    /**
 169     * Normalization factors.
 170     * An array fieldName => normVector
 171     * normVector is a binary string.
 172     * Each byte corresponds to an indexed document in a segment and
 173     * encodes normalization factor (float value, encoded by
 174     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
 175     *
 176     * @var array
 177     */
 178    private $_norms = array();
 179
 180    /**
 181     * List of deleted documents.
 182     * bitset if bitset extension is loaded or array otherwise.
 183     *
 184     * @var mixed
 185     */
 186    private $_deleted = null;
 187
 188    /**
 189     * $this->_deleted update flag
 190     *
 191     * @var boolean
 192     */
 193    private $_deletedDirty = false;
 194
 195    /**
 196     * True if segment uses shared doc store
 197     *
 198     * @var boolean
 199     */
 200    private $_usesSharedDocStore;
 201
 202    /*
 203     * Shared doc store options.
  204     * It's an associative array with the following items:
 205     * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
 206     * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
 207     * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
 208     */
 209    private $_sharedDocStoreOptions;
 210
 211
 212    /**
 213     * Zend_Search_Lucene_Index_SegmentInfo constructor
 214     *
 215     * @param Zend_Search_Lucene_Storage_Directory $directory
 216     * @param string     $name
 217     * @param integer    $docCount
 218     * @param integer    $delGen
 219     * @param array|null $docStoreOptions
 220     * @param boolean    $hasSingleNormFile
 221     * @param boolean    $isCompound
 222     */
 223    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
 224    {
 225        $this->_directory = $directory;
 226        $this->_name      = $name;
 227        $this->_docCount  = $docCount;
 228
 229        if ($docStoreOptions !== null) {
 230            $this->_usesSharedDocStore    = true;
 231            $this->_sharedDocStoreOptions = $docStoreOptions;
 232
 233            if ($docStoreOptions['isCompound']) {
 234                $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
 235                $cfxFilesCount = $cfxFile->readVInt();
 236
 237                $cfxFiles     = array();
 238                $cfxFileSizes = array();
 239
 240                for ($count = 0; $count < $cfxFilesCount; $count++) {
 241                    $dataOffset = $cfxFile->readLong();
 242                    if ($count != 0) {
 243                        $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
 244                    }
 245                    $fileName            = $cfxFile->readString();
 246                    $cfxFiles[$fileName] = $dataOffset;
 247                }
 248                if ($count != 0) {
 249                    $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
 250                }
 251
 252                $this->_sharedDocStoreOptions['files']     = $cfxFiles;
 253                $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
 254            }
 255        }
 256
 257        $this->_hasSingleNormFile = $hasSingleNormFile;
 258        $this->_delGen            = $delGen;
 259        $this->_termDictionary    = null;
 260
 261
 262        if ($isCompound !== null) {
 263            $this->_isCompound    = $isCompound;
 264        } else {
 265            // It's a pre-2.1 segment or isCompound is set to 'unknown'
 266            // Detect if segment uses compound file
 267            require_once 'Zend/Search/Lucene/Exception.php';
 268            try {
 269                // Try to open compound file
 270                $this->_directory->getFileObject($name . '.cfs');
 271
 272                // Compound file is found
 273                $this->_isCompound = true;
 274            } catch (Zend_Search_Lucene_Exception $e) {
 275                if (strpos($e->getMessage(), 'is not readable') !== false) {
 276                    // Compound file is not found or is not readable
 277                    $this->_isCompound = false;
 278                } else {
 279                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
 280                }
 281            }
 282        }
 283
 284        $this->_segFiles = array();
 285        if ($this->_isCompound) {
 286            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
 287            $segFilesCount = $cfsFile->readVInt();
 288
 289            for ($count = 0; $count < $segFilesCount; $count++) {
 290                $dataOffset = $cfsFile->readLong();
 291                if ($count != 0) {
 292                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
 293                }
 294                $fileName = $cfsFile->readString();
 295                $this->_segFiles[$fileName] = $dataOffset;
 296            }
 297            if ($count != 0) {
 298                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
 299            }
 300        }
 301
 302        $fnmFile = $this->openCompoundFile('.fnm');
 303        $fieldsCount = $fnmFile->readVInt();
 304        $fieldNames = array();
 305        $fieldNums  = array();
 306        $this->_fields = array();
 307
 308        for ($count=0; $count < $fieldsCount; $count++) {
 309            $fieldName = $fnmFile->readString();
 310            $fieldBits = $fnmFile->readByte();
 311            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
 312                                                                            $fieldBits & 0x01 /* field is indexed */,
 313                                                                            $count,
 314                                                                            $fieldBits & 0x02 /* termvectors are stored */,
 315                                                                            $fieldBits & 0x10 /* norms are omitted */,
 316                                                                            $fieldBits & 0x20 /* payloads are stored */);
 317            if ($fieldBits & 0x10) {
 318                // norms are omitted for the indexed field
 319                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
 320            }
 321
 322            $fieldNums[$count]  = $count;
 323            $fieldNames[$count] = $fieldName;
 324        }
 325        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
 326        $this->_fieldsDicPositions = array_flip($fieldNums);
 327
 328        if ($this->_delGen == -2) {
 329            // SegmentInfo constructor is invoked from index writer
 330            // Autodetect current delete file generation number
 331            $this->_delGen = $this->_detectLatestDelGen();
 332        }
 333
 334        // Load deletions
 335        $this->_deleted = $this->_loadDelFile();
 336    }
 337
 338    /**
  339     * Load deletions file
 340     *
 341     * Returns bitset or an array depending on bitset extension availability
 342     *
 343     * @return mixed
 344     * @throws Zend_Search_Lucene_Exception
 345     */
 346    private function _loadDelFile()
 347    {
 348        if ($this->_delGen == -1) {
 349            // There is no delete file for this segment
 350            return null;
 351        } else if ($this->_delGen == 0) {
 352            // It's a segment with pre-2.1 format delete file
 353            // Try to load deletions file
 354            return $this->_loadPre21DelFile();
 355        } else {
 356            // It's 2.1+ format deleteions file
 357            return $this->_load21DelFile();
 358        }
 359    }
 360
 361    /**
  362     * Load pre-2.1 deletions file
 363     *
 364     * Returns bitset or an array depending on bitset extension availability
 365     *
 366     * @return mixed
 367     * @throws Zend_Search_Lucene_Exception
 368     */
 369    private function _loadPre21DelFile()
 370    {
 371        require_once 'Zend/Search/Lucene/Exception.php';
 372        try {
 373            // '.del' files always stored in a separate file
 374            // Segment compound is not used
 375            $delFile = $this->_directory->getFileObject($this->_name . '.del');
 376
 377            $byteCount = $delFile->readInt();
 378            $byteCount = ceil($byteCount/8);
 379            $bitCount  = $delFile->readInt();
 380
 381            if ($bitCount == 0) {
 382                $delBytes = '';
 383            } else {
 384                $delBytes = $delFile->readBytes($byteCount);
 385            }
 386
 387            if (extension_loaded('bitset')) {
 388                return $delBytes;
 389            } else {
 390                $deletions = array();
 391                for ($count = 0; $count < $byteCount; $count++) {
 392                    $byte = ord($delBytes[$count]);
 393                    for ($bit = 0; $bit < 8; $bit++) {
 394                        if ($byte & (1<<$bit)) {
 395                            $deletions[$count*8 + $bit] = 1;
 396                        }
 397                    }
 398                }
 399
 400                return $deletions;
 401            }
 402        } catch(Zend_Search_Lucene_Exception $e) {
 403            if (strpos($e->getMessage(), 'is not readable') === false) {
 404                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
 405            }
 406            // There is no deletion file
 407            $this->_delGen = -1;
 408
 409            return null;
 410        }
 411    }
 412
 413    /**
  414     * Load 2.1+ format deletions file
 415     *
 416     * Returns bitset or an array depending on bitset extension availability
 417     *
 418     * @return mixed
 419     */
 420    private function _load21DelFile()
 421    {
 422        $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
 423
 424        $format = $delFile->readInt();
 425
 426        if ($format == (int)0xFFFFFFFF) {
 427            if (extension_loaded('bitset')) {
 428                $deletions = bitset_empty();
 429            } else {
 430                $deletions = array();
 431            }
 432
 433            $byteCount = $delFile->readInt();
 434            $bitCount  = $delFile->readInt();
 435
 436            $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
 437            $byteNum = 0;
 438
 439            do {
 440                $dgap = $delFile->readVInt();
 441                $nonZeroByte = $delFile->readByte();
 442
 443                $byteNum += $dgap;
 444
 445
 446                if (extension_loaded('bitset')) {
 447                    for ($bit = 0; $bit < 8; $bit++) {
 448                        if ($nonZeroByte & (1<<$bit)) {
 449                            bitset_incl($deletions, $byteNum*8 + $bit);
 450                        }
 451                    }
 452                    return $deletions;
 453                } else {
 454                    for ($bit = 0; $bit < 8; $bit++) {
 455                        if ($nonZeroByte & (1<<$bit)) {
 456                            $deletions[$byteNum*8 + $bit] = 1;
 457                        }
 458                    }
 459                    return (count($deletions) > 0) ? $deletions : null;
 460                }
 461
 462            } while ($delFile->tell() < $delFileSize);
 463        } else {
 464            // $format is actually byte count
 465            $byteCount = ceil($format/8);
 466            $bitCount  = $delFile->readInt();
 467
 468            if ($bitCount == 0) {
 469                $delBytes = '';
 470            } else {
 471                $delBytes = $delFile->readBytes($byteCount);
 472            }
 473
 474            if (extension_loaded('bitset')) {
 475                return $delBytes;
 476            } else {
 477                $deletions = array();
 478                for ($count = 0; $count < $byteCount; $count++) {
 479                    $byte = ord($delBytes[$count]);
 480                    for ($bit = 0; $bit < 8; $bit++) {
 481                        if ($byte & (1<<$bit)) {
 482                            $deletions[$count*8 + $bit] = 1;
 483                        }
 484                    }
 485                }
 486
 487                return (count($deletions) > 0) ? $deletions : null;
 488            }
 489        }
 490    }
 491
 492    /**
  493     * Opens index file stored within compound index file
 494     *
 495     * @param string $extension
 496     * @param boolean $shareHandler
 497     * @throws Zend_Search_Lucene_Exception
 498     * @return Zend_Search_Lucene_Storage_File
 499     */
 500    public function openCompoundFile($extension, $shareHandler = true)
 501    {
 502        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
 503            $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
 504            $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';
 505
 506            if (!$this->_sharedDocStoreOptions['isCompound']) {
 507                $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
 508                $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
 509
 510                if ($extension == '.fdx') {
 511                    // '.fdx' file is requested
 512                    return $fdxFile;
 513                } else {
 514                    // '.fdt' file is requested
 515                    $fdtStartOffset = $fdxFile->readLong();
 516
 517                    $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
 518                    $fdtFile->seek($fdtStartOffset, SEEK_CUR);
 519
 520                    return $fdtFile;
 521                }
 522            }
 523
 524            if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
 525                require_once 'Zend/Search/Lucene/Exception.php';
 526                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
 527                                       . $fdxFName . ' file.' );
 528            }
 529            if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
 530                require_once 'Zend/Search/Lucene/Exception.php';
 531                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
 532                                       . $fdtFName . ' file.' );
 533            }
 534
 535            // Open shared docstore segment file
 536            $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
 537            // Seek to the start of '.fdx' file within compound file
 538            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
 539            // Seek to the start of current segment documents section
 540            $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
 541
 542            if ($extension == '.fdx') {
 543                // '.fdx' file is requested
 544                return $cfxFile;
 545            } else {
 546                // '.fdt' file is requested
 547                $fdtStartOffset = $cfxFile->readLong();
 548
 549                // Seek to the start of '.fdt' file within compound file
 550                $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
 551                // Seek to the start of current segment documents section
 552                $cfxFile->seek($fdtStartOffset, SEEK_CUR);
 553
 554                return $fdtFile;
 555            }
 556        }
 557
 558        $filename = $this->_name . $extension;
 559
 560        if (!$this->_isCompound) {
 561            return $this->_directory->getFileObject($filename, $shareHandler);
 562        }
 563
 564        if( !isset($this->_segFiles[$filename]) ) {
 565            require_once 'Zend/Search/Lucene/Exception.php';
 566            throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
 567                                       . $filename . ' file.' );
 568        }
 569
 570        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
 571        $file->seek($this->_segFiles[$filename]);
 572        return $file;
 573    }
 574
 575    /**
 576     * Get compound file length
 577     *
 578     * @param string $extension
 579     * @return integer
 580     */
 581    public function compoundFileLength($extension)
 582    {
 583        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
 584            $filename = $this->_sharedDocStoreOptions['segment'] . $extension;
 585
 586            if (!$this->_sharedDocStoreOptions['isCompound']) {
 587                return $this->_directory->fileLength($filename);
 588            }
 589
 590            if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
 591                require_once 'Zend/Search/Lucene/Exception.php';
 592                throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
 593                                           . $filename . ' file.' );
 594            }
 595
 596            return $this->_sharedDocStoreOptions['fileSizes'][$filename];
 597        }
 598
 599
 600        $filename = $this->_name . $extension;
 601
 602        // Try to get common file first
 603        if ($this->_directory->fileExists($filename)) {
 604            return $this->_directory->fileLength($filename);
 605        }
 606
 607        if( !isset($this->_segFileSizes[$filename]) ) {
 608            require_once 'Zend/Search/Lucene/Exception.php';
 609            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
 610                                       . $filename . ' file.' );
 611        }
 612
 613        return $this->_segFileSizes[$filename];
 614    }
 615
 616    /**
 617     * Returns field index or -1 if field is not found
 618     *
 619     * @param string $fieldName
 620     * @return integer
 621     */
 622    public function getFieldNum($fieldName)
 623    {
 624        foreach( $this->_fields as $field ) {
 625            if( $field->name == $fieldName ) {
 626                return $field->number;
 627            }
 628        }
 629
 630        return -1;
 631    }
 632
 633    /**
 634     * Returns field info for specified field
 635     *
 636     * @param integer $fieldNum
 637     * @return Zend_Search_Lucene_Index_FieldInfo
 638     */
 639    public function getField($fieldNum)
 640    {
 641        return $this->_fields[$fieldNum];
 642    }
 643
 644    /**
 645     * Returns array of fields.
 646     * if $indexed parameter is true, then returns only indexed fields.
 647     *
 648     * @param boolean $indexed
 649     * @return array
 650     */
 651    public function getFields($indexed = false)
 652    {
 653        $result = array();
 654        foreach( $this->_fields as $field ) {
 655            if( (!$indexed) || $field->isIndexed ) {
 656                $result[ $field->name ] = $field->name;
 657            }
 658        }
 659        return $result;
 660    }
 661
 662    /**
 663     * Returns array of FieldInfo objects.
 664     *
 665     * @return array
 666     */
 667    public function getFieldInfos()
 668    {
 669        return $this->_fields;
 670    }
 671
 672    /**
 673     * Returns actual deletions file generation number.
 674     *
 675     * @return integer
 676     */
 677    public function getDelGen()
 678    {
 679        return $this->_delGen;
 680    }
 681
 682    /**
 683     * Returns the total number of documents in this segment (including deleted documents).
 684     *
 685     * @return integer
 686     */
 687    public function count()
 688    {
 689        return $this->_docCount;
 690    }
 691
 692    /**
 693     * Returns number of deleted documents.
 694     *
 695     * @return integer
 696     */
 697    private function _deletedCount()
 698    {
 699        if ($this->_deleted === null) {
 700            return 0;
 701        }
 702
 703        if (extension_loaded('bitset')) {
 704            return count(bitset_to_array($this->_deleted));
 705        } else {
 706            return count($this->_deleted);
 707        }
 708    }
 709
 710    /**
 711     * Returns the total number of non-deleted documents in this segment.
 712     *
 713     * @return integer
 714     */
 715    public function numDocs()
 716    {
 717        if ($this->hasDeletions()) {
 718            return $this->_docCount - $this->_deletedCount();
 719        } else {
 720            return $this->_docCount;
 721        }
 722    }
 723
 724    /**
 725     * Get field position in a fields dictionary
 726     *
 727     * @param integer $fieldNum
 728     * @return integer
 729     */
 730    private function _getFieldPosition($fieldNum) {
 731        // Treat values which are not in a translation table as a 'direct value'
 732        return isset($this->_fieldsDicPositions[$fieldNum]) ?
 733                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
 734    }
 735
 736    /**
 737     * Return segment name
 738     *
 739     * @return string
 740     */
 741    public function getName()
 742    {
 743        return $this->_name;
 744    }
 745
 746
 747    /**
 748     * TermInfo cache
 749     *
 750     * Size is 1024.
 751     * Numbers are used instead of class constants because of performance considerations
 752     *
 753     * @var array
 754     */
 755    private $_termInfoCache = array();
 756
 757    private function _cleanUpTermInfoCache()
 758    {
 759        // Clean 256 term infos
 760        foreach ($this->_termInfoCache as $key => $termInfo) {
 761            unset($this->_termInfoCache[$key]);
 762
 763            // leave 768 last used term infos
 764            if (count($this->_termInfoCache) == 768) {
 765                break;
 766            }
 767        }
 768    }
 769
 770    /**
 771     * Load terms dictionary index
 772     *
 773     * @throws Zend_Search_Lucene_Exception
 774     */
 775    private function _loadDictionaryIndex()
 776    {
 777        // Check, if index is already serialized
 778        if ($this->_directory->fileExists($this->_name . '.sti')) {
 779            // Load serialized dictionary index data
 780            $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
 781            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
 782
 783            // Load dictionary index data
 784            if (($unserializedData = @unserialize($stiFileData)) !== false) {
 785                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
 786                return;
 787            }
 788        }
 789
 790        // Load data from .tii file and generate .sti file
 791
 792        // Prefetch dictionary index data
 793        $tiiFile = $this->openCompoundFile('.tii');
 794        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
 795
 796        /** Zend_Search_Lucene_Index_DictionaryLoader */
 797        require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
 798
 799        // Load dictionary index data
 800        list($this->_termDictionary, $this->_termDictionaryInfos) =
 801                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
 802
 803        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
 804        $stiFile = $this->_directory->createFile($this->_name . '.sti');
 805        $stiFile->writeBytes($stiFileData);
 806    }
 807
 808    /**
 809     * Scans terms dictionary and returns term info
 810     *
 811     * @param Zend_Search_Lucene_Index_Term $term
 812     * @return Zend_Search_Lucene_Index_TermInfo
 813     */
 814    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
 815    {
 816        $termKey = $term->key();
 817        if (isset($this->_termInfoCache[$termKey])) {
 818            $termInfo = $this->_termInfoCache[$termKey];
 819
 820            // Move termInfo to the end of cache
 821            unset($this->_termInfoCache[$termKey]);
 822            $this->_termInfoCache[$termKey] = $termInfo;
 823
 824            return $termInfo;
 825        }
 826
 827
 828        if ($this->_termDictionary === null) {
 829            $this->_loadDictionaryIndex();
 830        }
 831
 832        $searchField = $this->getFieldNum($term->field);
 833
 834        if ($searchField == -1) {
 835            return null;
 836        }
 837        $searchDicField = $this->_getFieldPosition($searchField);
 838
 839        // search for appropriate value in dictionary
 840        $lowIndex = 0;
 841        $highIndex = count($this->_termDictionary)-1;
 842        while ($highIndex >= $lowIndex) {
 843            // $mid = ($highIndex - $lowIndex)/2;
 844            $mid = ($highIndex + $lowIndex) >> 1;
 845            $midTerm = $this->_termDictionary[$mid];
 846
 847            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
 848            $delta = $searchDicField - $fieldNum;
 849            if ($delta == 0) {
 850                $delta = strcmp($term->text, $midTerm[1] /* text */);
 851            }
 852
 853            if ($delta < 0) {
 854                $highIndex = $mid-1;
 855            } elseif ($delta > 0) {
 856                $lowIndex  = $mid+1;
 857            } else {
 858                // return $this->_termDictionaryInfos[$mid]; // We got it!
 859                $a = $this->_termDictionaryInfos[$mid];
 860                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
 861
 862                // Put loaded termInfo into cache
 863                $this->_termInfoCache[$termKey] = $termInfo;
 864
 865                return $termInfo;
 866            }
 867        }
 868
 869        if ($highIndex == -1) {
 870            // Term is out of the dictionary range
 871            return null;
 872        }
 873
 874        $prevPosition = $highIndex;
 875        $prevTerm = $this->_termDictionary[$prevPosition];
 876        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
 877
 878        $tisFile = $this->openCompoundFile('.tis');
 879        $tiVersion = $tisFile->readInt();
 880        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
 881            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
 882            require_once 'Zend/Search/Lucene/Exception.php';
 883            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
 884        }
 885
 886        $termCount     = $tisFile->readLong();
 887        $indexInterval = $tisFile->readInt();
 888        $skipInterval  = $tisFile->readInt();
 889        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
 890            $maxSkipLevels = $tisFile->readInt();
 891        }
 892
 893        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
 894
 895        $termValue    = $prevTerm[1] /* text */;
 896        $termFieldNum = $prevTerm[0] /* field */;
 897        $freqPointer = $prevTermInfo[1] /* freqPointer */;
 898        $proxPointer = $prevTermInfo[2] /* proxPointer */;
 899        for ($count = $prevPosition*$indexInterval + 1;
 900             $count <= $termCount &&
 901             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
 902              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
 903               strcmp($termValue, $term->text) < 0) );
 904             $count++) {
 905            $termPrefixLength = $tisFile->readVInt();
 906            $termSuffix       = $tisFile->readString();
 907            $termFieldNum     = $tisFile->readVInt();
 908            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
 909
 910            $docFreq      = $tisFile->readVInt();
 911            $freqPointer += $tisFile->readVInt();
 912            $proxPointer += $tisFile->readVInt();
 913            if( $docFreq >= $skipInterval ) {
 914                $skipOffset = $tisFile->readVInt();
 915            } else {
 916                $skipOffset = 0;
 917            }
 918        }
 919
 920        if ($termFieldNum == $searchField && $termValue == $term->text) {
 921            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
 922        } else {
 923            $termInfo = null;
 924        }
 925
 926        // Put loaded termInfo into cache
 927        $this->_termInfoCache[$termKey] = $termInfo;
 928
 929        if (count($this->_termInfoCache) == 1024) {
 930            $this->_cleanUpTermInfoCache();
 931        }
 932
 933        return $termInfo;
 934    }
 935
 936    /**
 937     * Returns IDs of all the documents containing term.
 938     *
 939     * @param Zend_Search_Lucene_Index_Term $term
 940     * @param integer $shift
 941     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 942     * @return array
 943     */
 944    public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
 945    {
 946        $termInfo = $this->getTermInfo($term);
 947
 948        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
 949            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
 950                $docsFilter->segmentFilters[$this->_name] = array();
 951            }
 952            return array();
 953        }
 954
 955        $frqFile = $this->openCompoundFile('.frq');
 956        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
 957        $docId  = 0;
 958        $result = array();
 959
 960        if ($docsFilter !== null) {
 961            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
 962                require_once 'Zend/Search/Lucene/Exception.php';
 963                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
 964            }
 965
 966            if (isset($docsFilter->segmentFilters[$this->_name])) {
 967                // Filter already has some data for the current segment
 968
 969                // Make short name for the filter (which doesn't need additional dereferencing)
 970                $filter = &$docsFilter->segmentFilters[$this->_name];
 971
 972                // Check if filter is not empty
 973                if (count($filter) == 0) {
 974                    return array();
 975                }
 976
 977                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
 978                    // Perform fetching
 979// ---------------------------------------------------------------
 980                    $updatedFilterData = array();
 981
 982                    for( $count=0; $count < $termInfo->docFreq; $count++ ) {
 983                        $docDelta = $frqFile->readVInt();
 984                        if( $docDelta % 2 == 1 ) {
 985                            $docId += ($docDelta-1)/2;
 986                        } else {
 987                            $docId += $docDelta/2;
 988                            // read freq
 989                            $frqFile->readVInt();
 990                        }
 991
 992                        if (isset($filter[$docId])) {
 993                           $result[] = $shift + $docId;
 994                           $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
 995                        }
 996                    }
 997                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
 998// ---------------------------------------------------------------
 999                } else {
1000                    // Perform full scan
1001                    $updatedFilterData = array();
1002
1003                    for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1004                        $docDelta = $frqFile->readVInt();
1005                        if( $docDelta % 2 == 1 ) {
1006                            $docId += ($docDelta-1)/2;
1007                        } else {
1008                            $docId += $docDelta/2;
1009                            // read freq
1010                            $frqFile->readVInt();
1011                        }
1012
1013                        if (isset($filter[$docId])) {
1014                           $result[] = $shift + $docId;
1015                           $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1016                        }
1017                    }
1018                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1019                }
1020            } else {
1021                // Filter is present, but doesn't has data for the current segment yet
1022                $filterData = array();
1023                for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1024                    $docDelta = $frqFile->readVInt();
1025                    if( $docDelta % 2 == 1 ) {
1026                        $docId += ($docDelta-1)/2;
1027                    } else {
1028                        $docId += $docDelta/2;
1029                        // read freq
1030                        $frqFile->readVInt();
1031                    }
1032
1033                    $result[] = $shift + $docId;
1034                    $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1035                }
1036                $docsFilter->segmentFilters[$this->_name] = $filterData;
1037            }
1038        } else {
1039            for( $count=0; $count < $termInfo->docFreq; $count++ ) {
1040                $docDelta = $frqFile->readVInt();
1041                if( $docDelta % 2 == 1 ) {
1042                    $docId += ($docDelta-1)/2;
1043                } else {
1044                    $docId += $docDelta/2;
1045                    // read freq
1046                    $frqFile->readVInt();
1047                }
1048
1049                $result[] = $shift + $docId;
1050            }
1051        }
1052
1053        return $result;
1054    }
1055
1056    /**
1057     * Returns term freqs array.
1058     * Result array structure: array(docId => freq, ...)
1059     *
1060     * @param Zend_Search_Lucene_Index_Term $term
1061     * @param integer $shift
1062     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @return array
1064     */
1065    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
1066    {
1067        $termInfo = $this->getTermInfo($term);
1068
1069        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
1070            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1071                $docsFilter->segmentFilters[$this->_name] = array();
1072            }
1073            return array();
1074        }
1075
1076        $frqFile = $this->openCompoundFile('.frq');
1077        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
1078        $result = array();
1079        $docId = 0;
1080
1081        $result = array();
1082
1083        if ($docsFilter !== null) {
1084            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1085                require_once 'Zend/Search/Lucene/Exception.php';
1086                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
1087            }
1088
1089            if (isset($docsFilter->segmentFilters[$this->_name])) {
1090                // Filter already has some data for the current segment
1091
1092                // Make short name for the filter (which doesn't need additional dereferencing)
1093                $filter = &$docsFilter->segmentFilters[$this->_name];
1094
1095                // Check if filter is not empty
1096                if (count($filter) == 0) {
1097                    return array();
1098                }
1099
1100
1101                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
1102                    // Perform fetching
1103// ---------------------------------------------------------------
1104                    $updatedFilterData = array();
1105
1106                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
1107                        $docDelta = $frqFile->readVInt();
1108                        if ($docDelta % 2 == 1) {
1109                            $docId += ($docDelta-1)/2;
1110                            if (isset($filter[$docId])) {
1111                                $result[$shift + $docId] = 1;
1112                                $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1113                            }
1114                        } else {
1115                            $docId += $docDelta/2;
1116                            $freq = $frqFile->readVInt();
1117                            if (isset($filter[$docId])) {
1118                                $result[$shift + $docId] = $freq;
1119                                $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1120                            }
1121                        }
1122                    }
1123                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1124// ---------------------------------------------------------------
1125                } else {
1126                    // Perform full scan
1127                    $updatedFilterData = array();
1128
1129                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
1130                        $docDelta = $frqFile->readVInt();
1131                        if ($docDelta % 2 == 1) {
1132                            $docId += ($docDelta-1)/2;
1133                            if (isset($filter[$docId])) {
1134                                $result[$shift + $docId] = 1;
1135                                $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
1136                            }
1137                        } else {
1138                            $docId += $docDelta/2;
1139                            $freq = $frqFile->readVInt();
1140                            if (isset($filter[$docId])) {
1141                                $result[$shift + $docId] = $freq;
1142                                $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
1143                            }
1144                        }
1145                    }
1146                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1147                }
1148            } else {
1149                // Filter doesn't has data for current segment
1150                $filterData = array();
1151
1152                for ($count = 0; $count < $termInfo->docFreq; $count++) {
1153                    $docDelta = $frqFile->readVInt();
1154                    if ($docDelta % 2 == 1) {
1155                        $docId += ($docDelta-1)/2;
1156                        $result[$shift + $docId] = 1;
1157                        $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1158                    } else {
1159                        $docId += $docDelta/2;
1160                        $result[$shift + $docId] = $frqFile->readVInt();
1161                        $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1162                    }
1163                }
1164
1165                $docsFilter->segmentFilters[$this->_name] = $filterData;
1166            }
1167        } else {
1168            for ($count = 0; $count < $termInfo->docFreq; $count++) {
1169                $docDelta = $frqFile->readVInt();
1170                if ($docDelta % 2 == 1) {
1171                    $docId += ($docDelta-1)/2;
1172                    $result[$shift + $docId] = 1;
1173                } else {
1174                    $docId += $docDelta/2;
1175                    $result[$shift + $docId] = $frqFile->readVInt();
1176                }
1177            }
1178        }
1179
1180        return $result;
1181    }
1182
1183    /**
1184     * Returns term positions array.
1185     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
1186     *
1187     * @param Zend_Search_Lucene_Index_Term $term
1188     * @param integer $shift
1189     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @return array
1191     */
1192    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
1193    {
1194        $termInfo = $this->getTermInfo($term);
1195
1196        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
1197            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1198                $docsFilter->segmentFilters[$this->_name] = array();
1199            }
1200            return array();
1201        }
1202
1203        $frqFile = $this->openCompoundFile('.frq');
1204        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
1205
1206        $docId = 0;
1207        $freqs = array();
1208
1209
1210        if ($docsFilter !== null) {
1211            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
1212                require_once 'Zend/Search/Lucene/Exception.php';
1213                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
1214            }
1215
1216            if (isset($docsFilter->segmentFilters[$this->_name])) {
1217                // Filter already has some data for the current segment
1218
1219                // Make short name for the filter (which doesn't need additional dereferencing)
1220                $filter = &$docsFilter->segmentFilters[$this->_name];
1221
1222                // Check if filter is not empty
1223                if (count($filter) == 0) {
1224                    return array();
1225                }
1226
1227                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
1228                    // Perform fetching
1229// ---------------------------------------------------------------
1230                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
1231                        $docDelta = $frqFile->readVInt();
1232                        if ($docDelta % 2 == 1) {
1233                            $docId += ($docDelta-1)/2;
1234                            $freqs[$docId] = 1;
1235                        } else {
1236                            $docId += $docDelta/2;
1237                            $freqs[$docId] = $frqFile->readVInt();
1238                        }
1239                    }
1240
1241                    $updatedFilterData = array();
1242                    $result = array();
1243                    $prxFile = $this->openCompoundFile('.prx');
1244                    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1245                    foreach ($freqs as $docId => $freq) {
1246                        $termPosition = 0;
1247                        $positions = array();
1248
1249                        // we have to read .prx file to get right position for next doc
1250                        // even filter doesn't match current document
1251                        for ($count = 0; $count < $freq; $count++ ) {
1252                            $termPosition += $prxFile->readVInt();
1253                            $positions[] = $termPosition;
1254                        }
1255
1256                        // Include into updated filter and into result only if doc is matched by filter
1257                        if (isset($filter[$docId])) {
1258                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1259                            $result[$shift + $docId] = $positions;
1260                        }
1261                    }
1262
1263                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1264// ---------------------------------------------------------------
1265                } else {
1266                    // Perform full scan
1267                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
1268                        $docDelta = $frqFile->readVInt();
1269                        if ($docDelta % 2 == 1) {
1270                            $docId += ($docDelta-1)/2;
1271                            $freqs[$docId] = 1;
1272                        } else {
1273                            $docId += $docDelta/2;
1274                            $freqs[$docId] = $frqFile->readVInt();
1275                        }
1276                    }
1277
1278                    $updatedFilterData = array();
1279                    $result = array();
1280                    $prxFile = $this->openCompoundFile('.prx');
1281                    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1282                    foreach ($freqs as $docId => $freq) {
1283                        $termPosition = 0;
1284                        $positions = array();
1285
1286                        // we have to read .prx file to get right position for next doc
1287                        // even filter doesn't match current document
1288                        for ($count = 0; $count < $freq; $count++ ) {
1289                            $termPosition += $prxFile->readVInt();
1290                            $positions[] = $termPosition;
1291                        }
1292
1293                        // Include into updated filter and into result only if doc is matched by filter
1294                        if (isset($filter[$docId])) {
1295                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1296                            $result[$shift + $docId] = $positions;
1297                        }
1298                    }
1299
1300                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
1301                }
1302            } else {
1303                // Filter doesn't has data for current segment
1304                for ($count = 0; $count < $termInfo->docFreq; $count++) {
1305                    $docDelta = $frqFile->readVInt();
1306                    if ($docDelta % 2 == 1) {
1307                        $docId += ($docDelta-1)/2;
1308                        $freqs[$docId] = 1;
1309                    } else {
1310                        $docId += $docDelta/2;
1311                        $freqs[$docId] = $frqFile->readVInt();
1312                    }
1313                }
1314
1315                $filterData = array();
1316                $result = array();
1317                $prxFile = $this->openCompoundFile('.prx');
1318                $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1319                foreach ($freqs as $docId => $freq) {
1320                    $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
1321
1322                    $termPosition = 0;
1323                    $positions = array();
1324
1325                    for ($count = 0; $count < $freq; $count++ ) {
1326                        $termPosition += $prxFile->readVInt();
1327                        $positions[] = $termPosition;
1328                    }
1329
1330                    $result[$shift + $docId] = $positions;
1331                }
1332
1333                $docsFilter->segmentFilters[$this->_name] = $filterData;
1334            }
1335        } else {
1336            for ($count = 0; $count < $termInfo->docFreq; $count++) {
1337                $docDelta = $frqFile->readVInt();
1338                if ($docDelta % 2 == 1) {
1339                    $docId += ($docDelta-1)/2;
1340                    $freqs[$docId] = 1;
1341                } else {
1342                    $docId += $docDelta/2;
1343                    $freqs[$docId] = $frqFile->readVInt();
1344                }
1345            }
1346
1347            $result = array();
1348            $prxFile = $this->openCompoundFile('.prx');
1349            $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
1350            foreach ($freqs as $docId => $freq) {
1351                $termPosition = 0;
1352                $positions = array();
1353
1354                for ($count = 0; $count < $freq; $count++ ) {
1355                    $termPosition += $prxFile->readVInt();
1356                    $positions[] = $termPosition;
1357                }
1358
1359                $result[$shift + $docId] = $positions;
1360            }
1361        }
1362
1363        return $result;
1364    }
1365
1366    /**
     * Load normalization factors from an index file
1368     *
1369     * @param integer $fieldNum
1370     * @throws Zend_Search_Lucene_Exception
1371

Large files files are truncated, but you can click here to view the full file