/xampp/htdocs/magento/lib/Zend/Search/Lucene/Index/DictionaryLoader.php
https://github.com/edmondscommerce/XAMPP-Magento-Demo-Site · PHP · 265 lines · 166 code · 27 blank · 72 comment · 55 complexity · 061882eebe6895cf14d385449786f06c MD5 · raw file
- <?php
- /**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to the new BSD license that is bundled
- * with this package in the file LICENSE.txt.
- * It is also available through the world-wide-web at this URL:
- * http://framework.zend.com/license/new-bsd
- * If you did not receive a copy of the license and are unable to
- * obtain it through the world-wide-web, please send an email
- * to license@zend.com so we can send you a copy immediately.
- *
- * @category Zend
- * @package Zend_Search_Lucene
- * @subpackage Index
- * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://framework.zend.com/license/new-bsd New BSD License
- */
/**
 * Dictionary loader
 *
 * It's a dummy class which is created to encapsulate poorly structured code.
 * Manual "method inlining" is performed to speed up the dictionary index loading
 * operation, which is a major bottleneck for search performance.
 *
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_DictionaryLoader
{
    /**
     * Dictionary index loader.
     *
     * It takes a string which is actually <segment_name>.tii index file data and
     * returns two arrays - term and termInfo lists.
     *
     * Every entry of the term list is array($termFieldNum, $termValue).
     * Every entry of the termInfo list is
     * array($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer).
     * The field number of the first (special) term entry is normalised to -1
     * on 64-bit platforms.
     *
     * See Zend_Search_Lucene_Index_SegmentInfo class for details
     *
     * @param string $data
     * @return array array(&$termDictionary, &$termInfos)
     * @throws Zend_Search_Lucene_Exception
     */
    public static function load($data)
    {
        $termDictionary = array();
        $termInfos      = array();
        $pos            = 0;

        // $tiVersion = $tiiFile->readInt();
        $tiVersion = ord($data[0]) << 24 | ord($data[1]) << 16 | ord($data[2]) << 8 | ord($data[3]);
        $pos += 4;
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
            #require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        // $indexTermCount = $tiiFile->readLong();
        if (PHP_INT_SIZE > 4) {
            $indexTermCount = ord($data[$pos])   << 56 |
                              ord($data[$pos+1]) << 48 |
                              ord($data[$pos+2]) << 40 |
                              ord($data[$pos+3]) << 32 |
                              ord($data[$pos+4]) << 24 |
                              ord($data[$pos+5]) << 16 |
                              ord($data[$pos+6]) << 8  |
                              ord($data[$pos+7]);
        } else {
            // Only the low 31 bits of the 64-bit counter are representable here
            if ((ord($data[$pos])            != 0) ||
                (ord($data[$pos+1])          != 0) ||
                (ord($data[$pos+2])          != 0) ||
                (ord($data[$pos+3])          != 0) ||
                ((ord($data[$pos+4]) & 0x80) != 0)) {
                #require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            $indexTermCount = ord($data[$pos+4]) << 24 |
                              ord($data[$pos+5]) << 16 |
                              ord($data[$pos+6]) << 8  |
                              ord($data[$pos+7]);
        }
        $pos += 8;

        //                  $tiiFile->readInt();  // IndexInterval
        $pos += 4;

        // $skipInterval   = $tiiFile->readInt();
        $skipInterval = ord($data[$pos]) << 24 | ord($data[$pos+1]) << 16 | ord($data[$pos+2]) << 8 | ord($data[$pos+3]);
        $pos += 4;

        if ($indexTermCount < 1) {
            #require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong number of terms in a term dictionary index');
        }

        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
            /* Skip MaxSkipLevels value */
            $pos += 4;
        }

        // Pointers are stored as deltas and accumulated across entries
        $prevTerm     = '';
        $freqPointer  =  0;
        $proxPointer  =  0;
        $indexPointer =  0;
        for ($count = 0; $count < $indexTermCount; $count++) {
            //$termPrefixLength = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termPrefixLength = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termPrefixLength |= ($nbyte & 0x7F) << $shift;
            }

            // $termSuffix       = $tiiFile->readString();
            $nbyte = ord($data[$pos++]);
            $len = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $len |= ($nbyte & 0x7F) << $shift;
            }
            if ($len == 0) {
                $termSuffix = '';
            } else {
                // $len bytes are read first; for every multi-byte lead byte found
                // the continuation bytes are appended and $len grows accordingly.
                $termSuffix = substr($data, $pos, $len);
                $pos += $len;
                for ($count1 = 0; $count1 < $len; $count1++ ) {
                    if (( ord($termSuffix[$count1]) & 0xC0 ) == 0xC0) {
                        $addBytes = 1;
                        if (ord($termSuffix[$count1]) & 0x20 ) {
                            $addBytes++;

                            // Never used for Java Lucene created index.
                            // Java2 doesn't encode strings in four bytes
                            if (ord($termSuffix[$count1]) & 0x10 ) {
                                $addBytes++;
                            }
                        }
                        $termSuffix .= substr($data, $pos, $addBytes);
                        $pos += $addBytes;
                        $len += $addBytes;

                        // Check for null character. Java2 encodes null character
                        // in two bytes.
                        // NOTE(review): assigning integer 0 to a string offset stores
                        // the character '0', not a NUL byte, and $len is not reduced
                        // after the sequence is collapsed. Left as is because other
                        // readers of the same index may decode identically - confirm
                        // before changing.
                        if (ord($termSuffix[$count1])   == 0xC0 &&
                            ord($termSuffix[$count1+1]) == 0x80   ) {
                            $termSuffix[$count1] = 0;
                            $termSuffix = substr($termSuffix,0,$count1+1)
                                        . substr($termSuffix,$count1+2);
                        }
                        $count1 += $addBytes;
                    }
                }
            }

            // $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($prevTerm, $termPrefixLength) . $termSuffix;
            // $pb counts prefix bytes, $pc counts prefix characters.
            $prevTermLength = strlen($prevTerm);
            $pb = 0; $pc = 0;
            while ($pb < $prevTermLength && $pc < $termPrefixLength) {
                $charBytes = 1;
                if ((ord($prevTerm[$pb]) & 0xC0) == 0xC0) {
                    $charBytes++;
                    if (ord($prevTerm[$pb]) & 0x20 ) {
                        $charBytes++;
                        if (ord($prevTerm[$pb]) & 0x10 ) {
                            $charBytes++;
                        }
                    }
                }

                // The bound is the previous term itself (not the whole file buffer),
                // so an incomplete trailing sequence is never taken into the prefix.
                if ($pb + $charBytes > $prevTermLength) {
                    // wrong character
                    break;
                }

                $pc++;
                $pb += $charBytes;
            }
            $termValue = substr($prevTerm, 0, $pb) . $termSuffix;

            // $termFieldNum     = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $termFieldNum = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $termFieldNum |= ($nbyte & 0x7F) << $shift;
            }

            // $docFreq          = $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $docFreq = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $docFreq |= ($nbyte & 0x7F) << $shift;
            }

            // $freqPointer     += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $freqPointer += $vint;

            // $proxPointer     += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $proxPointer += $vint;

            // The skip delta is present only when docFreq reaches the skip interval
            if( $docFreq >= $skipInterval ) {
                // $skipDelta = $tiiFile->readVInt();
                $nbyte = ord($data[$pos++]);
                $vint = $nbyte & 0x7F;
                for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                    $nbyte = ord($data[$pos++]);
                    $vint |= ($nbyte & 0x7F) << $shift;
                }
                $skipDelta = $vint;
            } else {
                $skipDelta = 0;
            }

            // $indexPointer += $tiiFile->readVInt();
            $nbyte = ord($data[$pos++]);
            $vint = $nbyte & 0x7F;
            for ($shift=7; ($nbyte & 0x80) != 0; $shift += 7) {
                $nbyte = ord($data[$pos++]);
                $vint |= ($nbyte & 0x7F) << $shift;
            }
            $indexPointer += $vint;

            // $this->_termDictionary[] = new Zend_Search_Lucene_Index_Term($termValue, $termFieldNum);
            $termDictionary[] = array($termFieldNum, $termValue);

            $termInfos[] =
                 // new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
                 array($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);

            $prevTerm = $termValue;
        }

        // Check special index entry mark
        if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
            #require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        } else if (PHP_INT_SIZE > 4){
            // Treat 64-bit 0xFFFFFFFF as -1
            $termDictionary[0][0] = -1;
        }

        return array(&$termDictionary, &$termInfos);
    }
}