/library/Zend/Search/Lucene/Storage/File.php
PHP | 427 lines | 177 code | 49 blank | 201 comment | 35 complexity | d93be0ae6c29c6bd331616639e2e5d3d MD5 | raw file
- <?php
- /**
- * Zend Framework
- *
- * LICENSE
- *
- * This source file is subject to the new BSD license that is bundled
- * with this package in the file LICENSE.txt.
- * It is also available through the world-wide-web at this URL:
- * http://framework.zend.com/license/new-bsd
- * If you did not receive a copy of the license and are unable to
- * obtain it through the world-wide-web, please send an email
- * to license@zend.com so we can send you a copy immediately.
- *
- * @category Zend
- * @package Zend_Search_Lucene
- * @subpackage Storage
- * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://framework.zend.com/license/new-bsd New BSD License
- */
- /** Zend_Search_Lucene_Exception */
- require_once 'Zend/Search/Lucene/Exception.php';
- /**
- * @category Zend
- * @package Zend_Search_Lucene
- * @subpackage Storage
- * @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
- * @license http://framework.zend.com/license/new-bsd New BSD License
- */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);

    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Flush output.
     *
     * Returns true on success or false on failure.
     *
     * @return boolean
     */
    abstract public function flush();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock  Non-blocking flag; exact semantics are
     *                                 defined by the concrete implementation.
     * @return boolean
     */
    abstract public function lock($lockType, $nonBlockinLock = false);

    /**
     * Unlock file
     */
    abstract public function unlock();

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return mixed  Whatever the concrete _fwrite() implementation returns.
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }

    /**
     * Reads a big-endian 32-bit integer from the current position in the
     * file and advances the file pointer.
     *
     * The four bytes are combined without sign extension, so on builds
     * where PHP integers are wider than 32 bits a stored value with the
     * high bit set is returned as a positive number.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }

    /**
     * Writes an integer to the end of file as four big-endian bytes.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr(($value>>24) & 0xFF) .
                        chr(($value>>16) & 0xFF) .
                        chr(($value>>8)  & 0xFF) .
                        chr($value       & 0xFF),   4 );
    }

    /**
     * Returns a long integer (eight big-endian bytes) from the current
     * position in the file and advances the file pointer.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            // The upper four bytes must be zero, and the top bit of the
            // lower word (byte 4) must be clear; otherwise the value does
            // not fit into a positive 32-bit integer.
            if ((ord($str[0])          != 0) ||
                (ord($str[1])          != 0) ||
                (ord($str[2])          != 0) ||
                (ord($str[3])          != 0) ||
                ((ord($str[4]) & 0x80) != 0)) {
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            return  ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        }
    }

    /**
     * Writes long integer to the end of file as eight big-endian bytes.
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_fwrite( chr(($value>>56) & 0xFF) .
                            chr(($value>>48) & 0xFF) .
                            chr(($value>>40) & 0xFF) .
                            chr(($value>>32) & 0xFF) .
                            chr(($value>>24) & 0xFF) .
                            chr(($value>>16) & 0xFF) .
                            chr(($value>>8)  & 0xFF) .
                            chr($value       & 0xFF),   8 );
        } else {
            if ($value > 0x7FFFFFFF) {
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            $this->_fwrite( "\x00\x00\x00\x00"       .
                            chr(($value>>24) & 0xFF) .
                            chr(($value>>16) & 0xFF) .
                            chr(($value>>8)  & 0xFF) .
                            chr($value       & 0xFF),   8 );
        }
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries seven payload bits, least significant group first;
     * a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }

        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * Each byte carries seven payload bits, least significant group first;
     * the high bit is set on every byte except the last one.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');

        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }

        $this->_fwrite(chr($value));
    }

    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix is a character count, not a byte count, so
     * extra bytes are fetched on demand whenever a multi-byte lead byte is
     * met. The two-byte sequence \xC0\x80 is decoded to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */
        $str_val = $this->_fread($strlen);

        // From here on $strlen tracks the expected byte length of $str_val.
        for ($count = 0; $count < $strlen; $count++ ) {
            $lead = ord($str_val[$count]);
            if (($lead & 0xC0) != 0xC0) {
                // Single-byte character, nothing more to fetch.
                continue;
            }

            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if (substr($str_val, $count, 2) === "\xC0\x80") {
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count + 2);
                // Two bytes became one: shrink the expected length and do
                // not skip ahead, the next character now starts at $count+1.
                $strlen--;
            } else {
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is prefixed with its character count (as a VInt) and every
     * NUL byte is written as the two-byte sequence \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        /**
         * String is already in Java 2 representation.
         * We should only calculate actual string length and replace
         * \x00 by \xC0\x80
         */
        for ($count = 0; $count < $strlen; $count++ ) {
            $lead = ord($str[$count]);
            if (($lead & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($lead & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($lead & 0x10) {
                        $addBytes++;
                    }
                }
                // Continuation bytes belong to the same character.
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // The character count is taken before the substitution: a NUL is
        // still one character although it occupies two bytes on disk.
        // str_replace() returns the input unchanged when there is no NUL.
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    }

    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed with its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}