PageRenderTime 26ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Search/Lucene/Storage/File/AbstractFile.php

https://github.com/Exercise/zf2
PHP | 411 lines | 194 code | 48 blank | 169 comment | 40 complexity | a066292161f55db9811aa4d9d4a3eea6 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Storage
  18. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. * @version $Id$
  21. */
  22. /**
  23. * @namespace
  24. */
  25. namespace Zend\Search\Lucene\Storage\File;
  26. use Zend\Search\Lucene\Storage\File,
  27. Zend\Search\Lucene;
  28. /**
  29. * @uses \Zend\Search\Lucene\Exception
  30. * @category Zend
  31. * @package Zend_Search_Lucene
  32. * @subpackage Storage
  33. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  34. * @license http://framework.zend.com/license/new-bsd New BSD License
  35. */
  36. abstract class AbstractFile implements File
  37. {
  /**
   * Reads one byte at the current file position and moves the
   * file pointer past it.
   *
   * @return integer Ordinal value of the byte that was read
   */
  public function readByte()
  {
      $rawByte = $this->_fread(1);

      return ord($rawByte);
  }
  /**
   * Writes a single byte to the file through _fwrite().
   *
   * @param integer $byte Ordinal value of the byte to write
   * @return mixed Result of the underlying _fwrite() call, passed through unchanged
   */
  public function writeByte($byte)
  {
      $rawByte = chr($byte);

      return $this->_fwrite($rawByte, 1);
  }
  /**
   * Reads $num bytes starting at the current file position and
   * moves the file pointer past them.
   *
   * @param integer $num Number of bytes to read
   * @return string Bytes returned by _fread()
   */
  public function readBytes($num)
  {
      $chunk = $this->_fread($num);

      return $chunk;
  }
  /**
   * Writes raw bytes to the file through _fwrite().
   *
   * Both arguments are forwarded untouched, so the meaning of a null
   * $num ("write all of $data") is determined by _fwrite().
   *
   * @param string  $data Bytes to write
   * @param integer $num  Number of bytes to write, or null for all of $data
   */
  public function writeBytes($data, $num=null)
  {
      $this->_fwrite(
          $data,
          $num
      );
  }
  /**
   * Reads a 4-byte integer at the current file position and moves
   * the file pointer past it.
   *
   * The bytes are combined most-significant byte first: byte 0 is
   * shifted left by 24 bits, byte 3 is not shifted at all. Only plain
   * shifts and ORs are used; no explicit sign extension is performed.
   *
   * @return integer
   */
  public function readInt()
  {
      $bytes = $this->_fread(4);

      // Shift applied to each byte, indexed by the byte's position
      $shifts = array(24, 16, 8, 0);

      $value = 0;
      foreach ($shifts as $index => $shift) {
          $value |= ord($bytes[$index]) << $shift;
      }

      return $value;
  }
  /**
   * Writes an integer to the file as 4 bytes, most-significant
   * byte first.
   *
   * @param integer $value Value to write; it is converted to integer first
   */
  public function writeInt($value)
  {
      settype($value, 'integer');

      // Extract the four bytes from the highest to the lowest one
      $packed = '';
      foreach (array(24, 16, 8, 0) as $shift) {
          $packed .= chr($value >> $shift & 0xFF);
      }

      $this->_fwrite($packed, 4);
  }
  /**
   * Reads an 8-byte long integer at the current file position and
   * moves the file pointer past it.
   *
   * When PHP integers are wider than 4 bytes the value is assembled
   * directly from the eight bytes (most-significant byte first).
   * Otherwise the work is delegated to _readLong32Bit().
   *
   * @return integer|float
   * @throws \Zend\Search\Lucene\Exception
   */
  public function readLong()
  {
      // Native integers are too narrow: use the 32-bit implementation.
      // (fseek() uses long for offset, so the largest index segment
      // file size in 32-bit mode is 2Gb.)
      if (PHP_INT_SIZE <= 4) {
          return $this->_readLong32Bit();
      }

      $bytes = $this->_fread(8);

      // Shift applied to each byte, indexed by the byte's position
      $shifts = array(56, 48, 40, 32, 24, 16, 8, 0);

      $value = 0;
      foreach ($shifts as $index => $shift) {
          $value |= ord($bytes[$index]) << $shift;
      }

      return $value;
  }
  /**
   * Writes a long integer to the file as 8 bytes, most-significant
   * byte first.
   *
   * When PHP integers are wider than 4 bytes the value is converted to
   * integer and split into bytes directly. Otherwise the work is
   * delegated to _writeLong32Bit().
   *
   * @param integer $value
   * @throws \Zend\Search\Lucene\Exception
   */
  public function writeLong($value)
  {
      // Native integers are too narrow: use the 32-bit implementation.
      // (fseek() and ftell() use long for offset, so the largest index
      // segment file size in 32-bit mode is 2Gb.)
      if (PHP_INT_SIZE <= 4) {
          $this->_writeLong32Bit($value);
          return;
      }

      settype($value, 'integer');

      // Extract the eight bytes from the highest to the lowest one
      $packed = '';
      foreach (array(56, 48, 40, 32, 24, 16, 8, 0) as $shift) {
          $packed .= chr($value >> $shift & 0xFF);
      }

      $this->_fwrite($packed, 8);
  }
  /**
   * Reads a long integer as two consecutive 4-byte words (high word
   * first) via readInt() and combines them. Intended for 32-bit
   * platforms, where the result is returned as a float when it does
   * not fit into an integer.
   *
   * @return integer|float
   * @throws \Zend\Search\Lucene\Exception if the highest bit of the high
   *         word is set and the value is not representable as described in
   *         the exception message
   */
  protected function _readLong32Bit()
  {
      $highWord = $this->readInt();
      $lowWord  = $this->readInt();

      if ($highWord & (int)0x80000000) {
          // It's a negative value since the highest bit is set.
          // It is only accepted when the high word matches (int)0xFFFFFFFF
          // and the low word has its highest bit set as well.
          $isSupported = ($highWord == (int)0xFFFFFFFF && ($lowWord & (int)0x80000000));

          if (!$isSupported) {
              throw new Lucene\Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
          }

          return $lowWord;
      }

      if ($lowWord < 0) {
          // Value is larger than 0x7FFF FFFF. Represent low word as float.
          $lowWord &= 0x7FFFFFFF;
          $lowWord += (float)0x80000000;
      }

      // Return the low word on its own when the high word contributes nothing
      if ($highWord == 0) {
          return $lowWord;
      }

      return $highWord*(float)0x100000000/* 0x00000001 00000000 */ + $lowWord;
  }
  /**
   * Writes a long integer as two consecutive 4-byte words (high word
   * first) via writeInt(). Implementation for 32-bit platforms.
   *
   * @param integer|float $value
   * @throws \Zend\Search\Lucene\Exception if $value is below (int)0x80000000
   */
  protected function _writeLong32Bit($value)
  {
      if ($value < (int)0x80000000) {
          throw new Lucene\Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
      }

      if ($value >= 0) {
          // Split the value into the part above and the part below 2^32
          $highWord = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
          $lowWord  = $value - $highWord*(float)0x100000000/* 0x00000001 00000000 */;

          if ($lowWord > 0x7FFFFFFF) {
              // Highest bit of low word is set. Translate it to the corresponding negative integer value
              $lowWord -= 0x80000000;
              $lowWord |= 0x80000000;
          }
      } else {
          // Negative value: fixed high word, low word is the value itself
          $highWord = (int)0xFFFFFFFF;
          $lowWord  = (int)$value;
      }

      $this->writeInt($highWord);
      $this->writeInt($lowWord);
  }
  /**
   * Reads a variable-length integer at the current file position
   * and moves the file pointer past it.
   *
   * Each byte contributes its low 7 bits, least-significant group
   * first; a set high bit (0x80) means another byte follows.
   *
   * @return integer
   */
  public function readVInt()
  {
      $result = 0;
      $shift  = 0;

      do {
          $current = ord($this->_fread(1));
          $result |= ($current & 0x7F) << $shift;
          $shift  += 7;
      } while (($current & 0x80) != 0);

      return $result;
  }
  /**
   * Writes a variable-length integer to the file.
   *
   * The value is emitted 7 bits at a time, least-significant group
   * first. Every byte except the last one has its high bit (0x80) set.
   * Each byte is written with its own _fwrite() call.
   *
   * @param integer $value Value to write; it is converted to integer first
   */
  public function writeVInt($value)
  {
      settype($value, 'integer');

      // Emit continuation bytes while more than 7 bits remain
      for (; $value > 0x7F; $value >>= 7) {
          $this->_fwrite(chr(0x80 | ($value & 0x7F)));
      }

      // Final byte
      $this->_fwrite(chr($value));
  }
  /**
   * Reads a string from the current position in the file
   * and advances the file pointer.
   *
   * The string is stored as a VInt character count followed by the
   * characters in Java 2 "modified UTF-8". The NUL character is stored as
   * the two-byte sequence \xC0\x80 and is returned as a real "\x00" byte.
   *
   * This implementation supports only Basic Multilingual Plane
   * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
   * "supplementary characters" (characters whose code points are
   * greater than 0xFFFF).
   * Java 2 represents these characters as a pair of char (16-bit)
   * values, the first from the high-surrogates range (0xD800-0xDBFF),
   * the second from the low-surrogates range (0xDC00-0xDFFF). Then
   * they are encoded as usual UTF-8 characters in six bytes.
   * Standard UTF-8 representation uses four bytes for supplementary
   * characters.
   *
   * @return string
   */
  public function readString()
  {
      // Length is given in characters, not bytes
      $strlen = $this->readVInt();
      if ($strlen == 0) {
          return '';
      }

      // Start by reading one byte per character, then pull in the extra
      // bytes required by every multi-byte lead byte that is found.
      $str_val = $this->_fread($strlen);

      for ($count = 0; $count < $strlen; $count++) {
          $leadByte = ord($str_val[$count]);
          if (($leadByte & 0xC0) != 0xC0) {
              // Single-byte character
              continue;
          }

          $addBytes = 1;
          if ($leadByte & 0x20) {
              $addBytes++;

              // Never used. Java2 doesn't encode strings in four bytes
              if ($leadByte & 0x10) {
                  $addBytes++;
              }
          }

          $str_val .= $this->_fread($addBytes);
          $strlen  += $addBytes;

          // Skip the continuation bytes of this character
          $count   += $addBytes;
      }

      // Java2 encodes the null character in two bytes (\xC0\x80).
      // The conversion is done once the whole string has been read, so the
      // byte offsets used by the scan above are never shifted. 0xC0 is never
      // a continuation byte, so in well-formed data the pair can only be an
      // encoded null character.
      return str_replace("\xC0\x80", "\x00", $str_val);
  }

  /**
   * Writes a string to the end of file.
   *
   * The string is written as a VInt character count followed by the
   * string bytes, with every "\x00" byte replaced by the Java 2 two-byte
   * form \xC0\x80.
   *
   * This implementation supports only Basic Multilingual Plane
   * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
   * "supplementary characters" (characters whose code points are
   * greater than 0xFFFF).
   * Java 2 represents these characters as a pair of char (16-bit)
   * values, the first from the high-surrogates range (0xD800-0xDBFF),
   * the second from the low-surrogates range (0xDC00-0xDFFF). Then
   * they are encoded as usual UTF-8 characters in six bytes.
   * Standard UTF-8 representation uses four bytes for supplementary
   * characters.
   *
   * @param string $str
   * @throws \Zend\Search\Lucene\Exception if the computed character count is negative
   */
  public function writeString($str)
  {
      // convert input to a string before iterating string characters
      settype($str, 'string');

      $chars = $strlen = strlen($str);

      /**
       * String is already in Java 2 representation.
       * We should only calculate actual string length (in characters)
       * and replace \x00 by \xC0\x80
       */
      for ($count = 0; $count < $strlen; $count++) {
          $leadByte = ord($str[$count]);
          if (($leadByte & 0xC0) != 0xC0) {
              // Single-byte character: already counted once
              continue;
          }

          $addBytes = 1;
          if ($leadByte & 0x20) {
              $addBytes++;

              // Never used. Java2 doesn't encode strings in four bytes
              // and we don't support non-BMP characters
              if ($leadByte & 0x10) {
                  $addBytes++;
              }
          }

          // Continuation bytes are not characters of their own
          $chars -= $addBytes;
          $count += $addBytes;
      }

      if ($chars < 0) {
          throw new Lucene\Exception('Invalid UTF-8 string');
      }

      $this->writeVInt($chars);

      // A null character still counts as one character, but is stored in
      // two bytes. str_replace() returns the string unchanged when it
      // contains no null bytes.
      $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
  }
  /**
   * Reads a binary chunk at the current file position and moves the
   * file pointer past it.
   *
   * The chunk is stored as a VInt byte count followed by that many bytes.
   *
   * @return string
   */
  public function readBinary()
  {
      $length = $this->readVInt();

      return $this->_fread($length);
  }
  363. }