PageRenderTime 46ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/protected/vendors/Zend/ZendSearch/Lucene/Storage/File/AbstractFile.php

https://github.com/kayasax/atlas
PHP | 395 lines | 199 code | 47 blank | 149 comment | 40 complexity | 1bfae4b45f7a520b7458d7bd7f01e008 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-3.0, LGPL-3.0, LGPL-2.1
  1. <?php
  2. /**
  3. * Zend Framework (http://framework.zend.com/)
  4. *
  5. * @link http://github.com/zendframework/zf2 for the canonical source repository
  6. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7. * @license http://framework.zend.com/license/new-bsd New BSD License
  8. * @package Zend_Search
  9. */
  10. namespace ZendSearch\Lucene\Storage\File;
  11. require_once 'Zend\ZendSearch\Lucene\Storage\File\FileInterface.php';
  12. use ZendSearch\Lucene;
  13. use ZendSearch\Lucene\Storage\File;
  14. /**
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Storage
  18. */
  19. abstract class AbstractFile implements FileInterface
  20. {
  /**
   * Fetches one byte through _fread() and returns its ordinal value.
   *
   * The byte is taken from the current position in the file and the
   * file pointer is advanced.
   *
   * @return integer
   */
  public function readByte()
  {
      $rawByte = $this->_fread(1);

      return ord($rawByte);
  }
  /**
   * Converts the given ordinal value to a single character and writes
   * it to the file through _fwrite().
   *
   * @param integer $byte
   * @return mixed whatever _fwrite() returns
   */
  public function writeByte($byte)
  {
      $encoded = chr($byte);

      return $this->_fwrite($encoded, 1);
  }
  /**
   * Reads $num bytes starting at the current position in the file
   * and advances the file pointer.
   *
   * @param integer $num number of bytes requested from _fread()
   * @return string
   */
  public function readBytes($num)
  {
      $chunk = $this->_fread($num);

      return $chunk;
  }
  /**
   * Writes raw data to the file through _fwrite().
   *
   * When $num is null the whole of $data is written; otherwise $num is
   * forwarded to _fwrite() as the number of bytes to write.
   *
   * @param string $data
   * @param integer $num
   */
  public function writeBytes($data, $num=null)
  {
      // Pure delegation; the result of _fwrite() is intentionally not returned.
      $this->_fwrite($data, $num);
  }
  /**
   * Reads four bytes from the current position in the file, combines
   * them most-significant byte first into an integer, and advances the
   * file pointer.
   *
   * @return integer
   */
  public function readInt()
  {
      $bytes = $this->_fread(4);

      // Fold the bytes together big-endian: every step shifts the
      // accumulated value up by one byte and ORs in the next one.
      $value = 0;
      for ($index = 0; $index < 4; $index++) {
          $value = ($value << 8) | ord($bytes[$index]);
      }

      return $value;
  }
  /**
   * Writes an integer to the file as four bytes, most-significant
   * byte first.
   *
   * @param integer $value
   */
  public function writeInt($value)
  {
      settype($value, 'integer');

      // Emit the four low-order bytes of the value, highest first.
      $packed = '';
      foreach (array(24, 16, 8, 0) as $shift) {
          $packed .= chr($value >> $shift & 0xFF);
      }

      $this->_fwrite($packed, 4);
  }
  /**
   * Reads an eight-byte long integer from the current position in the
   * file and advances the file pointer.
   *
   * On builds where PHP integers are wider than four bytes the value is
   * assembled directly; otherwise the work is delegated to
   * _readLong32Bit().
   *
   * @return integer|float
   */
  public function readLong()
  {
      // Native integers cannot hold 64 bits here, use the 32-bit fallback.
      // fseek() uses long for offset. Thus, largest index segment file
      // size in 32bit mode is 2Gb
      if (PHP_INT_SIZE <= 4) {
          return $this->_readLong32Bit();
      }

      $bytes = $this->_fread(8);

      // Combine the eight bytes most-significant first.
      $value = 0;
      for ($index = 0; $index < 8; $index++) {
          $value = ($value << 8) | ord($bytes[$index]);
      }

      return $value;
  }
  /**
   * Writes a long integer to the file as eight bytes, most-significant
   * byte first.
   *
   * On builds where PHP integers are wider than four bytes the value is
   * cast to integer and serialised directly; otherwise the untouched
   * value is handed to _writeLong32Bit().
   *
   * @param integer $value
   */
  public function writeLong($value)
  {
      // Native integers cannot hold 64 bits here, use the 32-bit fallback.
      // fseek() and ftell() use long for offset. Thus, largest index
      // segment file size in 32bit mode is 2Gb
      if (PHP_INT_SIZE <= 4) {
          $this->_writeLong32Bit($value);
          return;
      }

      settype($value, 'integer');

      // Emit all eight bytes of the value, highest first.
      $packed = '';
      for ($shift = 56; $shift >= 0; $shift -= 8) {
          $packed .= chr($value >> $shift & 0xFF);
      }

      $this->_fwrite($packed, 8);
  }
  /**
   * Reads a long integer as two consecutive readInt() words (high word
   * first) and combines them; intended for 32-bit platforms, where the
   * result may have to be represented as a float.
   *
   * Negative values are accepted only when the high word is all ones and
   * the low word has its top bit set, in which case the low word is
   * returned as is.
   *
   * @throws \ZendSearch\Lucene\Exception\RuntimeException
   * @return integer|float
   */
  protected function _readLong32Bit()
  {
      $high = $this->readInt();
      $low  = $this->readInt();

      if ($high & (int)0x80000000) {
          // Highest bit of the high word is set: the stored value is negative.
          $fitsLowWord = $high == (int)0xFFFFFFFF && ($low & (int)0x80000000);

          if (!$fitsLowWord) {
              throw new Lucene\Exception\RuntimeException(
                  'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
              );
          }

          return $low;
      }

      if ($low < 0) {
          // Low word is larger than 0x7FFF FFFF: clear the top bit and
          // add it back as a float so the word is represented as float.
          $low = ($low & 0x7FFFFFFF) + (float)0x80000000;
      }

      if ($high == 0) {
          // Nothing in the high word, so the low word is the whole value.
          return $low;
      }

      return $high*(float)0x100000000/* 0x00000001 00000000 */ + $low;
  }
  /**
   * Writes a long integer as two consecutive writeInt() words, high word
   * first (32-bit platforms implementation).
   *
   * @param integer|float $value
   * @throws \ZendSearch\Lucene\Exception\RuntimeException
   */
  protected function _writeLong32Bit($value)
  {
      if ($value < (int)0x80000000) {
          throw new Lucene\Exception\RuntimeException(
              'Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.'
          );
      }

      if ($value < 0) {
          // Negative value: the high word is all ones and the low word
          // carries the value itself.
          $this->writeInt((int)0xFFFFFFFF);
          $this->writeInt((int)$value);
          return;
      }

      $wordSize = (float)0x100000000/* 0x00000001 00000000 */;

      $high = (int)($value/$wordSize);
      $low  = $value - $high*$wordSize;

      if ($low > 0x7FFFFFFF) {
          // Highest bit of low word is set. Translate it to the
          // corresponding negative integer value
          $low = ($low - 0x80000000) | 0x80000000;
      }

      $this->writeInt($high);
      $this->writeInt($low);
  }
  /**
   * Reads a variable-length integer from the current position in the
   * file and advances the file pointer.
   *
   * Each byte contributes its low seven bits, least-significant group
   * first; a set high bit means another byte follows.
   *
   * @return integer
   */
  public function readVInt()
  {
      $value = 0;
      $shift = 0;

      do {
          $byte   = ord($this->_fread(1));
          $value |= ($byte & 0x7F) << $shift;
          $shift += 7;
      } while (($byte & 0x80) != 0);

      return $value;
  }
  /**
   * Writes an integer to the file in variable-length form.
   *
   * The value is emitted seven bits at a time, lowest group first; every
   * byte except the last has its high bit set as a continuation marker.
   *
   * @param integer $value
   */
  public function writeVInt($value)
  {
      settype($value, 'integer');

      // Emit continuation bytes while more than seven bits remain.
      for (; $value > 0x7F; $value >>= 7) {
          $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
      }

      // Final byte, written without the continuation bit being added.
      $this->_fwrite(chr($value));
  }
  /**
   * Reads a string from the current position in the file
   * and advances the file pointer.
   *
   * The string is stored as a VInt character count followed by the
   * characters in Java 2 "modified UTF-8". The count is first used as a
   * byte count; every multi-byte lead byte found while scanning pulls the
   * matching number of continuation bytes from the file. The two-byte
   * sequence \xC0\x80 (Java's encoding of the null character) is
   * collapsed into a single NUL byte.
   *
   * This implementation supports only Basic Multilingual Plane
   * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
   * "supplementary characters" (characters whose code points are
   * greater than 0xFFFF)
   * Java 2 represents these characters as a pair of char (16-bit)
   * values, the first from the high-surrogates range (0xD800-0xDBFF),
   * the second from the low-surrogates range (0xDC00-0xDFFF). Then
   * they are encoded as usual UTF-8 characters in six bytes.
   * Standard UTF-8 representation uses four bytes for supplementary
   * characters.
   *
   * @return string
   */
  public function readString()
  {
      $strlen = $this->readVInt();
      if ($strlen == 0) {
          return '';
      }

      $str_val = $this->_fread($strlen);

      // $strlen tracks the number of bytes of $str_val still to be scanned;
      // it grows as continuation bytes are appended.
      for ($count = 0; $count < $strlen; $count++) {
          $lead = ord($str_val[$count]);

          if (($lead & 0xC0) != 0xC0) {
              // Single-byte character, nothing more to fetch.
              continue;
          }

          $addBytes = 1;
          if ($lead & 0x20) {
              $addBytes++;

              // Never used. Java2 doesn't encode strings in four bytes
              if ($lead & 0x10) {
                  $addBytes++;
              }
          }

          $str_val .= $this->_fread($addBytes);
          $strlen  += $addBytes;

          // Check for null character. Java2 encodes null character
          // in two bytes.
          if ($lead == 0xC0
              && isset($str_val[$count + 1])
              && ord($str_val[$count + 1]) == 0x80
          ) {
              // Replace the two-byte sequence with a real NUL byte. Assigning
              // the integer 0 to a string offset would store the character
              // "0" instead, so the byte is spelled out explicitly.
              $str_val = substr($str_val, 0, $count)
                       . "\x00"
                       . substr($str_val, $count + 2);

              // The buffer is one byte shorter now and the NUL occupies a
              // single position, so only the loop increment moves past it.
              $strlen--;
          } else {
              // Skip the continuation bytes of this character.
              $count += $addBytes;
          }
      }

      return $str_val;
  }
  /**
   * Writes a string to the end of file.
   *
   * The string is emitted as a VInt character count followed by its
   * bytes. The input is expected to already be in Java 2 "modified
   * UTF-8" representation, so the only work done here is counting the
   * characters and replacing every \x00 byte by \xC0\x80.
   *
   * This implementation supports only Basic Multilingual Plane
   * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
   * "supplementary characters" (characters whose code points are
   * greater than 0xFFFF)
   * Java 2 represents these characters as a pair of char (16-bit)
   * values, the first from the high-surrogates range (0xD800-0xDBFF),
   * the second from the low-surrogates range (0xDC00-0xDFFF). Then
   * they are encoded as usual UTF-8 characters in six bytes.
   * Standard UTF-8 representation uses four bytes for supplementary
   * characters.
   *
   * @param string $str
   * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
   */
  public function writeString($str)
  {
      // convert input to a string before iterating string characters
      settype($str, 'string');

      // Start from the byte length and subtract the continuation bytes of
      // every multi-byte character to obtain the character count.
      $chars = $strlen = strlen($str);
      $containNullChars = false;

      for ($count = 0; $count < $strlen; $count++) {
          $byte = ord($str[$count]);

          if (($byte & 0xC0) == 0xC0) {
              // Lead byte of a multi-byte character.
              $addBytes = 1;
              if ($byte & 0x20) {
                  $addBytes++;

                  // Never used. Java2 doesn't encode strings in four bytes
                  // and we don't support non-BMP characters
                  if ($byte & 0x10) {
                      $addBytes++;
                  }
              }
              $chars -= $addBytes;
              $count += $addBytes;
          } elseif ($byte == 0) {
              // A NUL byte can only be a single-byte character, so it has to
              // be detected outside the lead-byte branch.
              $containNullChars = true;
          }
      }

      if ($chars < 0) {
          throw new Lucene\Exception\InvalidArgumentException('Invalid UTF-8 string');
      }

      $this->writeVInt($chars);

      if ($containNullChars) {
          // str_replace() takes (search, replace, subject): every NUL byte of
          // $str becomes the two-byte sequence \xC0\x80.
          $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
      } else {
          $this->_fwrite($str);
      }
  }
  /**
   * Reads a block of binary data from the current position in the file
   * and advances the file pointer.
   *
   * The block is prefixed by its length, stored as a variable-length
   * integer.
   *
   * @return string
   */
  public function readBinary()
  {
      $length = $this->readVInt();

      return $this->_fread($length);
  }
  348. }