PageRenderTime 26ms CodeModel.GetById 30ms RepoModel.GetById 1ms app.codeStats 0ms

/standard/tags/release-0.1.5/library/Zend/Search/Lucene/Storage/File.php

https://github.com/bhaumik25/zend-framework
PHP | 371 lines | 142 code | 42 blank | 187 comment | 24 complexity | 6244e183757ae3519518e7333a7ce25e MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Search_Lucene
  17. * @subpackage Storage
  18. * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
  19. * @license http://framework.zend.com/license/new-bsd New BSD License
  20. */
  21. /**
  22. * @category Zend
  23. * @package Zend_Search_Lucene
  24. * @subpackage Storage
  25. * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
  26. * @license http://framework.zend.com/license/new-bsd New BSD License
  27. */
  abstract class Zend_Search_Lucene_Storage_File
  {
      /**
       * Reads $length number of bytes at the current position in the
       * file and advances the file pointer.
       *
       * @param integer $length
       * @return string
       */
      abstract protected function _fread($length=1);

      /**
       * Sets the file position indicator and advances the file pointer.
       * The new position, measured in bytes from the beginning of the file,
       * is obtained by adding offset to the position specified by whence,
       * whose values are defined as follows:
       * SEEK_SET - Set position equal to offset bytes.
       * SEEK_CUR - Set position to current location plus offset.
       * SEEK_END - Set position to end-of-file plus offset. (To move to
       * a position before the end-of-file, you need to pass a negative value
       * in offset.)
       * Upon success, returns 0; otherwise, returns -1
       *
       * @param integer $offset
       * @param integer $whence
       * @return integer
       */
      abstract public function seek($offset, $whence=SEEK_SET);

      /**
       * Get file position.
       *
       * @return integer
       */
      abstract public function tell();

      /**
       * Writes $length number of bytes (all, if $length===null) to the end
       * of the file.
       *
       * @param string $data
       * @param integer $length
       */
      abstract protected function _fwrite($data, $length=null);

      /**
       * Reads a byte from the current position in the file
       * and advances the file pointer.
       *
       * @return integer Byte value as returned by ord()
       */
      public function readByte()
      {
          return ord($this->_fread(1));
      }

      /**
       * Writes a byte to the end of the file.
       *
       * @param integer $byte
       * @return mixed Whatever the concrete _fwrite() implementation returns
       */
      public function writeByte($byte)
      {
          return $this->_fwrite(chr($byte), 1);
      }

      /**
       * Read num bytes from the current position in the file
       * and advances the file pointer.
       *
       * @param integer $num
       * @return string
       */
      public function readBytes($num)
      {
          return $this->_fread($num);
      }

      /**
       * Writes num bytes of data (all, if $num===null) to the end
       * of the file.
       *
       * @param string $data
       * @param integer $num
       */
      public function writeBytes($data, $num=null)
      {
          $this->_fwrite($data, $num);
      }

      /**
       * Reads a four-byte integer from the current position in the file
       * and advances the file pointer.
       *
       * The first byte read is the most significant one. The bytes are
       * OR-ed together without sign extension, so where PHP integers are
       * wider than 32 bits a value with the top bit set is returned as a
       * positive number.
       *
       * @return integer
       */
      public function readInt()
      {
          $str = $this->_fread(4);

          return (ord($str[0]) << 24) |
                 (ord($str[1]) << 16) |
                 (ord($str[2]) << 8)  |
                  ord($str[3]);
      }

      /**
       * Writes an integer to the end of file as four bytes, most
       * significant byte first.
       *
       * @param integer $value
       */
      public function writeInt($value)
      {
          settype($value, 'integer');
          $this->_fwrite( chr(($value >> 24) & 0xFF) .
                          chr(($value >> 16) & 0xFF) .
                          chr(($value >> 8)  & 0xFF) .
                          chr( $value        & 0xFF), 4 );
      }

      /**
       * Returns a long integer from the current position in the file
       * and advances the file pointer.
       *
       * Eight bytes are consumed, but only the low four are used.
       *
       * @return integer
       */
      public function readLong()
      {
          $str = $this->_fread(8);

          /**
           * PHP uses long as largest integer. fseek() uses long for offset.
           * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
           * conversion to float.
           * So, largest index segment file is 2Gb
           */
          return (ord($str[4]) << 24) |
                 (ord($str[5]) << 16) |
                 (ord($str[6]) << 8)  |
                  ord($str[7]);
      }

      /**
       * Writes long integer to the end of file
       *
       * Eight bytes are written: four zero bytes followed by the low
       * 32 bits of $value, most significant byte first.
       *
       * @param integer $value
       */
      public function writeLong($value)
      {
          /**
           * PHP uses long as largest integer. fseek() uses long for offset.
           * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
           * conversion to float.
           * So, largest index segment file is 2Gb
           */
          settype($value, 'integer');
          $this->_fwrite( "\x00\x00\x00\x00" .
                          chr(($value >> 24) & 0xFF) .
                          chr(($value >> 16) & 0xFF) .
                          chr(($value >> 8)  & 0xFF) .
                          chr( $value        & 0xFF), 8 );
      }

      /**
       * Returns a variable-length integer from the current
       * position in the file and advances the file pointer.
       *
       * Each byte contributes its low seven bits, least significant group
       * first; a set high bit means another byte follows.
       *
       * @return integer
       */
      public function readVInt()
      {
          $nextByte = ord($this->_fread(1));
          $val = $nextByte & 0x7F;

          for ($shift = 7; ($nextByte & 0x80) != 0; $shift += 7) {
              $nextByte = ord($this->_fread(1));
              $val |= ($nextByte & 0x7F) << $shift;
          }

          return $val;
      }

      /**
       * Writes a variable-length integer to the end of file.
       *
       * Seven bits are emitted per byte, least significant group first,
       * with the high bit set on every byte except the last. A value that
       * is not greater than 0x7F (including any negative value) skips the
       * loop and is written as the single byte chr($value).
       *
       * @param integer $value
       */
      public function writeVInt($value)
      {
          settype($value, 'integer');

          while ($value > 0x7F) {
              $this->_fwrite(chr( ($value & 0x7F) | 0x80 ));
              $value >>= 7;
          }
          $this->_fwrite(chr($value));
      }

      /**
       * Reads a string from the current position in the file
       * and advances the file pointer.
       *
       * The stored length prefix counts characters, not bytes, so the
       * continuation bytes of every multi-byte character are fetched on
       * demand while the buffer is scanned. The two-byte sequence
       * \xC0\x80 is decoded to a single \x00 byte in the result.
       *
       * @return string
       */
      public function readString()
      {
          $strlen = $this->readVInt();
          if ($strlen == 0) {
              return '';
          }

          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */
          $str_val = $this->_fread($strlen);

          for ($count = 0; $count < $strlen; $count++) {
              $byte = ord($str_val[$count]);

              if (($byte & 0xC0) == 0xC0) {
                  // Lead byte of a multi-byte character: work out how many
                  // continuation bytes belong to it.
                  $addBytes = 1;
                  if ($byte & 0x20) {
                      $addBytes++;

                      // Never used. Java2 doesn't encode strings in four bytes
                      if ($byte & 0x10) {
                          $addBytes++;
                      }
                  }

                  // The file is read sequentially, so appending keeps every
                  // byte at the offset it has in the stored string.
                  $str_val .= $this->_fread($addBytes);
                  $strlen  += $addBytes;

                  // Skip over the continuation bytes.
                  $count   += $addBytes;
              }
          }

          // Java2 encodes the null character in two bytes. Decoding it only
          // after the whole string has been read keeps the scan offsets above
          // valid. 0xC0 is a lead byte and 0x80 a continuation byte, so in
          // well-formed data this pair cannot straddle two characters.
          return str_replace("\xC0\x80", "\x00", $str_val);
      }

      /**
       * Writes a string to the end of file.
       *
       * The string is written as a VInt character count followed by the
       * bytes of the string, with every \x00 byte replaced by \xC0\x80.
       *
       * @param string $str
       * @throws Zend_Search_Lucene_Exception
       */
      public function writeString($str)
      {
          /**
           * This implementation supports only Basic Multilingual Plane
           * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
           * "supplementary characters" (characters whose code points are
           * greater than 0xFFFF)
           * Java 2 represents these characters as a pair of char (16-bit)
           * values, the first from the high-surrogates range (0xD800-0xDBFF),
           * the second from the low-surrogates range (0xDC00-0xDFFF). Then
           * they are encoded as usual UTF-8 characters in six bytes.
           * Standard UTF-8 representation uses four bytes for supplementary
           * characters.
           */

          // convert input to a string before iterating string characters
          settype($str, 'string');

          $chars = $strlen = strlen($str);

          /**
           * String is already in Java 2 representation.
           * We should only calculate actual string length and replace
           * \x00 by \xC0\x80
           */
          for ($count = 0; $count < $strlen; $count++) {
              $byte = ord($str[$count]);

              if (($byte & 0xC0) == 0xC0) {
                  $addBytes = 1;
                  if ($byte & 0x20) {
                      $addBytes++;

                      // Never used. Java2 doesn't encode strings in four bytes
                      // and we don't support non-BMP characters
                      if ($byte & 0x10) {
                          $addBytes++;
                      }
                  }

                  // Continuation bytes are not characters of their own.
                  $chars -= $addBytes;
                  $count += $addBytes;
              }
          }

          if ($chars < 0) {
              throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
          }

          $this->writeVInt($chars);

          // str_replace() takes (search, replace, subject) and returns the
          // subject unchanged when it contains no null byte. The character
          // count is unaffected: one null byte and its two-byte escape both
          // stand for a single character.
          $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
      }

      /**
       * Reads binary data from the current position in the file
       * and advances the file pointer.
       *
       * A VInt byte count is read first, then that many bytes.
       *
       * @return string
       */
      public function readBinary()
      {
          return $this->_fread($this->readVInt());
      }
  }