PageRenderTime 49ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/phpmorphy/src-bak/graminfo/graminfo.php

https://github.com/Oreolek/Togataltu
PHP | 338 lines | 226 code | 51 blank | 61 comment | 11 complexity | c93393e51a96294ed7420a2b1610f699 MD5 | raw file
  1. <?php
  2. /**
  3. * This file is part of phpMorphy library
  4. *
  5. * Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net>
  6. *
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the
  19. * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20. * Boston, MA 02111-1307, USA.
  21. */
  /**
   * Contract shared by every graminfo reader and by the decorators wrapping one.
   */
  interface phpMorphy_GramInfo_Interace {
      /**
       * Returns the language of the graminfo file.
       *
       * @return string
       */
      public function getLocale();

      /**
       * Returns the encoding of the graminfo file.
       *
       * @return string
       */
      public function getEncoding();

      /**
       * Returns the size of a single character
       * (cp1251 - 1, utf8 - 1, utf16 - 2, utf32 - 4 etc).
       *
       * @return int
       */
      public function getCharSize();

      /**
       * Returns the end-of-string value (usually a string of \0 bytes,
       * char_size + 1 bytes long).
       *
       * @return string
       */
      public function getEnds();

      /**
       * Reads the graminfo header located at the given offset.
       *
       * @param int $offset
       * @return array
       */
      public function readGramInfoHeader($offset);

      /**
       * Returns the size of the header struct.
       */
      public function getGramInfoHeaderSize();

      /**
       * Reads the ancodes section for a header obtained from readGramInfoHeader().
       *
       * @param array $info
       * @return array
       */
      public function readAncodes($info);

      /**
       * Reads the flexias section for a header obtained from readGramInfoHeader().
       *
       * @param array $info
       * @return array
       */
      public function readFlexiaData($info);

      /**
       * Reads the offsets of all graminfo headers; the offsets can later be
       * passed to readGramInfoHeader().
       *
       * @return array
       */
      public function readAllGramInfoOffsets();

      /**
       * Returns the header data describing the graminfo file.
       */
      public function getHeader();

      /**
       * NOTE(review): undocumented in the original source; presumably reads
       * every part-of-speech record - confirm against the implementations.
       */
      public function readAllPartOfSpeech();

      /**
       * NOTE(review): undocumented in the original source; presumably reads
       * every grammem record - confirm against the implementations.
       */
      public function readAllGrammems();

      /**
       * NOTE(review): undocumented in the original source; presumably reads
       * every ancode record - confirm against the implementations.
       */
      public function readAllAncodes();
  }
  /**
   * Base class for graminfo readers.
   *
   * Holds the storage resource and the decoded file header, and provides the
   * header parsing/validation and index helpers shared by the storage-specific
   * subclasses that create() loads from the access/ directory.
   */
  abstract class phpMorphy_GramInfo implements phpMorphy_GramInfo_Interace {
      /** Number of bytes create() reads from the start of the storage as the file header. */
      const HEADER_SIZE = 128;

      protected
          $resource,
          $header,
          $ends,
          $ends_size;

      /**
       * Stores the resource and header and precomputes the end-of-string marker.
       *
       * A real __construct() is required: since PHP 8.0 a method named after
       * its class is no longer treated as a constructor, so without it the
       * objects built by create() would never be initialised.
       *
       * @param mixed $resource value returned by the storage's getResource()
       * @param array $header   header as produced by readHeader(); must contain 'char_size'
       */
      protected function __construct($resource, $header) {
          $this->resource = $resource;
          $this->header = $header;
          // The marker is char_size + 1 zero bytes long.
          $this->ends = str_repeat("\0", $header['char_size'] + 1);
          $this->ends_size = $GLOBALS['__phpmorphy_strlen']($this->ends);
      }

      /**
       * Legacy PHP 4 style constructor name, kept as an alias so that code
       * calling parent::phpMorphy_GramInfo(...) keeps working.
       *
       * @deprecated use __construct()
       * @param mixed $resource
       * @param array $header
       */
      protected function phpMorphy_GramInfo($resource, $header) {
          self::__construct($resource, $header);
      }

      /**
       * Builds a graminfo reader for the given storage.
       *
       * @param phpMorphy_Storage $storage
       * @param bool $lazy when truthy a phpMorphy_GramInfo_Proxy is returned and
       *                   nothing is read from the storage yet
       * @return phpMorphy_GramInfo_Interace
       * @throws phpMorphy_Exception when the header fails validateHeader()
       */
      static function create(phpMorphy_Storage $storage, $lazy) {
          if($lazy) {
              return new phpMorphy_GramInfo_Proxy($storage);
          }

          $header = self::readHeader(
              $storage->read(0, self::HEADER_SIZE)
          );

          if(!self::validateHeader($header)) {
              throw new phpMorphy_Exception('Invalid graminfo format');
          }

          // The concrete reader class lives in access/graminfo_<type>.php and is
          // named after the storage type.
          $storage_type = $storage->getTypeAsString();
          $file_path = dirname(__FILE__) . "/access/graminfo_{$storage_type}.php";
          $clazz = 'phpMorphy_GramInfo_' . ucfirst($storage_type);

          require_once($file_path);
          return new $clazz($storage->getResource(), $header);
      }

      /**
       * @return string the 'lang' header field
       */
      function getLocale() {
          return $this->header['lang'];
      }

      /**
       * @return string the 'encoding' header field
       */
      function getEncoding() {
          return $this->header['encoding'];
      }

      /**
       * @return int the 'char_size' header field
       */
      function getCharSize() {
          return $this->header['char_size'];
      }

      /**
       * @return string the end-of-string marker built in the constructor
       */
      function getEnds() {
          return $this->ends;
      }

      /**
       * @return array the header passed to the constructor
       */
      function getHeader() {
          return $this->header;
      }

      /**
       * Decodes the raw file header.
       *
       * The header starts with 24 fields unpacked with the 'V' format code,
       * followed by two length-prefixed strings (one length byte, then the
       * data): the language and the encoding.
       *
       * @param string $headerRaw raw header bytes
       * @return array decoded fields plus 'lang' and 'encoding'; an empty array
       *               when the fixed-size part cannot be unpacked
       */
      static protected function readHeader($headerRaw) {
          $header = unpack(
              'Vver/Vis_be/Vflex_count_old/' .
              'Vflex_offset/Vflex_size/Vflex_count/Vflex_index_offset/Vflex_index_size/' .
              'Vposes_offset/Vposes_size/Vposes_count/Vposes_index_offset/Vposes_index_size/' .
              'Vgrammems_offset/Vgrammems_size/Vgrammems_count/Vgrammems_index_offset/Vgrammems_index_size/' .
              'Vancodes_offset/Vancodes_size/Vancodes_count/Vancodes_index_offset/Vancodes_index_size/' .
              'Vchar_size/',
              $headerRaw
          );

          // unpack() yields false on truncated input; return an empty header so
          // that validateHeader() rejects it instead of indexing into false.
          if(!is_array($header)) {
              return array();
          }

          // Skip the 24 four-byte fields decoded above.
          $offset = 24 * 4;

          $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1));
          $header['lang'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len));
          $offset += $len;

          $len = ord($GLOBALS['__phpmorphy_substr']($headerRaw, $offset++, 1));
          $header['encoding'] = rtrim($GLOBALS['__phpmorphy_substr']($headerRaw, $offset, $len));

          return $header;
      }

      /**
       * Checks that the header describes a supported file: 'ver' must be 3 and
       * 'is_be' must not be 1.
       *
       * @param array $header
       * @return bool
       */
      static protected function validateHeader($header) {
          if(!isset($header['ver'], $header['is_be'])) {
              return false;
          }

          return 3 == $header['ver'] && 1 != $header['is_be'];
      }

      /**
       * Cuts the string at the first occurrence of the end-of-string marker;
       * a string without the marker is returned unchanged.
       *
       * @param string $string
       * @return string
       */
      protected function cleanupCString($string) {
          if(false !== ($pos = $GLOBALS['__phpmorphy_strpos']($string, $this->ends))) {
              $string = $GLOBALS['__phpmorphy_substr']($string, 0, $pos);
          }

          return $string;
      }

      abstract protected function readSectionIndex($offset, $count);

      /**
       * Reads a section index and converts it into a list of sizes: every entry
       * becomes the distance to the next entry, and the last one is measured
       * against the first entry plus $total_size.
       *
       * @param int $offset
       * @param int $count number of index entries; a falsy value yields an empty array
       * @param int $total_size
       * @return array
       */
      protected function readSectionIndexAsSize($offset, $count, $total_size) {
          if(!$count) {
              return array();
          }

          $index = $this->readSectionIndex($offset, $count);

          // Temporary sentinel so the last entry also has a successor.
          $index[$count] = $index[0] + $total_size;

          for($i = 0; $i < $count; $i++) {
              $index[$i] = $index[$i + 1] - $index[$i];
          }

          unset($index[$count]);

          return $index;
      }
  }
  /**
   * Pass-through decorator: every interface method is forwarded unchanged to
   * the wrapped graminfo object. Subclasses override only what they change.
   */
  class phpMorphy_GramInfo_Decorator implements phpMorphy_GramInfo_Interace {
      protected $info;

      /**
       * A real __construct() is required: since PHP 8.0 a method named after
       * its class is no longer a constructor, and subclasses calling
       * parent::__construct() would fail without it.
       *
       * @param phpMorphy_GramInfo_Interace $info object to delegate to
       */
      public function __construct(phpMorphy_GramInfo_Interace $info) {
          $this->info = $info;
      }

      /**
       * Legacy PHP 4 style constructor name, kept as an alias so that code
       * calling parent::phpMorphy_GramInfo_Decorator(...) keeps working.
       *
       * @deprecated use __construct()
       * @param phpMorphy_GramInfo_Interace $info
       */
      public function phpMorphy_GramInfo_Decorator(phpMorphy_GramInfo_Interace $info) {
          self::__construct($info);
      }

      public function readGramInfoHeader($offset) {
          return $this->info->readGramInfoHeader($offset);
      }

      public function getGramInfoHeaderSize() {
          // The method takes no arguments; the original forwarded an undefined
          // $offset variable here.
          return $this->info->getGramInfoHeaderSize();
      }

      public function readAncodes($info) {
          return $this->info->readAncodes($info);
      }

      public function readFlexiaData($info) {
          return $this->info->readFlexiaData($info);
      }

      public function readAllGramInfoOffsets() {
          return $this->info->readAllGramInfoOffsets();
      }

      public function readAllPartOfSpeech() {
          return $this->info->readAllPartOfSpeech();
      }

      public function readAllGrammems() {
          return $this->info->readAllGrammems();
      }

      public function readAllAncodes() {
          return $this->info->readAllAncodes();
      }

      public function getLocale() {
          return $this->info->getLocale();
      }

      public function getEncoding() {
          return $this->info->getEncoding();
      }

      public function getCharSize() {
          return $this->info->getCharSize();
      }

      public function getEnds() {
          return $this->info->getEnds();
      }

      public function getHeader() {
          return $this->info->getHeader();
      }
  }
  /**
   * Lazy decorator: the real graminfo object is created from the storage only
   * when $this->info is read for the first time.
   */
  class phpMorphy_GramInfo_Proxy extends phpMorphy_GramInfo_Decorator {
      protected $storage;

      /**
       * @param phpMorphy_Storage $storage storage handed to phpMorphy_GramInfo::create() later
       */
      public function __construct(phpMorphy_Storage $storage) {
          $this->storage = $storage;
          // Drop the declared property so that the first read of $this->info
          // is routed through __get().
          unset($this->info);
      }

      /**
       * Creates the real graminfo object on the first access to 'info'.
       *
       * @param string $propName
       * @return phpMorphy_GramInfo_Interace
       * @throws phpMorphy_Exception for any property other than 'info'
       */
      public function __get($propName) {
          if($propName != 'info') {
              throw new phpMorphy_Exception("Unknown prop name '$propName'");
          }

          $realInfo = phpMorphy_GramInfo::create($this->storage, false);
          $this->info = $realInfo;
          // The storage is not needed once the real object exists.
          unset($this->storage);

          return $realInfo;
      }
  }
  /**
   * Lazy proxy that answers the header-related getters from a header cache
   * file, so those calls do not trigger creation of the real graminfo object.
   */
  class phpMorphy_GramInfo_Proxy_WithHeader extends phpMorphy_GramInfo_Proxy {
      protected
          $cache,
          $ends;

      /**
       * @param phpMorphy_Storage $storage
       * @param string $cacheFile PHP file that returns the header array
       * @throws phpMorphy_Exception when the cache file does not return an array
       */
      public function __construct(phpMorphy_Storage $storage, $cacheFile) {
          parent::__construct($storage);

          $this->cache = $this->readCache($cacheFile);

          // Same marker layout as the non-lazy reader: char_size + 1 zero bytes.
          $charSize = $this->getCharSize();
          $this->ends = str_repeat("\0", $charSize + 1);
      }

      /**
       * Includes the cache file and returns the array it yields.
       *
       * @param string $fileName
       * @return array
       * @throws phpMorphy_Exception when the included file does not return an array
       */
      protected function readCache($fileName) {
          $result = include($fileName);

          if(is_array($result)) {
              return $result;
          }

          throw new phpMorphy_Exception("Can`t get header cache from '$fileName' file'");
      }

      /**
       * @return string the cached 'lang' value
       */
      public function getLocale() {
          return $this->cache['lang'];
      }

      /**
       * @return string the cached 'encoding' value
       */
      public function getEncoding() {
          return $this->cache['encoding'];
      }

      /**
       * @return int the cached 'char_size' value
       */
      public function getCharSize() {
          return $this->cache['char_size'];
      }

      /**
       * @return string the end-of-string marker built in the constructor
       */
      public function getEnds() {
          return $this->ends;
      }

      /**
       * @return array the whole cached header
       */
      public function getHeader() {
          return $this->cache;
      }
  }
  /**
   * Decorator that memoises readFlexiaData() results per header offset for the
   * lifetime of the object.
   */
  class phpMorphy_GramInfo_RuntimeCaching extends phpMorphy_GramInfo_Decorator {
      protected
          $flexia = array(),
          $ancodes = array(),
          // Declared explicitly: readFlexiaData() stores its results here, and
          // creating the property on the fly is deprecated since PHP 8.2.
          $flexia_all = array();

      /**
       * Returns the flexia data for the given header, reading it from the
       * wrapped object only the first time a given offset is requested.
       *
       * @param array $info header containing an 'offset' key
       * @return array
       */
      public function readFlexiaData($info) {
          $offset = $info['offset'];

          if(!isset($this->flexia_all[$offset])) {
              $this->flexia_all[$offset] = $this->info->readFlexiaData($info);
          }

          return $this->flexia_all[$offset];
      }
  }
  /**
   * Decorator that serves readAncodes() from a preloaded cache keyed by header
   * offset, counting hits and misses.
   */
  class phpMorphy_GramInfo_AncodeCache extends phpMorphy_GramInfo_Decorator {
      public
          $hits = 0,
          $miss = 0;

      protected
          $cache;

      /**
       * Loads the whole cache from the resource and unserializes it.
       *
       * @param phpMorphy_GramInfo_Interace $inner object used on cache misses
       * @param mixed $resource object providing read() and getFileSize()
       * @throws phpMorphy_Exception when the cache cannot be unserialized
       */
      public function __construct(phpMorphy_GramInfo_Interace $inner, $resource) {
          parent::__construct($inner);

          // NOTE(review): unserialize() must only ever see trusted data; make
          // sure the cache file cannot be supplied by an untrusted party.
          $serialized = $resource->read(0, $resource->getFileSize());
          $this->cache = unserialize($serialized);

          if(false === $this->cache) {
              throw new phpMorphy_Exception("Can`t read ancodes cache");
          }
      }

      /**
       * Returns the cached ancodes for the header's offset, falling back to
       * the wrapped object when the offset is not cached.
       *
       * @param array $info header containing an 'offset' key
       * @return array
       */
      public function readAncodes($info) {
          $offset = $info['offset'];

          if(!isset($this->cache[$offset])) {
              // in theory misses never occur
              $this->miss++;
              return parent::readAncodes($info);
          }

          $this->hits++;
          return $this->cache[$offset];
      }
  }