PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/pscws23/dict.class.php

http://student-test.googlecode.com/
PHP | 408 lines | 285 code | 62 blank | 61 comment | 61 complexity | 7ed6b687b16aeba32c73e5d08722eca8 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0
  1. <?php
  2. /* ----------------------------------------------------------------------- *\
  3. PHP Simple Chinese Word Segmentation (SCWS 2/3) - dictionary lookup class
  4. -----------------------------------------------------------------------
  5. Author: hightman (MSN: MingL_Mar@msn.com) (php-QQ group: 17708754)
  6. Website: http://www.ftphp.com/scws
  7. Created: 2005/11/25 (update: 2006/03/06)
  8. Revised: 2008/12/20
  9. Purpose: [original text lost to character-encoding damage]
  10. $Id: dict.class.php,v 1.1 2008/12/20 12:03:00 hightman Exp $
  11. -----------------------------------------------------------------------
  12. Description: dictionary lookup class.
  13. Supported formats: dba(cdb/gdbm):txt(eAccelerator):sqlite(sqlite):xdb(XTreeDB)
  14. Usage example:
  15. $dict = new PSCWS23_Dict('dict.xdb');
  16. $dict->load($fpath);
  17. $dict->find();
  18. \* ----------------------------------------------------------------------- */
  /**
   * Dictionary lookup front-end.
   *
   * Chooses a storage backend from the dictionary file's extension and keeps
   * an in-memory cache of lookup results.
   *
   *   ->load($fpath)  open a dictionary (.txt, .sqlite, .xdb; anything else goes to dba)
   *   ->find($word)   integer value stored for $word, or -1 when it is absent
   *   ->unload()      close the backend and empty the cache
   */
  class PSCWS23_Dict
  {
      // Active backend object, or false while no dictionary is loaded.
      var $_handler;
      // word => integer result of earlier find() calls (used only when has_cache is true).
      var $_cache;
      // Number of find() calls made while a dictionary was loaded, cache hits included.
      var $query_times;
      // Set to false to bypass the result cache.
      var $has_cache = true;

      // PHP 4 style constructor; an empty $fpath leaves the object unloaded.
      function PSCWS23_Dict($fpath = '')
      {
          if ($this->has_cache)
              $this->_cache = array();
          $this->query_times = 0;
          $this->_handler = false;
          if ('' != $fpath)
              $this->load($fpath);
      }

      // for PHP5
      function __construct($fpath = '') { $this->PSCWS23_Dict($fpath); }
      function __destruct() { $this->unload(); }

      /**
       * Open the dictionary at $fpath, replacing any dictionary loaded before.
       *
       * The previous backend is unloaded first. That closes its handle, lets
       * the backend type follow the NEW file's extension, and clears cached
       * results that belonged to the old dictionary.
       */
      function load($fpath)
      {
          $this->unload();

          // Backend is selected by file extension; unknown extensions go to dba.
          $ext = strtolower(strrchr($fpath, '.'));
          if ($ext == '.txt')
          {
              $this->_handler = new txt_Dictionary($fpath);
          }
          else if ($ext == '.sqlite')
          {
              $this->_handler = new sql_Dictionary($fpath);
          }
          else if ($ext == '.xdb')
          {
              $this->_handler = new xdb_Dictionary($fpath);
          }
          else
          {
              $this->_handler = new dba_Dictionary($fpath);
          }
      }

      /**
       * Look a word up.
       *
       * Returns the backend's value converted with intval(), or -1 when the
       * backend answers with a boolean (not found) or nothing is loaded.
       */
      function find($word)
      {
          if (!$this->_handler)
          {
              trigger_error('???????????', E_USER_WARNING);
              return -1;
          }
          $this->query_times++;

          // check the cache
          if ($this->has_cache && isset($this->_cache[$word]))
              return $this->_cache[$word];

          // query from dictionary
          $val = $this->_handler->_find($word);

          // convert to integer
          $val = (is_bool($val) ? -1 : intval($val));

          // save to cache
          if ($this->has_cache)
              $this->_cache[$word] = $val;
          return $val;
      }

      // Unload the dictionary: close the backend (if any) and reset the cache.
      function unload()
      {
          if ($this->_handler)
          {
              $this->_handler->_unload();
              $this->_handler = false;
          }
          if ($this->has_cache)
              $this->_cache = array();
      }

      // Explicit clean-up entry point; simply forwards to unload().
      function _my_Dictionary()
      {
          $this->unload();
      }
  }
  /**
   * Storage backends. Each one provides _load(), _find() and _unload().
   */
  // XDB backend for .xdb files, read through the XDB_R class.
  class xdb_Dictionary
  {
      // Open XDB_R object, or false while nothing is loaded.
      var $_dbh;

      // PHP 4 style constructor; pulls in the XDB_R class and optionally opens $fpath.
      function xdb_Dictionary($fpath = '')
      {
          // XDB_R lives in xdb_r.class.php next to this file.
          if (!require_once(dirname(__FILE__) . '/xdb_r.class.php'))
              trigger_error('?? PHP ???? `xdb_r` ???, ???', E_USER_ERROR);

          $this->_dbh = false;
          if ('' != $fpath)
              $this->_load($fpath);
      }

      // for PHP5
      function __construct($fpath = '') { $this->xdb_Dictionary($fpath); }
      function __destruct() { $this->_unload(); }

      /**
       * Open the xdb file at $fpath.
       *
       * On failure an E_USER_ERROR is raised and the current handle, if any,
       * is left untouched. On success the previously open handle is closed
       * before being replaced, so a reload does not leak it.
       */
      function _load($fpath)
      {
          $db = new XDB_R;
          if (!$db->Open($fpath))
          {
              trigger_error("??????? xdb ???? `$fpath`", E_USER_ERROR);
              return;
          }
          $this->_unload();
          $this->_dbh = $db;
      }

      // Close the open handle, if there is one.
      function _unload()
      {
          if ($this->_dbh)
          {
              $this->_dbh->Close();
              $this->_dbh = false;
          }
      }

      /**
       * Fetch the value stored for $word.
       *
       * Returns -1 (after a warning) when nothing is loaded, otherwise
       * whatever XDB_R::Get() returns.
       * NOTE(review): presumably Get() yields false for a missing key - confirm in xdb_r.class.php.
       */
      function _find($word)
      {
          if (!$this->_dbh)
          {
              trigger_error('?????????????', E_USER_WARNING);
              return -1;
          }
          return $this->_dbh->Get($word);
      }
  }
  // DBA backend, for cdb/gdbm style files handled by PHP's dba extension.
  class dba_Dictionary
  {
      // Open dba handle, or false while nothing is loaded.
      var $_dbh;

      // PHP 4 style constructor; requires the dba extension and optionally opens $fpath.
      function dba_Dictionary($fpath = '')
      {
          if (!extension_loaded('dba'))
              trigger_error('?? PHP ???? `dba` ??, ????? PHP', E_USER_ERROR);

          $this->_dbh = false;
          if ('' != $fpath)
              $this->_load($fpath);
      }

      // for PHP5
      function __construct($fpath = '') { $this->dba_Dictionary($fpath); }
      function __destruct() { $this->_unload(); }

      /**
       * Open $fpath read-only.
       *
       * The dba handler name is the file extension without its dot, lower-cased;
       * a path with no extension falls back to 'gdbm'. Any handle opened
       * earlier is closed first so a reload does not leak it.
       */
      function _load($fpath)
      {
          $this->_unload();

          $ext = strrchr($fpath, '.');
          $type = ($ext ? strtolower(substr($ext, 1)) : 'gdbm');
          if (!in_array($type, dba_handlers(), true))
          {
              trigger_error("?? dba ????? `$type` ??????", E_USER_ERROR);
              // Only reached when a custom error handler resumes execution:
              // opening with an unsupported handler cannot succeed, so stop here.
              return;
          }

          $this->_dbh = dba_popen($fpath, 'r', $type);
          if (!$this->_dbh)
              trigger_error("??????? `$type` ? dba ???? `$fpath`", E_USER_ERROR);
      }

      // Close the open handle, if there is one.
      function _unload()
      {
          if ($this->_dbh)
          {
              dba_close($this->_dbh);
              $this->_dbh = false;
          }
      }

      /**
       * Fetch the value stored for $word.
       *
       * Returns -1 (after a warning) when nothing is loaded, otherwise the
       * result of dba_fetch().
       */
      function _find($word)
      {
          if (!$this->_dbh)
          {
              trigger_error('?????????????', E_USER_WARNING);
              return -1;
          }
          return dba_fetch($word, $this->_dbh);
      }
  }
  // SQLite backend. Expected schema:
  // CREATE TABLE _wordlist (id INTEGER NOT NULL PRIMARY KEY, word CHAR(32), freq BIGINT);
  // CREATE UNIQUE INDEX _wordidx ON _wordlist (word);
  // NOTE(review): relies on the legacy `sqlite` extension (sqlite_* functions) - confirm the target runtime still ships it.
  class sql_Dictionary
  {
      // Open sqlite handle, or false while nothing is loaded.
      var $_dbh;

      // PHP 4 style constructor; requires the sqlite extension and optionally opens $fpath.
      function sql_Dictionary($fpath = '')
      {
          if (!extension_loaded('sqlite'))
              trigger_error('?? PHP ???? `sqlite` ??, ????? PHP', E_USER_ERROR);
          $this->_dbh = false;
          if ('' != $fpath)
              $this->_load($fpath);
      }

      // for PHP5
      function __construct($fpath = '') { $this->sql_Dictionary($fpath); }
      function __destruct() { $this->_unload(); }

      // Open the database at $fpath, closing any handle opened earlier so a reload does not leak it.
      function _load($fpath)
      {
          $this->_unload();
          $this->_dbh = sqlite_popen($fpath);
          if (!$this->_dbh)
              trigger_error("???? sqlite ????? `$fpath`", E_USER_ERROR);
      }

      // Close the open handle, if there is one.
      function _unload()
      {
          if ($this->_dbh)
          {
              sqlite_close($this->_dbh);
              $this->_dbh = false;
          }
      }

      /**
       * Fetch the `freq` column for $word.
       *
       * Returns -1 (after a warning) when nothing is loaded or the query
       * fails, false when no row matches, otherwise the row's freq value.
       */
      function _find($word)
      {
          // Same guard as the other backends: never query through a missing handle.
          if (!$this->_dbh)
          {
              trigger_error('sqlite dictionary is not loaded', E_USER_WARNING);
              return -1;
          }

          $word = sqlite_escape_string($word);
          $sql = "SELECT * FROM _wordlist WHERE word = '$word' LIMIT 1";
          $rs = sqlite_unbuffered_query($sql, $this->_dbh);
          if (!$rs)
          {
              $errno = sqlite_last_error($this->_dbh);
              trigger_error("SQLite: " . sqlite_error_string($errno) . "(#{$errno})", E_USER_WARNING);
              trigger_error("SQLite: " . $sql, E_USER_WARNING);
              return -1;
          }

          $ret = sqlite_fetch_array($rs, SQLITE_ASSOC);
          if (!$ret)
              return false;
          return $ret['freq'];
      }
  }
  // Plain-text backend. One entry per line: word<TAB>freq
  // When the eAccelerator extension is loaded, the parsed word list is kept in its cache.
  if (!defined('_EAKEY_DICT_')) define('_EAKEY_DICT_', 'ea_dict');
  if (!defined('_WORD_ALONE_')) define('_WORD_ALONE_', 0x4000000);
  if (!defined('_WORD_PART_')) define('_WORD_PART_', 0x8000000);

  class txt_Dictionary
  {
      // Two-level map: first two bytes of a word => (word => flags + frequency); false while unloaded.
      var $_wordlist;
      // Path used by _load() when it is called with an empty path.
      var $_fpath = 'dict/dict.txt';

      // PHP 4 style constructor; an empty $fpath leaves the object unloaded.
      function txt_Dictionary($fpath = '')
      {
          if ('' != $fpath)
              $this->_load($fpath);
      }

      // for PHP5
      function __construct($fpath = '') { $this->txt_Dictionary($fpath); }
      function __destruct() { $this->_unload(); }

      /**
       * Build the word list from the text file at $fpath.
       *
       * Every word of at least 4 bytes is stored with _WORD_ALONE_ plus its
       * frequency. Each shorter prefix, obtained by repeatedly cutting 2 bytes
       * off the end, is flagged with _WORD_PART_.
       * NOTE(review): the 2-byte steps presumably assume a double-byte encoding such as GBK - confirm.
       *
       * If the file cannot be opened the word list is left empty.
       */
      function _load($fpath)
      {
          $this->_wordlist = false;
          if ('' == $fpath)
              $fpath = $this->_fpath;

          // Reuse the eAccelerator copy when the file is missing or older than the cache.
          // NOTE(review): the cache key does not include $fpath, so different files share one entry.
          $has_ea = extension_loaded('eAccelerator');
          if ($has_ea)
          {
              $cache_time = eaccelerator_get(_EAKEY_DICT_ . '_time');
              if (!file_exists($fpath) || filemtime($fpath) < $cache_time)
                  $this->_wordlist = eaccelerator_get(_EAKEY_DICT_);
          }
          if ($this->_wordlist)
              return;

          // Nothing usable in the cache: parse the text file.
          $this->_wordlist = array();
          $fd = @fopen($fpath, 'r');
          if (!$fd)
              return;

          $dict = &$this->_wordlist;
          // Compare against false so a final line holding just "0" does not end the loop early.
          while (($line = fgets($fd, 256)) !== false)
          {
              // A line without a tab carries no frequency; treat it as 0.
              $fields = explode("\t", trim($line), 2);
              $word = $fields[0];
              $freq = isset($fields[1]) ? intval($fields[1]) : 0;
              if (strlen($word) < 4)
                  continue;

              $first = substr($word, 0, 2);
              if (!isset($dict[$first]))
                  $dict[$first] = array();

              // Full word: keep an existing _WORD_PART_ flag, add _WORD_ALONE_ and the
              // frequency. A word that already has _WORD_ALONE_ is left unchanged.
              $val = isset($dict[$first][$word]) ? $dict[$first][$word] : 0;
              if (!($val & _WORD_ALONE_))
                  $dict[$first][$word] = (($val & _WORD_PART_) | _WORD_ALONE_) + $freq;

              // Prefixes: mark every shorter form as part of a longer word.
              $prefix = $word;
              $len = strlen($prefix);
              while ($len > 4)
              {
                  $len -= 2;
                  $prefix = substr($prefix, 0, -2);
                  if (!isset($dict[$first][$prefix]))
                      $dict[$first][$prefix] = 0;
                  $dict[$first][$prefix] |= _WORD_PART_;
              }
          }
          fclose($fd);

          // Publish the freshly parsed list to the shared cache.
          if ($has_ea)
          {
              eaccelerator_rm(_EAKEY_DICT_);
              eaccelerator_put(_EAKEY_DICT_, $dict);
              eaccelerator_put(_EAKEY_DICT_ . '_time', time());
          }
      }

      // Drop the word list. Takes no argument, matching how __destruct() and the other backends call it.
      function _unload()
      {
          unset($this->_wordlist);
          $this->_wordlist = false;
      }

      /**
       * Fetch the stored value for $word.
       *
       * Returns -1 (after a warning) when the word list is unloaded or empty,
       * false when the word is unknown, otherwise the stored flags + frequency.
       */
      function _find($word)
      {
          if (!$this->_wordlist)
          {
              trigger_error('?????????????', E_USER_WARNING);
              return -1;
          }
          $first = substr($word, 0, 2);
          if (isset($this->_wordlist[$first][$word]))
              return $this->_wordlist[$first][$word];
          return false;
      }
  }
  345. ?>