/python/engine/PinYin/ZhengJu.py

http://scim-python.googlecode.com/ · Python · 1268 lines · 1097 code · 83 blank · 88 comment · 403 complexity · 46afeedb53a388189acf7cb3c399f7df MD5 · raw file

  1. # -*- coding: utf-8 -*-
  2. # vim: set noet ts=4:
  3. #
  4. # scim-python
  5. #
  6. # Copyright (c) 2007-2008 Yu Fan <yufanyufan@gmail.com>
  7. #
  8. #
  9. # This library is free software; you can redistribute it and/or
  10. # modify it under the terms of the GNU Lesser General Public
  11. # License as published by the Free Software Foundation; either
  12. # version 2 of the License, or (at your option) any later version.
  13. #
  14. # This library is distributed in the hope that it will be useful,
  15. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. # GNU Lesser General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Lesser General Public
  20. # License along with this program; if not, write to the
  21. # Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  22. # Boston, MA 02111-1307 USA
  23. #
  24. # $Id: $
  25. #
  26. import scim
  27. import scim.Log
  28. import os
  29. from scim import KeyCode
  30. from scim import KeyMask
  31. from scim import Property
  32. import traceback
  33. from PYDict import *
  34. from gettext import dgettext
  35. from ZhengJuDB import *
  36. import scim.ascii as ascii
  37. from sets import Set
  38. import popen2
  39. _ = lambda a : dgettext ("scim-python", a)
  40. RGB = lambda r, g, b : (((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff))
  41. IMEngine = scim.IMEngine
  42. IMEngineFactory = scim.IMEngineFactory
  43. (YLEN, Y0, Y1, Y2, Y3, YX, PHRASE, ADJ_FREQ) = range (0, 8)
  44. candidate_sort = lambda x,y: cmp(y[YLEN],x[YLEN]) if x[YLEN] != y[YLEN] else cmp(y[ADJ_FREQ],x[ADJ_FREQ])
  45. class InputException(Exception):
  46. def __init__ (self):
  47. Exception.__init__(self)
  48. class PinYinWord:
  49. def __init__ (self, shengmu = "", yunmu = "", pinyin = ""):
  50. self.char = ""
  51. self._pinyin_id = None
  52. self.manual = None
  53. self.char = ""
  54. self.spliter = ""
  55. if pinyin:
  56. self.set_pinyin(pinyin)
  57. else:
  58. self.set_pinyin(shengmu + yunmu)
  59. def set_pinyin(self,pinyin):
  60. if pinyin[0] == "'":
  61. self.spliter = "'"
  62. pinyin = pinyin[1:]
  63. if pinyin[:2] in SHENGMU_LIST:
  64. self.shengmu = pinyin[:2]
  65. self.yunmu = pinyin[2:]
  66. elif pinyin[:1] in SHENGMU_LIST:
  67. self.shengmu = pinyin[:1]
  68. self.yunmu = pinyin[1:]
  69. else:
  70. self.shengmu = ""
  71. self.yunmu = pinyin
  72. if self.get_pinyin() in PINYIN_LIST:
  73. self._pinyin_id = PINYIN_DICT [self.get_pinyin()]
  74. self._sheng_mu_id = SHENGMU_DICT [self.get_shengmu()]
  75. else:
  76. self._sheng_mu_id = SHENGMU_DICT [self.get_shengmu()]
  77. def mohuyin(self):
  78. pinyin = ID_PINYIN_DICT[self.real_pinyin_id]
  79. if pinyin[:2] in SHENGMU_LIST:
  80. self.shengmu = pinyin[:2]
  81. yunmu = pinyin[2:]
  82. elif pinyin[:1] in SHENGMU_LIST:
  83. self.shengmu = pinyin[:1]
  84. yunmu = pinyin[1:]
  85. else:
  86. self.shengmu = ""
  87. yunmu = pinyin
  88. if self.yunmu != "":
  89. self.yunmu = yunmu
  90. self.set_pinyin(self.get_pinyin())
  91. def get_sheng_mu_id (self):
  92. return self._sheng_mu_id
  93. def get_pinyin_id (self):
  94. return self._pinyin_id
  95. def set_pinyin_id (self, id):
  96. self.set_pinyin(ID_PINYIN_DICT[id])
  97. def get_shengmu (self):
  98. return self.shengmu
  99. def set_yunmu( self,yunmu):
  100. self.yunmu = yunmu
  101. if(yunmu != ""):
  102. self._pinyin_id = PINYIN_DICT [ self.get_pinyin() ]
  103. else:
  104. self._pinyin_id = None
  105. def set_char (self,char):
  106. self.char = char
  107. def get_pinyin (self):
  108. return self.shengmu + self.yunmu
  109. def get_screen_pinyin (self):
  110. return self.spliter + self.shengmu + self.yunmu
  111. def __str__ (self):
  112. return self.get_pinyin()
  113. def is_complete (self):
  114. return self._pinyin_id != None
  115. class Editor:
  116. database = None
  117. def __init__ (self, config = None):
  118. if config == None:
  119. config = PseudoConfig()
  120. if Editor.database == None:
  121. Editor.database = ZhengJuDB(config)
  122. self.lookup_table = scim.LookupTable (9)
  123. self.lookup_table.fix_page_size(True)
  124. self.clear()
  125. self.config = config
  126. self.load_config(config)
  127. def clear(self):
  128. self.cursor = 0
  129. self.wordlist = []
  130. self.pinyinlist = []
  131. self.candidates = []
  132. self.predict = []
  133. self.lookup_table.clear()
  134. self.lookup_table.show_cursor(False)
  135. Editor.database.clear_cache()
  136. def load_config(self, config):
  137. Editor.database.load_config(config)
  138. self.userword = config.read ("/IMEngine/Python/ZhengJu/CreateUserWords", True)
  139. self.userphrase = config.read ("/IMEngine/Python/ZhengJu/CreateUserPhrases", True)
  140. self.adjustfreq = config.read ("/IMEngine/Python/ZhengJu/AdjustWordFreq", True)
  141. self.logconverror = config.read ("/IMEngine/Python/ZhengJu/LogConvError", True)
  142. self.splitpinyin = config.read ("/IMEngine/Python/ZhengJu/SplitPinyin", True)
  143. self.enable_mohuyin = config.read ("/IMEngine/Python/ZhengJu/FuzzyPinyin", False)
  144. self.mohuyin_s_sh = config.read ("/IMEngine/Python/ZhengJu/FuzzyS_Sh", True)
  145. self.mohuyin_c_ch = config.read ("/IMEngine/Python/ZhengJu/FuzzyC_Ch", True)
  146. self.mohuyin_z_zh = config.read ("/IMEngine/Python/ZhengJu/FuzzyZ_Zh", True)
  147. self.mohuyin_l_n = config.read ("/IMEngine/Python/ZhengJu/FuzzyL_N", True)
  148. self.mohuyin_in_ing = config.read ("/IMEngine/Python/ZhengJu/FuzzyIn_Ing", True)
  149. self.mohuyin_en_eng = config.read ("/IMEngine/Python/ZhengJu/FuzzyEn_Eng", True)
  150. self.mohuyin_an_ang = config.read ("/IMEngine/Python/ZhengJu/FuzzyAn_Ang", True)
  151. self.build_mohuyin()
  152. def build_mohuyin(self):
  153. self.shengmu_mohu = {}
  154. if self.mohuyin_s_sh:
  155. self.shengmu_mohu["s"]= MOHU_SHENGMU["s"]
  156. self.shengmu_mohu["sh"]= MOHU_SHENGMU["sh"]
  157. if self.mohuyin_z_zh:
  158. self.shengmu_mohu["z"]= MOHU_SHENGMU["z"]
  159. self.shengmu_mohu["zh"]= MOHU_SHENGMU["zh"]
  160. if self.mohuyin_c_ch:
  161. self.shengmu_mohu["c"]= MOHU_SHENGMU["c"]
  162. self.shengmu_mohu["ch"]= MOHU_SHENGMU["ch"]
  163. if self.mohuyin_l_n:
  164. self.shengmu_mohu["l"]= MOHU_SHENGMU["l"]
  165. self.shengmu_mohu["n"]= MOHU_SHENGMU["n"]
  166. self.yunmu_mohu = {}
  167. if self.mohuyin_an_ang:
  168. self.yunmu_mohu["an"] = MOHU_YUNMU["an"]
  169. self.yunmu_mohu["ang"] = MOHU_YUNMU["ang"]
  170. if self.mohuyin_en_eng:
  171. self.yunmu_mohu["en"] = MOHU_YUNMU["en"]
  172. self.yunmu_mohu["eng"] = MOHU_YUNMU["eng"]
  173. if self.mohuyin_in_ing:
  174. self.yunmu_mohu["in"] = MOHU_YUNMU["in"]
  175. self.yunmu_mohu["in"] = MOHU_YUNMU["ing"]
  176. def current (self):
  177. if self.pinyinlist:
  178. return self.pinyinlist[-1]
  179. else:
  180. return None
  181. def is_empty (self):
  182. return (not self.pinyinlist) and (not self.wordlist)
  183. def is_end (self):
  184. return self.is_empty() or (not self.pinyinlist) and self.cursor == len (self.wordlist)
  185. def get_aux (self):
  186. return "".join ( u[PHRASE] for u in self.predict)
  187. def get_screen_pinyin(self):
  188. if self.splitpinyin:
  189. s = ""
  190. if self.pinyinlist:
  191. for i in range(len(self.pinyinlist)-1):
  192. p = self.pinyinlist[i].get_screen_pinyin() + self.pinyinlist[i+1].get_screen_pinyin()[0]
  193. if p in PINYIN_LIST or p in PINYIN_PARTIAL_LIST:
  194. s += self.pinyinlist[i].get_screen_pinyin() + "'"
  195. else:
  196. s += self.pinyinlist[i].get_screen_pinyin()
  197. s += self.pinyinlist[-1].get_screen_pinyin()
  198. return s
  199. else:
  200. return u"".join( i.get_screen_pinyin() for i in self.pinyinlist)
  201. def get_preedit (self):
  202. return u"".join( [i.char for i in self.wordlist[0:self.cursor] ] ) +\
  203. self.get_screen_pinyin() + \
  204. u"".join ( [i.char for i in self.wordlist[self.cursor:]] )
  205. def get_screen_cursor (self):
  206. if len(self.get_screen_pinyin())>0:
  207. return self.cursor + len(self.get_screen_pinyin())
  208. else:
  209. return self.cursor
  210. def pinyin_select (self, candidate, manual = False):
  211. phrase = candidate[PHRASE]
  212. length = len(phrase)
  213. for i in range(0,length):
  214. self.pinyinlist[i].set_char(phrase[i])
  215. if i<4:
  216. #~ self.pinyinlist[i].set_pinyin_id (candidate[i+1])
  217. self.pinyinlist[i].real_pinyin_id = candidate[i+1]
  218. if self.enable_mohuyin:
  219. self.pinyinlist[i].mohuyin()
  220. else:
  221. py = candidate[YX].split("'")
  222. self.pinyinlist[i].real_pinyin_id = PINYIN_DICT[py[i-4]]
  223. if self.enable_mohuyin:
  224. self.pinyinlist[i].mohuyin()
  225. #~ self.pinyinlist[i].set_pinyin ([py[i-4]])
  226. self.pinyinlist[i].manual = manual
  227. self.wordlist[self.cursor:self.cursor] = self.pinyinlist[:length]
  228. del self.pinyinlist[:length]
  229. self.cursor += length
  230. if manual:
  231. self.update()
  232. def reparse_backtrace(self):
  233. if self.cursor < len(self.wordlist):
  234. i = self.cursor
  235. while i >= 0:
  236. if self.wordlist[i].manual:
  237. break
  238. i-=1
  239. i += 1
  240. self.reparse(i)
  241. def convert_all (self):
  242. predicts = self.predict
  243. for i in predicts:
  244. self.pinyin_select(i)
  245. self.reparse_backtrace()
  246. self.update ()
  247. def jump_to_next_word(self):
  248. string = self.get_preedit ()
  249. phrase_list = self.split_phrase (string)
  250. p = 0;
  251. for i in phrase_list:
  252. if p <= self.cursor:
  253. p += i[1]
  254. else:
  255. break
  256. self.cursor = p
  257. self.update ()
  258. #~ predict = self.get_predict (self.wordlist[self.cursor:])
  259. #~ self.cursor += predict[0][YLEN]
  260. #~ self.update ()
  261. def predict_len(self, predicts):
  262. return sum (u[YLEN] for u in predicts)
  263. def auto_convert (self):
  264. self.update_predict()
  265. while self.predict_len(self.predict[:2]) < len (self.pinyinlist):
  266. self.pinyin_select(self.predict[0])
  267. self.update_predict()
  268. self.update_candidates()
    def auto_convert_quanpin(self):
        """Variant of ``auto_convert`` that looks at the last pending syllable first.

        When the last syllable is neither a bare shengmu nor a partial
        pinyin, but is a proper prefix of some entry in PINYIN_LIST,
        conversion stops one syllable earlier (the ``+ 1`` below).

        NOTE(review): ``self.predict`` is used without being refreshed first
        (the ``update_predict()`` call at the top was commented out upstream).
        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation; the second ``while`` is assumed to be at function
        level -- confirm against the upstream file.
        """
        #~ self.update_predict()
        p = self.pinyinlist[-1].get_pinyin()
        if p not in SHENGMU_LIST and p not in PINYIN_PARTIAL_LIST:
            for t in PINYIN_LIST:
                # t is a longer pinyin that starts with p
                if p != t and t[:len(p)] == p :
                    while self.predict_len(self.predict[:2]) + 1 < len (self.pinyinlist):
                        self.pinyin_select(self.predict[0])
                        self.update_predict()
                    self.update_candidates()
                    return
        while self.predict_len(self.predict[:2]) < len (self.pinyinlist):
            self.pinyin_select(self.predict[0])
            self.update_predict()
        self.update_candidates()
  284. def update (self):
  285. self.candidate_cursor = None
  286. self.update_predict()
  287. self.update_candidates()
  288. def update_predict (self):
  289. if self.pinyinlist:
  290. self.predict = self.get_predict_pinyinlist (self.pinyinlist)
  291. else:
  292. self.predict = []
  293. def reverse(self, phrase):
  294. self.clear()
  295. while phrase:
  296. for i in range(len(phrase),0,-1):
  297. #~ print len(phrase),phrase[:i], i
  298. temp = self.database.select_phrase(phrase[:i])
  299. if temp:
  300. result = temp[0]
  301. break
  302. else:
  303. raise InputException()
  304. for i in range(result[YLEN]):
  305. pinyin = PinYinWord("'","")
  306. pinyin.set_char(phrase[i])
  307. if i < 4:
  308. pinyin.set_pinyin_id (result[i+1])
  309. else:
  310. #print candidate[YX]
  311. py = result[YX].split("'")
  312. #print py[i-5]
  313. pinyin.set_pinyin_id (PINYIN_DICT[py[i-4]])
  314. #print self.pinyinlist[i]._pinyin_id
  315. pinyin.real_pinyin_id=pinyin._pinyin_id
  316. self.wordlist.append(pinyin)
  317. phrase = phrase[result[YLEN]:]
    def split_phrase (self, string):
        """Segment *string* into a list of (start, length, text) tuples.

        At each position, when at least three words remain, records from
        ``select_words_by_pinyin_list_all`` for the next three words are
        matched against *string*; failing that (or when fewer words remain)
        ``select_words_by_phrase`` on the next two words is tried. A
        position with no match becomes a length-1 segment.

        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation -- confirm against the upstream file.
        NOTE(review): ``candidate[PHRASE] < i[PHRASE]`` compares the phrase
        values themselves, not their lengths -- confirm this is intended.
        """
        start = 0
        phrase_list = []
        # The loop stops one word early; a trailing single word is added below.
        while start < len(self.wordlist)-1:
            candidate = None
            if len(self.wordlist) - start >= 3:
                phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start + 3])
                for i in phrase:
                    # keep only records whose text occurs in string at this position
                    if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
                        if not candidate or candidate[PHRASE] < i[PHRASE]:
                            candidate = i
                if candidate == None:
                    phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
                    if phrase:
                        candidate = phrase[0]
            else:
                phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
                if phrase:
                    candidate = phrase[0]
            if candidate == None:
                phrase_list.append ( (start, 1, string[start]))
                start += 1
            else:
                phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
                start += len(candidate[PHRASE])
        if start < len (self.wordlist):
            phrase_list.append ((start,1,string[-1]))
        return phrase_list
  347. def split_phrasev2 (self, string):
  348. start = 0
  349. phrase_list = []
  350. while start < len(self.wordlist)-1:
  351. phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start+2])
  352. candidate = None
  353. for i in phrase:
  354. if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
  355. if not candidate or candidate[PHRASE] < i[PHRASE]:
  356. candidate = i
  357. if candidate == None:
  358. phrase_list.append ( (start, 1, string[start]))
  359. start += 1
  360. else:
  361. phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
  362. start += len(candidate[PHRASE])
  363. if start < len (self.wordlist):
  364. phrase_list.append ((start,1,string[-1]))
  365. return phrase_list
  366. def learn_user_words(self, phrase_list, string, sentence):
  367. if not self.userword:
  368. return
  369. start = 0
  370. while start < len (phrase_list):
  371. tmp_phrase_start = phrase_list[start][0]
  372. tmp_phrase = ""
  373. while start < len (phrase_list) and phrase_list[start][1] == 1 \
  374. and string[phrase_list[start][0]] != sentence[phrase_list[start][0]]:
  375. tmp_phrase += phrase_list[start][2]
  376. del phrase_list[start]
  377. if tmp_phrase:
  378. phrase_list.insert (start, (tmp_phrase_start, len(tmp_phrase), tmp_phrase) )
  379. if len (tmp_phrase) > 1:
  380. Editor.database.add_phrase (self.wordlist[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)], USER_WORD)
  381. self.log_conv_error( sentence, string, phrase_list, tmp_phrase_start, tmp_phrase_start, 0)
  382. string = string[:tmp_phrase_start] + sentence[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)] + string[tmp_phrase_start + len(tmp_phrase):]
  383. start += 1
  384. return string
  385. def split_predict (self):
  386. predict = []
  387. start = 0
  388. while start < len (self.wordlist):
  389. p = self.get_predict (self.wordlist[start:])
  390. predict.append ( (start,len(p[0][PHRASE]), p[0][PHRASE]) )
  391. start += len (p[0][PHRASE])
  392. return predict
  393. def addphrase (self, phrase_list, pstart, pend, freq):
  394. if pstart < 0:
  395. return
  396. if pend >= len (phrase_list):
  397. return
  398. Editor.database.add_phrase(\
  399. self.wordlist[phrase_list[pstart][0]:(phrase_list[pend][0]+phrase_list[pend][1])], freq)
  400. def adjust_all_freq (self, phrase_list):
  401. if not self.userphrase:
  402. return
  403. p = [ self.wordlist[i[0]:i[0]+i[1]] for i in phrase_list]
  404. for i in p:
  405. Editor.database.adjust_phrase_freq (i)
  406. def adjust_freq (self, phrase_list, phrase_begin):
  407. if not self.adjustfreq:
  408. return
  409. i = phrase_list[phrase_begin]
  410. p = self.wordlist[i[0]:i[0]+i[1]]
  411. Editor.database.adjust_phrase_freq (p)
  412. def delete_phrase(self, n):
  413. if n >= self.lookup_table.get_current_page_size():
  414. raise InputException()
  415. candidate = self.candidates[self.lookup_table.get_current_page_start() + n]
  416. if candidate[ADJ_FREQ] == 0 or \
  417. candidate[ADJ_FREQ]%USER_PHRASE and candidate[ADJ_FREQ]%USER_WORD:
  418. raise InputException()
  419. Editor.database.remove_phrase (candidate)
  420. self.update ()
  421. def delete_cursor_phrase(self):
  422. candidate = self.candidates[self.lookup_table.get_cursor_pos() ]
  423. if candidate[ADJ_FREQ] == 0 or \
  424. candidate[ADJ_FREQ]%USER_PHRASE and candidate[ADJ_FREQ]%USER_WORD:
  425. self.candidate_cursor = None
  426. raise InputException()
  427. Editor.database.remove_phrase (candidate)
  428. self.update ()
  429. def log_conv_error(self, predict, sentence, phrase_list, pstart, pend, type):
  430. if pstart < 0:
  431. return
  432. if pend >= len (phrase_list):
  433. return
  434. if self.logconverror:
  435. begin = phrase_list[pstart][0]
  436. end = phrase_list[pend][0]+phrase_list[pend][1]
  437. p = open(os.path.expanduser ("~/.scim/zhengju-conv-error.log"),'a')
  438. print >> p, sentence[begin:end].encode ("utf-8"), predict[begin:end].encode ("utf-8"), type
  439. p.close ()
    def learn (self):
        """Compare the committed text with the automatic prediction and feed the differences back.

        ``predict`` is the segmentation from ``split_predict`` and
        ``phrase_list`` the segmentation of the actual text from
        ``split_phrase``. Both lists are walked in parallel; wherever their
        segment ends line up and the covered text differs, phrases are added
        with USER_PHRASE, the difference is logged, or a frequency is
        adjusted.

        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation. In particular the placement of ``adjust_freq`` and the
        ``if`` that the final ``else`` pairs with are assumptions -- confirm
        against the upstream file before relying on this block.
        """
        if not self.userword and not self.userphrase and not self.adjustfreq and not self.logconverror:
            return
        predict = self.split_predict ()
        sentence = u"".join ([ i[2] for i in predict])
        # From here on each word carries the pinyin id of the chosen character.
        for i in self.wordlist:
            i._pinyin_id = i.real_pinyin_id
        string = self.get_preedit ()
        phrase_list = self.split_phrase (string)
        string = self.learn_user_words(phrase_list, string, sentence)
        if not self.userphrase and not self.adjustfreq:
            return
        cur_phrase = 0
        cur_predict = 0
        phrase_begin = 0
        predict_begin = 0
        while cur_phrase < len(phrase_list):
            # Advance the prediction until its end reaches the end of the current phrase.
            while predict[cur_predict][0]+ predict[cur_predict][1] < phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
                cur_predict += 1
            if predict[cur_predict][0]+ predict[cur_predict][1] > phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
                # The prediction segment extends past this phrase: take the next phrase too.
                cur_phrase += 1
            else:
                # Both segmentations end at the same position: compare the covered text.
                if string[phrase_list[phrase_begin][0]:phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]]!=\
                        sentence[predict[predict_begin][0]:predict[cur_predict][0] + predict[cur_predict][1]]:
                    if cur_phrase - phrase_begin == 0:
                        if cur_predict - predict_begin == 0:
                            # Store the phrase joined with its left and with its right neighbour.
                            self.addphrase(phrase_list, phrase_begin - 1, cur_phrase, USER_PHRASE)
                            self.log_conv_error(sentence, string, phrase_list, phrase_begin - 1, cur_phrase, 1)
                            self.addphrase(phrase_list, phrase_begin, cur_phrase + 1, USER_PHRASE)
                            self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase + 1, 1)
                        self.adjust_freq (phrase_list, phrase_begin)
                    else:
                        self.addphrase (phrase_list, phrase_begin, cur_phrase, USER_PHRASE)
                        self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase, 2)
                phrase_begin = cur_phrase + 1
                predict_begin = cur_predict + 1
                cur_phrase += 1
        Editor.database.clean_useless_phrase()
  485. def freq_alg(self, phrase1, phrase2):
  486. freq = 0
  487. if len(phrase1[PHRASE]) == 1:
  488. freq += phrase1[ADJ_FREQ] * 10
  489. elif len(phrase1[PHRASE]) < 4:
  490. freq += phrase1[ADJ_FREQ] * len(phrase1[PHRASE])
  491. else:
  492. freq += phrase1[ADJ_FREQ] * pow( len(phrase1[PHRASE]) , 2)
  493. if len(phrase2[PHRASE]) == 1:
  494. freq += phrase2[ADJ_FREQ] * 10
  495. elif len(phrase2[PHRASE]) < 4:
  496. freq += phrase2[ADJ_FREQ] * len(phrase2[PHRASE])
  497. else:
  498. freq += phrase2[ADJ_FREQ] * pow( len(phrase2[PHRASE]) , 2)
  499. return freq
  500. return phrase1[ADJ_FREQ] + phrase2[ADJ_FREQ]
  501. return phrase1[ADJ_FREQ] * len(phrase1[PHRASE]) + phrase2[ADJ_FREQ] * len(phrase2[PHRASE])
  502. return phrase1[ADJ_FREQ] * pow( len(phrase1[PHRASE]) , 2.5) + phrase2[ADJ_FREQ] * pow( len(phrase2[PHRASE]) , 2.5)
  503. return pow (phrase1[ADJ_FREQ], len(phrase1[PHRASE]) / 5.) + pow( phrase2[ADJ_FREQ], len(phrase2[PHRASE]) /5)
    def get_predict_pinyinlist (self, pinyinlist):
        """Predict a list of phrase records covering *pinyinlist*.

        Order of attempts:
          1. an exact record for the whole list;
          2. a record from ``select_words_by_pinyin_list_all``, truncated to
             the list length;
          3. the best-scoring (``freq_alg``) two-phrase split of the longest
             prefix that can be split, followed by a recursive prediction
             for whatever remains.

        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation -- confirm against the upstream file.
        NOTE(review): if no split is found ``predict`` stays empty and the
        recursive call below receives the same list again -- confirm that
        the database always returns a record for a single pinyin.
        """
        candidates = Editor.database.select_words_by_pinyin_list (pinyinlist)
        if candidates:
            return [candidates[0]]
        else:
            candidates = Editor.database.select_words_by_pinyin_list_all(pinyinlist)
            if candidates:
                # Copy the record and cut it down to the pinyin typed so far.
                p = list (candidates[0]);
                p[YLEN] = len (pinyinlist)
                p[PHRASE] = p[PHRASE][:p[YLEN]]
                return [p]
        max_freq = 0
        predict = []
        # Try the longest prefix first; stop at the first length that can be split.
        for length in range(len (pinyinlist), 1, -1):
            for i in range (1, length):
                candidates = Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
                if not candidates:
                    continue
                candidates2 = Editor.database.select_words_by_pinyin_list(pinyinlist[i:length])
                if not candidates2:
                    candidates2 = Editor.database.select_words_by_pinyin_list_all(pinyinlist[i:length])
                    if candidates2:
                        p = list (candidates2[0]);
                        p[YLEN] = length - i
                        p[PHRASE] = p[PHRASE][:p[YLEN]]
                        tmp_phrase = candidates[0]
                        tmp_phrase2 = p
                    else:
                        continue
                else:
                    tmp_phrase = candidates[0]
                    tmp_phrase2 = candidates2[0]
                new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
                # ">=" means a later split point wins a tie.
                if new_freq >= max_freq:
                    predict = [tmp_phrase, tmp_phrase2]
                    max_freq = new_freq
            if predict:
                break
        if self.predict_len(predict) < len (pinyinlist):
            return predict + self.get_predict_pinyinlist(pinyinlist[self.predict_len(predict):])
        else:
            return predict
    def get_predict (self, pinyinlist):
        """Predict the leading one or two phrase records for *pinyinlist*.

        Returns [] for an empty list, a single record when the whole list
        matches (exactly, or via ``select_words_by_pinyin_list_all``
        truncated to the list length), otherwise the two-phrase split whose
        second phrase ends furthest right, ties broken by ``freq_alg``.

        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation -- confirm against the upstream file.
        NOTE(review): ``predict`` is never initialised, so the final
        ``return`` fails with an unbound local when no split is found.
        NOTE(review): ``tmp_freq`` and ``tmp_freq2`` are assigned but unused.
        """
        if not pinyinlist:
            return []
        candidates = Editor.database.select_words_by_pinyin_list(pinyinlist)
        if candidates:
            return [candidates[0]]
        else:
            candidates = Editor.database.select_words_by_pinyin_list_all(pinyinlist)
            if candidates:
                # Copy the record and cut it down to the length of pinyinlist.
                p = list (candidates[0]);
                p[YLEN] = len (pinyinlist)
                p[PHRASE] = p[PHRASE][:p[YLEN]]
                return [p]
        max_freq = 0
        max_length =0
        for i in range (1, len(pinyinlist)):
            candidates = Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
            if not candidates:
                continue
            tmp_phrase = candidates[0]
            tmp_freq = tmp_phrase[ADJ_FREQ]
            longest2 = Editor.database.get_longest_phrase_length (pinyinlist[i:])
            # p is the index of the last pinyin covered by the second phrase.
            for p in range(i + longest2,i-1,-1):
                if p < max_length:
                    continue
                candidates2 = Editor.database.select_words_by_pinyin_list(pinyinlist[i:p+1])
                if candidates2:
                    tmp_phrase2 = candidates2[0]
                    tmp_freq2 = tmp_phrase2[ADJ_FREQ]
                    new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
                    if p > max_length or \
                            (new_freq >= max_freq and p == max_length):
                        predict = [tmp_phrase, tmp_phrase2]
                        max_freq = new_freq
                        max_length = p
        return predict
  610. def reparse (self, start):
  611. #~ print "reparse"
  612. if start == len (self.wordlist):
  613. return
  614. predict = self.get_predict (self.wordlist[start:])
  615. phrase = predict[0][PHRASE]
  616. length = len (phrase)
  617. #~ if len(phrase)<len(self.wordlist)-start \
  618. #~ else len(self.wordlist)-start
  619. #~ print string
  620. for i in range(0, length):
  621. if self.wordlist[start+i].manual:
  622. return
  623. self.wordlist[start+i].set_char(phrase[i])
  624. self.reparse (start+length)
  625. def wordlist_manual_select (self, candidate):
  626. phrase = candidate[PHRASE]
  627. for i in range (0, len (phrase) ):
  628. if i < 4:
  629. self.wordlist[ self.cursor + i ].real_pinyin_id = candidate[ i + 1 ]
  630. if self.enable_mohuyin:
  631. self.wordlist[ self.cursor + i ].mohuyin()
  632. else:
  633. py = candidate[YX].split("'")
  634. self.wordlist[ self.cursor + i ].real_pinyin_id = PINYIN_DICT[ py[ i - 4 ] ]
  635. if self.enable_mohuyin:
  636. self.wordlist[ self.cursor + i ].mohuyin()
  637. self.wordlist[ self.cursor + i ].set_char( phrase[i] )
  638. self.wordlist[ self.cursor + i ].manual = True
  639. self.cursor += len (phrase)
  640. if self.cursor < len (self.wordlist):
  641. self.reparse (self.cursor);
  642. self.update ()
  643. def commit (self):
  644. if self.pinyinlist:
  645. self.convert_all ()
  646. string = self.get_preedit ()
  647. self.learn ()
  648. self.clear ()
  649. return string
  650. def del_current (self):
  651. if self.pinyinlist:
  652. raise InputException ()
  653. if self.cursor > 0:
  654. del self.wordlist[self.cursor-1]
  655. self.cursor -= 1
  656. if len (self.wordlist) == 0:
  657. self.clear ()
  658. self.reparse_backtrace ();
  659. self.update()
  660. elif self.wordlist and self.cursor == 0:
  661. raise InputException()
  662. def del_next (self):
  663. if self.pinyinlist or self.cursor == len (self.wordlist):
  664. raise InputException ()
  665. else:
  666. del self.wordlist[self.cursor]
  667. if len (self.wordlist)==0:
  668. self.clear ()
  669. self.reparse_backtrace ();
  670. self.update()
    def move_cursor (self, move):
        """Move the cursor by *move* positions, wrapping around at both ends.

        A positive *move* while ``candidate_cursor`` is set selects the
        candidate under the lookup cursor instead of moving.

        Raises:
            InputException: when the editor is empty, or pinyin is pending
                and either *move* is negative or no candidate cursor is set.

        NOTE(review): nesting was reconstructed from a listing that lost its
        indentation; the final ``update()`` is assumed to belong to the
        ``else`` branch -- confirm against the upstream file.
        """
        if self.is_empty():
            raise InputException()
        if self.pinyinlist and (move<0 or self.candidate_cursor == None):
            raise InputException()
        if move > 0 and self.candidate_cursor != None:
            self.select_cursor()
        else:
            self.cursor += move
            if self.cursor < 0:
                # wrap from before the start to the end
                self.cursor += len (self.wordlist) + 1
            elif self.cursor > len (self.wordlist):
                # wrap from past the end to the start
                self.cursor = 0
            self.update ()
  685. def move_cursor_to (self, pos):
  686. if self.is_empty ():
  687. raise InputException ()
  688. if self.pinyinlist:
  689. self.convert_all ()
  690. if pos == 0:
  691. self.cursor = len(self.wordlist)
  692. elif pos > len(self.wordlist) + 1:
  693. raise InputException ()
  694. else:
  695. self.cursor = pos - 1
  696. self.update ()
  697. def select (self, n):
  698. #~ print self.lookup_table.get_current_page_size()
  699. if n >= self.lookup_table.get_current_page_size():
  700. raise InputException()
  701. candidate = self.candidates[self.lookup_table.get_current_page_start()+n]
  702. if self.pinyinlist:
  703. self.pinyin_select(candidate, True)
  704. else:
  705. self.wordlist_manual_select(candidate)
  706. def select_cursor (self):
  707. candidate = self.candidates[self.lookup_table.get_cursor_pos()]
  708. #~ print candidate[PHRASE]
  709. if self.pinyinlist:
  710. self.pinyin_select(candidate, True)
  711. else:
  712. self.wordlist_manual_select(candidate)
  713. def recursive_mohuyin_pinyinlist(self, pinyinlist):
  714. for i in self.mohuyin(pinyinlist[0].get_screen_pinyin()):
  715. if pinyinlist[1:]:
  716. for p in self.recursive_mohuyin_pinyinlist(pinyinlist[1:]):
  717. yield [PinYinWord(pinyin = i)] + p
  718. else:
  719. yield [PinYinWord(pinyin = i)]
  720. def recursive_mohuyin(self, strl):
  721. for i in self.mohuyin(strl[0]):
  722. if strl[1:]:
  723. for p in self.recursive_mohu(strl[1:]):
  724. yield [i] + p
  725. else:
  726. yield [i]
  727. def mohuyin(self, pinyin):
  728. #~ print pinyin
  729. if pinyin[0] == "'":
  730. spliter = "'"
  731. pinyin = pinyin[1:]
  732. else:
  733. spliter = ""
  734. if pinyin[:2] in SHENGMU_LIST:
  735. shengmu = pinyin[:2]
  736. yunmu = pinyin[2:]
  737. elif pinyin[:1] in SHENGMU_LIST:
  738. shengmu = pinyin[:1]
  739. yunmu = pinyin[1:]
  740. else:
  741. shengmu = ""
  742. yunmu = pinyin
  743. if shengmu in self.shengmu_mohu:
  744. shengmu = self.shengmu_mohu[shengmu]
  745. else:
  746. shengmu = [shengmu]
  747. if yunmu in self.yunmu_mohu:
  748. yunmu = self.yunmu_mohu[yunmu]
  749. else:
  750. yunmu = [yunmu]
  751. if pinyin in PINYIN_PARTIAL_LIST:
  752. for q in yunmu:
  753. if i + q in SHENGMU_LIST or i + q in PINYIN_LIST or i + q in PINYIN_PARTIAL_LIST:
  754. yield spliter + i + q
  755. else:
  756. for i in shengmu:
  757. for q in yunmu:
  758. if i + q in SHENGMU_LIST or i + q in PINYIN_LIST:
  759. yield spliter + i + q
  760. def parsr_mohuyin(self, pinyinlist):
  761. candidates = []
  762. if self.enable_mohuyin:
  763. ss = Set()
  764. for p in self.recursive_mohuyin_pinyinlist(pinyinlist):
  765. #~ print u" ".join( i.get_pinyin() for i in p)
  766. for i in range (len (p), 0, -1):
  767. ss.update(Editor.database.select_words_by_pinyin_list (p[:i]))
  768. candidates = list(ss)
  769. candidates.sort(cmp = candidate_sort)
  770. else:
  771. for i in range (len (pinyinlist), 0, -1):
  772. candidates += Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
  773. return candidates
  774. def update_candidates (self):
  775. if self.is_empty():
  776. self.candidates = []
  777. elif self.pinyinlist:
  778. self.candidates = self.parsr_mohuyin(self.pinyinlist)
  779. elif len(self.wordlist)>self.cursor:
  780. self.candidates = self.parsr_mohuyin(self.wordlist[self.cursor:])
  781. else:
  782. self.candidates = []
  783. self.update_lookup_table()
  784. def update_lookup_table (self):
  785. self.lookup_table.clear()
  786. self.lookup_table.show_cursor(False)
  787. for c in self.candidates:
  788. if c[ADJ_FREQ] == 0 or c[ADJ_FREQ]%USER_PHRASE and c[ADJ_FREQ]%USER_WORD:
  789. self.lookup_table.append_candidate (c[PHRASE])
  790. else:
  791. attrs = [scim.Attribute (0, len(c[PHRASE]), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
  792. self.lookup_table.append_candidate (c[PHRASE], attrs)
  793. class Engine (IMEngine):
  794. def __init__ (self, factory, config, encoding, id):
  795. IMEngine.__init__ (self, factory, config, encoding, id)
  796. self._editor = Editor ()
  797. self._lookup_table = scim.LookupTable (9)
  798. self._status_property = Property ("chinese", "CN")
  799. self._setup_property = Property ("setup", "", "/usr/share/scim/icons/setup.png")
  800. self._chinese_mode = True
  801. self.reload_config(config)
  802. self._log = scim.Log.Log ("ZhengJu")
  803. #~ print "init"
  804. def clear(self):
  805. pass
  806. def reset(self):
  807. #~ print "reset"
  808. if self._editor.wordlist:
  809. self.commit_string (self._editor.commit())
  810. else:
  811. self._editor.clear()
  812. self.clear()
  813. self.origin_string = None
  814. self._double_quotation_state = False
  815. self._single_quotation_state = False
  816. self._prev_key = None
  817. self._shift_key = None
  818. self.pipe = None
  819. self.update ()
  820. props = [self._status_property, self._setup_property]
  821. self.register_properties (props)
  822. self.update_properties ()
  823. def update_preedit (self):
  824. string = self._editor.get_preedit () + self.get_extra_string()
  825. if (string == u""):
  826. self.hide_preedit_string ()
  827. else:
  828. self.show_preedit_string ()
  829. self.update_preedit_string (string , [])
  830. self.update_preedit_caret (self._editor.get_screen_cursor())
  831. def focus_out(self):
  832. #~ print "out reset"
  833. self.reset()
  834. IMEngine.focus_out (self)
  835. def focus_in (self):
  836. #~ print "in reset"
  837. self.reset()
  838. IMEngine.focus_in (self)
  839. def trigger_property (self, property):
  840. if property == "chinese":
  841. self.change_mode ()
  842. elif property == "setup":
  843. self.start_helper ("61af6de6-c29d-421e-9e1b-e34a29c68c76")
  844. def update_candidate (self):
  845. if self._editor.candidates:
  846. self.update_lookup_table(self._editor.lookup_table)
  847. self.show_lookup_table()
  848. else:
  849. self.hide_lookup_table ()
  850. def update_aux(self):
  851. if self._editor.predict:
  852. self.show_aux_string ()
  853. string = self._editor.get_aux ()
  854. attrs = [scim.Attribute (0, len (string), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
  855. self.update_aux_string (string, attrs)
  856. else:
  857. self.hide_aux_string ()
  858. self.update_aux_string (u"")
  859. def update (self):
  860. self.update_preedit ()
  861. self.update_aux ()
  862. self.update_candidate ()
  863. def update_properties (self):
  864. if self._chinese_mode: # refresh mode
  865. self._status_property.label = _("CN")
  866. else:
  867. self._status_property.label = _("EN")
  868. self.update_property(self._status_property)
  869. def change_mode(self):
  870. if self._chinese_mode:
  871. self.commit_string (self._editor.commit())
  872. self.update()
  873. self._chinese_mode = not self._chinese_mode
  874. self.update_properties ()
  875. #~ print "change_mode", self._chinese_mode
  876. def reload_config (self, config):
  877. self._editor.load_config(config)
  878. self.progresivepromp = config.read ("/IMEngine/Python/ZhengJu/ProgressivePrompt", False)
  879. def lookup_table_page_down (self):
  880. self._editor.lookup_table.page_down ();
  881. self.update()
  882. return True
  883. def lookup_table_page_up (self):
  884. self._editor.lookup_table.page_up ();
  885. self.update()
  886. return True
  887. def process_key_event (self, key):
  888. #~ print key.code
  889. if self._chinese_mode and self._editor.is_end() and not self.get_extra_string():
  890. if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R) \
  891. and key.mask & KeyMask.ReleaseMask:
  892. if self._shift_key:
  893. self.change_mode()
  894. return True
  895. if key.mask == KeyMask.ShiftMask and (key.code >= KeyCode.KEY_A and key.code <= KeyCode.KEY_Z) \
  896. or key.mask & KeyMask.CapsLockMask:
  897. self.change_mode()
  898. elif not self.get_extra_string():
  899. if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R)\
  900. and key.mask & KeyMask.ReleaseMask:
  901. if self._shift_key:
  902. self.change_mode()
  903. return True
  904. if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R) \
  905. and key.mask == KeyMask.NullMask:
  906. self._shift_key = True;
  907. else:
  908. self._shift_key = None;
  909. if self.pipe and self.pipe.poll() != -1:
  910. try:
  911. self.origin_string = unicode(self.pipe.fromchild.read()[:-1],"utf8")
  912. self._editor.reverse(self.origin_string)
  913. self._editor.move_cursor_to (1)
  914. except:
  915. self._editor.clear()
  916. self.beep ()
  917. else:
  918. self.commit_string(u"")
  919. self.update()
  920. finally:
  921. self.pipe = None
  922. return True
  923. if key.mask & KeyMask.ReleaseMask:
  924. return False
  925. try:
  926. if self._chinese_mode:
  927. result = self.chinese_process_key_event (key)
  928. else:
  929. result = self.english_process_key_event (key)
  930. self._prev_key = key
  931. except InputException, e:
  932. self.beep ()
  933. return True
  934. except Exception, e:
  935. self.beep ()
  936. self._log.print_exc()
  937. self._log.log("DEBUG", self._editor.cursor)
  938. self._log.log("DEBUG", [i.char.encode("utf-8") for i in self._editor.wordlist] )
  939. self._log.log("DEBUG", [i.get_screen_pinyin() for i in self._editor.pinyinlist] )
  940. self._log.log("DEBUG", self._editor.get_preedit().encode ("utf-8"))
  941. return True
  942. else:
  943. return result
  944. finally:
  945. self.update()
  946. def english_process_key_event (self, key):
  947. return False
  948. def chinese_process_key_event (self, key):
  949. if self._editor.is_empty() and not self.get_extra_string():
  950. if key.code <= 127 and ascii.ispunct (chr (key.code)):
  951. self.commit_string (self.convert_to_full_width (unichr (key.code)))
  952. return True
  953. elif key.code == KeyCode.KEY_r and key.mask == KeyMask.ControlMask:
  954. if not self.pipe:
  955. self.pipe = popen2.Popen3("python -c" +'"import gtk; print gtk.clipboard_get(selection=\\"PRIMARY\\").wait_for_text()"')
  956. return True
  957. else:
  958. return False
  959. #~ print unichr (key.code)
  960. if key.code in (KeyCode.KEY_Control_L,KeyCode.KEY_Control_R,
  961. KeyCode.KEY_Alt_L, KeyCode.KEY_Alt_R):
  962. return True
  963. elif key.code in (KeyCode.KEY_KP_Space, KeyCode.KEY_space):
  964. #~ print self._editor.get_candidate_cursor()
  965. if self._editor.candidates and self._editor.lookup_table.is_cursor_visible():
  966. self._editor.select_cursor()
  967. return True
  968. elif self._editor.pinyinlist:
  969. self._editor.convert_all ()
  970. return True
  971. elif self._editor.cursor < len (self._editor.wordlist):
  972. self._editor.jump_to_next_word()
  973. return True
  974. else:
  975. self.commit_string (self._editor.commit())
  976. return True
  977. elif key.code == KeyCode.KEY_BackSpace:
  978. if not self._editor.pinyinlist and self.get_extra_string():
  979. raise InputException()
  980. self._editor.del_current()
  981. return True
  982. elif key.code == KeyCode.KEY_Delete:
  983. if self._editor.lookup_table.is_cursor_visible():
  984. self._editor.delete_cursor_phrase ()
  985. else:
  986. self._editor.del_next ()
  987. return True
  988. elif key.code >= KeyCode.KEY_0 and key.code <= KeyCode.KEY_9 and key.mask & KeyMask.ControlMask:
  989. self._editor.delete_phrase (key.code - KeyCode.KEY_1)
  990. return True
  991. elif key.code >= KeyCode.KEY_0 and key.code <= KeyCode.KEY_9 and key.mask & KeyMask.AltMask:
  992. self._editor.move_cursor_to (key.code - KeyCode.KEY_0)
  993. return True
  994. elif key.code >= KeyCode.KEY_1 and key.code <= KeyCode.KEY_9:
  995. self._editor.select (key.code-KeyCode.KEY_1)
  996. return True
  997. elif key.code >= KeyCode.KEY_KP_1 and key.code <= KeyCode.KEY_KP_9:
  998. self._editor.select (key.code-KeyCode.KEY_KP_1)
  999. return True
  1000. elif key.code == KeyCode.KEY_Shift_L:
  1001. if not self._editor.is_end():
  1002. self._editor.select (0)
  1003. self._shift_key = None
  1004. return True
  1005. elif key.code == KeyCode.KEY_Shift_R:
  1006. if not self._editor.is_end():
  1007. self._editor.select (1)
  1008. self._shift_key = None
  1009. return True
  1010. elif key.code in (KeyCode.KEY_equal, KeyCode.KEY_bracketright, KeyCode.KEY_Page_Down):
  1011. if self._editor.candidates:
  1012. self._editor.lookup_table.page_down ();
  1013. return True
  1014. else:
  1015. raise InputException()
  1016. elif key.code in (KeyCode.KEY_minus, KeyCode.KEY_bracketleft, KeyCode.KEY_Page_Up):
  1017. if self._editor.candidates:
  1018. self._editor.lookup_table.page_up ();
  1019. return True
  1020. else:
  1021. raise InputException()
  1022. elif key.code==KeyCode.KEY_Up:
  1023. if self._editor.candidates:
  1024. self._editor.lookup_table.cursor_up()
  1025. self._editor.lookup_table.show_cursor(True)
  1026. return True
  1027. else:
  1028. raise InputException()
  1029. elif key.code==KeyCode.KEY_Down:
  1030. if self._editor.candidates:
  1031. self._editor.lookup_table.cursor_down()
  1032. self._editor.lookup_table.show_cursor(True)
  1033. return True
  1034. else:
  1035. raise InputException()
  1036. elif key.code == KeyCode.KEY_Left or key.code == KeyCode.KEY_b and key.mask & KeyMask.ControlMask:
  1037. self._editor.move_cursor (-1)
  1038. return True
  1039. elif key.code == KeyCode.KEY_Right or key.code == KeyCode.KEY_f and key.mask & KeyMask.ControlMask:
  1040. if self.get_extra_string():
  1041. raise InputException()
  1042. self._editor.move_cursor (1)
  1043. return True
  1044. elif key.code == KeyCode.KEY_h and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_Home:
  1045. if self.get_extra_string():
  1046. raise InputException()
  1047. self._editor.move_cursor_to (1)
  1048. return True
  1049. elif key.code == KeyCode.KEY_e and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_End:
  1050. if self.get_extra_string():
  1051. raise InputException()
  1052. self._editor.move_cursor_to (0)
  1053. return True
  1054. elif key.code in (KeyCode.KEY_Return, KeyCode.KEY_KP_Enter):
  1055. self.commit_string (self._editor.commit() + self.get_extra_string())
  1056. self.clear()
  1057. return True
  1058. elif key.code == KeyCode.KEY_Escape or key.code == KeyCode.KEY_c and key.mask & KeyMask.ControlMask:
  1059. if self.origin_string:
  1060. self.commit_string(self.origin_string)
  1061. self._editor.clear()
  1062. self.origin_string = None
  1063. elif self._editor.lookup_table.is_cursor_visible():
  1064. self._editor.lookup_table.show_cursor(False)
  1065. self._editor.update()
  1066. else:
  1067. self.clear()
  1068. self._editor.clear()
  1069. return True
  1070. elif key.code <= 127 and ascii.ispunct (chr (key.code)) and not self.get_extra_string():
  1071. if not self._editor.is_empty ():
  1072. self.commit_string (self._editor.commit ())
  1073. self.commit_string (self.convert_to_full_width (unichr (key.code)))
  1074. return True
  1075. else:
  1076. raise InputException ()
  1077. def convert_to_full_width (self, c):
  1078. if c == u".":
  1079. if self._prev_key and self._prev_key.code >= KeyCode.KEY_0 and self._prev_key.code <= KeyCode.KEY_9:
  1080. return u"."
  1081. else:
  1082. return u"\u3002"
  1083. elif c == u"\\":
  1084. return u"\u3001"
  1085. elif c == u"^":
  1086. return u"\u2026\u2026"
  1087. elif c == u"_":
  1088. return u"\u2014\u2014"
  1089. elif c == u"$":
  1090. return u"\uffe5"
  1091. elif c == u"\"":
  1092. self._double_quotation_state = not self._double_quotation_state
  1093. if self._double_quotation_state:
  1094. return u"\u201c"
  1095. else:
  1096. return u"\u201d"
  1097. elif c == u"'":
  1098. self._single_quotation_state = not self._single_quotation_state
  1099. if self._single_quotation_state:
  1100. return u"\u2018"
  1101. else:
  1102. return u"\u2019"
  1103. elif c == u"<":
  1104. return u"\u300a"
  1105. elif c == u">":
  1106. return u"\u300b"
  1107. return scim.unichar_half_to_full (c)
  1108. class ZhengJuFactory (IMEngineFactory):
  1109. def __init__ (self, config):
  1110. IMEngineFactory.__init__ (self, config)
  1111. self.name = _(u"ZhengJu")
  1112. self.uuid = "59e29ad8-3c95-4cd0-b02f-e21bf1317f7a"
  1113. self.authors = u"Yu Fan <yufanyufan@gmail.com>"
  1114. self.icon_file = "/usr/share/scim/icons/scim-python.png"
  1115. self.credits = u"GPL"
  1116. self.help = _(u"Help For ZhengJu")
  1117. self.set_languages ("zh")
  1118. self._config = config
  1119. def create_instance (self, encoding, id):
  1120. pinyin = self._config.read ("/IMEngine/Python/ZhengJu/PinYinSchema", "JianPin")
  1121. if pinyin == "JianPin":
  1122. import JianPin
  1123. engine = JianPin.JianPinEngine (self, self._config, encoding, id)
  1124. elif pinyin == "QuanPin":
  1125. import QuanPin
  1126. engine = QuanPin.QuanPinEngine (self, self._config, encoding, id)
  1127. elif pinyin == "ShuangPin":
  1128. import ShuangPin
  1129. engine = ShuangPin.ShuangPinEngine (self, self._config, encoding, id)
  1130. else:
  1131. import JianPin
  1132. engine = JianPin.JianPinEngine (self, self._config, encoding, id)
  1133. return engine
  1134. def reload_config (self, config):
  1135. self._config = config
  1136. class PseudoConfig:
  1137. def read(self, string, default):
  1138. return default;
  1139. def train(file_name):
  1140. print "Training by " + file_name
  1141. editor = Editor()
  1142. import re
  1143. ex = re.compile(ur"[\da-zA-Z\W]",re.UNICODE)
  1144. for l in file(file_name):
  1145. ll = unicode(l,"utf8")[:-1]
  1146. t = ex.split(ll)
  1147. for i in t:
  1148. if i:
  1149. try:
  1150. editor.reverse(i)
  1151. #~ print i
  1152. editor.learn()
  1153. except:
  1154. print file
  1155. traceback.print_exc ()
  1156. raise Exception()
  1157. def print_usage():
  1158. print "ZhengJu -f FILE\tRead Sentenc from file"
  1159. print "ZhengJu \tConvert parameter to pinyin"
  1160. if __name__ == "__main__":
  1161. editor = Editor()
  1162. import sys
  1163. try:
  1164. if len(sys.argv) == 3:
  1165. if sys.argv[1] == "-f":
  1166. train(sys.argv[2])
  1167. else:
  1168. raise Exception()
  1169. elif len(sys.argv) == 2:
  1170. try:
  1171. editor.reverse(unicode(sys.argv[1],"utf8"))
  1172. for i in editor.wordlist:
  1173. print ID_PINYIN_DICT[i.get_pinyin_id()],
  1174. print i.char,
  1175. except:
  1176. print "Can't convert this to pinyin"
  1177. raise Exception()
  1178. else:
  1179. raise Exception()
  1180. except:
  1181. traceback.print_exc ()
  1182. print_usage()
  1183. sys.exit(1)
  1184. else:
  1185. sys.exit(0)