PageRenderTime 38ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/python/engine/PinYin/QuanPin.py

http://scim-python.googlecode.com/
Python | 198 lines | 171 code | 2 blank | 25 comment | 1 complexity | ea563582f37f3c72924ea36599526202 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. # vim: set noet ts=4:
  3. #
  4. # scim-python
  5. #
  6. # Copyright (c) 2007-2008 Yu Fan <yufanyufan@gmail.com>
  7. #
  8. #
  9. # This library is free software; you can redistribute it and/or
  10. # modify it under the terms of the GNU Lesser General Public
  11. # License as published by the Free Software Foundation; either
  12. # version 2 of the License, or (at your option) any later version.
  13. #
  14. # This library is distributed in the hope that it will be useful,
  15. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  17. # GNU Lesser General Public License for more details.
  18. #
  19. # You should have received a copy of the GNU Lesser General Public
  20. # License along with this program; if not, write to the
  21. # Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  22. # Boston, MA 02111-1307 USA
  23. #
  24. # $Id: $
  25. #
  26. from ZhengJu import *
  27. import scim
  28. import os
  29. from scim import KeyCode
  30. from scim import KeyMask
  31. from scim import Property
  32. import traceback
  33. import sys
  34. from PYDict import *
  35. from gettext import dgettext
  36. _ = lambda a : dgettext ("scim-python", a)
  37. strip = lambda a: a if a == "" or a == "'" or a[0]!="'" else a[1:]
  38. class QuanPinEngine(Engine):
  39. def __init__ (self, factory, config, encoding, id):
  40. Engine.__init__(self, factory, config, encoding, id)
  41. def clear(self):
  42. self.extra_string = ""
  43. Engine.clear(self)
  44. def get_extra_string(self):
  45. return self.extra_string
  46. def is_valid_head(self, str):
  47. #~ print str
  48. if str == "'" or strip(str)[0] in SHENGMU_LIST and (strip(str) in PINYIN_LIST or strip(str) in PINYIN_PARTIAL_LIST):
  49. return True
  50. for i in range(len(str), 0, -1):
  51. if strip(str)[0] in SHENGMU_LIST and strip(str[:i]) in PINYIN_LIST:
  52. return True
  53. return False
  54. def split(self, strs):
  55. if strip(strs) in PINYIN_LIST \
  56. or strip(strs) in PINYIN_PARTIAL_LIST \
  57. or strip(strs) in SHENGMU_LIST \
  58. or strs == "'":
  59. yield (strs, "")
  60. else:
  61. for i in range(len(strs), 0, -1):
  62. if strs[:i][-1] == "'":
  63. continue
  64. if strip(strs[:i]) in PINYIN_LIST:
  65. yield ( strs[:i], strs[i:] )
  66. if strip(strs[:i-1]) in PINYIN_LIST and strip(strs[:i])[-1] in SHENGMU_LIST and self.is_valid_head(strs[i-1:]):
  67. yield ( strs[:i-1], strs[i-1:])
  68. break
  69. def recursive_parse(self, str):
  70. for (word, strleft) in self.split(str):
  71. if strleft:
  72. for pinyinlist in self.recursive_parse(strleft):
  73. yield [word] + pinyinlist
  74. else:
  75. yield [word]
  76. def process_pinyin(self, c):
  77. p = u"".join( i.get_screen_pinyin() for i in self._editor.pinyinlist) + c
  78. all = list(self.recursive_parse(p))
  79. if not all:
  80. raise InputException()
  81. min_len = 100
  82. freq = 0
  83. for q in all:
  84. pinyinlist = []
  85. for i in q:
  86. #~ print i,
  87. pinyinlist.append(PinYinWord(pinyin=i))
  88. #~ print len(pinyinlist)
  89. predicts = self._editor.get_predict_pinyinlist(pinyinlist)
  90. #~ print len(predicts)
  91. if len(predicts) < min_len:
  92. min_len = len(predicts)
  93. #~ freq = sum([i[ADJ_FREQ] for i in predicts])
  94. if len(predicts)>1:
  95. freq = self._editor.freq_alg(predicts[0],predicts[1])
  96. else:
  97. freq = predicts[0][ADJ_FREQ]
  98. #~ print freq
  99. best = (pinyinlist,predicts)
  100. elif len(predicts) == min_len:
  101. if len(predicts)>1:
  102. tfreq = self._editor.freq_alg(predicts[0],predicts[1])
  103. else:
  104. tfreq = predicts[0][ADJ_FREQ]
  105. #~ print tfreq
  106. if tfreq > freq:
  107. freq = tfreq
  108. best = (pinyinlist,predicts)
  109. self._editor.pinyinlist = best[0]
  110. self._editor.predict = best[1]
  111. def chinese_process_key_event (self, key):
  112. if key.mask == KeyMask.NullMask and (\
  113. (key.code >= KeyCode.KEY_a and key.code <= KeyCode.KEY_z) or \
  114. key.code == KeyCode.KEY_apostrophe):
  115. self.process_pinyin(unichr (key.code))
  116. self._editor.auto_convert_quanpin ()
  117. return True
  118. elif self._editor.pinyinlist and key.code == KeyCode.KEY_BackSpace:
  119. p = self._editor.pinyinlist[-1].get_screen_pinyin()
  120. if len(p)>1:
  121. self._editor.pinyinlist[-1].set_pinyin(p[:-1])
  122. else:
  123. del self._editor.pinyinlist[-1]
  124. self._editor.update ()
  125. return True
  126. elif (self.extra_string or self._editor.pinyinlist) and (key.code == KeyCode.KEY_Left or (key.code == KeyCode.KEY_b and key.mask & KeyMask.ControlMask)):
  127. if self._editor.pinyinlist:
  128. p = self._editor.pinyinlist[-1].get_screen_pinyin()
  129. self.extra_string = p + self.extra_string
  130. del self._editor.pinyinlist[-1]
  131. self._editor.update ()
  132. return True
  133. elif (self.extra_string or self._editor.pinyinlist) and (key.code == KeyCode.KEY_Right or (key.code == KeyCode.KEY_f and key.mask & KeyMask.ControlMask)):
  134. if self.extra_string:
  135. #~ print self.extra_string[0]
  136. self.process_pinyin( self.extra_string[0])
  137. self.extra_string = self.extra_string[1:]
  138. self._editor.auto_convert_quanpin ()
  139. else:
  140. self.extra_string = u"".join( i.get_screen_pinyin() for i in self._editor.pinyinlist)
  141. self._editor.pinyinlist = []
  142. self._editor.update()
  143. return True
  144. elif self.extra_string and key.code == KeyCode.KEY_Delete:
  145. if self.extra_string:
  146. self.extra_string = self.extra_string[1:]
  147. return True
  148. else:
  149. raise InputException()
  150. elif self.extra_string and key.code in (KeyCode.KEY_KP_Space, KeyCode.KEY_space):
  151. p = u"".join( i.get_screen_pinyin() for i in self._editor.pinyinlist)
  152. c = ""
  153. while True:
  154. c += self.extra_string[0]
  155. if list(self.recursive_parse(p+c)):
  156. self.extra_string = self.extra_string[1:]
  157. else:
  158. break
  159. if not self.extra_string:
  160. break
  161. if c:
  162. while c:
  163. self.process_pinyin(c[0])
  164. self._editor.auto_convert_quanpin ()
  165. c = c[1:]
  166. return True
  167. else:
  168. raise InputException()
  169. elif Engine.chinese_process_key_event (self,key):
  170. return True;
  171. return False
  172. class QuanPinFactory (IMEngineFactory):
  173. def __init__ (self, config):
  174. IMEngineFactory.__init__ (self, config)
  175. self.name = _(u"QuanPin")
  176. self.uuid = "00c1690f-214c-447c-be18-1db199bae183"
  177. self.authors = u"Yu Fan <yufanyufan@gmail.com>"
  178. self.icon_file = "/usr/share/scim/icons/scim-python.png"
  179. self.credits = u"GPL"
  180. self.help = _(u"Help For QuanPin")
  181. self.set_languages ("zh")
  182. self._config = config
  183. def create_instance (self, encoding, id):
  184. engine = QuanPinEngine (self, self._config, encoding, id)
  185. return engine
  186. def reload_config (self, config):
  187. pass