PageRenderTime 90ms CodeModel.GetById 3ms app.highlight 77ms RepoModel.GetById 1ms app.codeStats 0ms

/python/engine/PinYin/ZhengJu.py

http://scim-python.googlecode.com/
Python | 1268 lines | 1239 code | 4 blank | 25 comment | 2 complexity | 46afeedb53a388189acf7cb3c399f7df MD5 | raw file
   1# -*- coding: utf-8 -*-
   2# vim: set noet ts=4:
   3#
   4# scim-python
   5#
   6# Copyright (c) 2007-2008 Yu Fan <yufanyufan@gmail.com>
   7#
   8#
   9# This library is free software; you can redistribute it and/or
  10# modify it under the terms of the GNU Lesser General Public
  11# License as published by the Free Software Foundation; either
  12# version 2 of the License, or (at your option) any later version.
  13#
  14# This library is distributed in the hope that it will be useful,
  15# but WITHOUT ANY WARRANTY; without even the implied warranty of
  16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17# GNU Lesser General Public License for more details.
  18#
  19# You should have received a copy of the GNU Lesser General Public
  20# License along with this program; if not, write to the
  21# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  22# Boston, MA  02111-1307  USA
  23#
  24# $Id: $
  25#
  26import scim
  27import scim.Log
  28import os
  29from scim import KeyCode
  30from scim import KeyMask
  31from scim import Property
  32import traceback
  33from PYDict import *
  34from gettext import dgettext
  35from ZhengJuDB import *
  36import scim.ascii as ascii
  37from sets import Set
  38import popen2
  39
  40_ = lambda a : dgettext ("scim-python", a)
  41RGB = lambda r, g, b : (((r & 0xff) << 16) | ((g & 0xff) << 8) | (b & 0xff))
# Short aliases for the scim engine base classes (Engine below derives from IMEngine).
IMEngine = scim.IMEngine
IMEngineFactory = scim.IMEngineFactory
  44
  45(YLEN, Y0, Y1, Y2, Y3, YX, PHRASE, ADJ_FREQ) = range (0, 8)
  46candidate_sort = lambda x,y: cmp(y[YLEN],x[YLEN]) if x[YLEN] != y[YLEN] else cmp(y[ADJ_FREQ],x[ADJ_FREQ])
  47
  48class InputException(Exception):
  49	def __init__ (self):
  50		Exception.__init__(self)
  51
  52class PinYinWord:
  53	def __init__ (self, shengmu = "", yunmu = "", pinyin = ""):
  54		self.char = ""
  55		self._pinyin_id = None
  56		self.manual = None
  57		self.char = ""
  58		self.spliter = ""
  59		if pinyin:
  60			self.set_pinyin(pinyin)
  61		else:
  62			self.set_pinyin(shengmu + yunmu)
  63	def set_pinyin(self,pinyin):
  64		if pinyin[0] == "'":
  65			self.spliter = "'"
  66			pinyin = pinyin[1:]
  67		if pinyin[:2] in SHENGMU_LIST:
  68			self.shengmu = pinyin[:2]
  69			self.yunmu = pinyin[2:]
  70		elif pinyin[:1] in SHENGMU_LIST:
  71			self.shengmu = pinyin[:1]
  72			self.yunmu = pinyin[1:]
  73		else:
  74			self.shengmu = ""
  75			self.yunmu = pinyin
  76		if self.get_pinyin() in PINYIN_LIST:
  77			self._pinyin_id = PINYIN_DICT [self.get_pinyin()]
  78			self._sheng_mu_id = SHENGMU_DICT [self.get_shengmu()]
  79		else:
  80			self._sheng_mu_id = SHENGMU_DICT [self.get_shengmu()]
  81	def mohuyin(self):
  82		pinyin = ID_PINYIN_DICT[self.real_pinyin_id]
  83		if pinyin[:2] in SHENGMU_LIST:
  84			self.shengmu = pinyin[:2]
  85			yunmu = pinyin[2:]
  86		elif pinyin[:1] in SHENGMU_LIST:
  87			self.shengmu = pinyin[:1]
  88			yunmu = pinyin[1:]
  89		else:
  90			self.shengmu = ""
  91			yunmu = pinyin
  92		if self.yunmu != "":
  93			 self.yunmu = yunmu
  94		self.set_pinyin(self.get_pinyin())
  95	def get_sheng_mu_id (self):
  96		return self._sheng_mu_id
  97
  98	def get_pinyin_id (self):
  99		return self._pinyin_id
 100
 101	def set_pinyin_id (self, id):
 102		self.set_pinyin(ID_PINYIN_DICT[id])
 103		
 104	def get_shengmu (self):
 105		return self.shengmu
 106	
 107	def set_yunmu( self,yunmu):
 108		self.yunmu = yunmu
 109		if(yunmu != ""):
 110			self._pinyin_id = PINYIN_DICT [ self.get_pinyin() ]
 111		else:
 112			self._pinyin_id = None
 113
 114	def set_char (self,char):
 115		self.char = char
 116
 117	def get_pinyin (self):
 118		return self.shengmu + self.yunmu
 119
 120	def get_screen_pinyin (self):
 121		return self.spliter + self.shengmu + self.yunmu
 122
 123	def __str__ (self):
 124		return self.get_pinyin()
 125
 126	def is_complete (self):
 127		return self._pinyin_id != None
 128
 129		
 130class Editor:
 131	database = None
	def __init__ (self, config = None):
		"""Create an editor with an empty buffer and a 9-entry lookup table.

		Without a config a PseudoConfig() is used.  The ZhengJuDB is created
		once and shared by all editors through ``Editor.database``.
		"""
		if config == None:
			config = PseudoConfig()
		if Editor.database == None:
			Editor.database = ZhengJuDB(config)
		self.config = config
		table = scim.LookupTable (9)
		table.fix_page_size(True)
		self.lookup_table = table
		self.clear()
		self.load_config(config)
 142
	def clear(self):
		"""Reset cursor, buffers, candidates and the lookup table, and clear the database cache."""
		self.cursor = 0
		self.wordlist, self.pinyinlist = [], []
		self.candidates, self.predict = [], []
		table = self.lookup_table
		table.clear()
		table.show_cursor(False)
		Editor.database.clear_cache()
	def load_config(self, config):
		"""Read the ZhengJu options from ``config`` and rebuild the fuzzy-pinyin tables.

		Each option is stored on the editor under the attribute named in the
		first column; the third column is the default passed to config.read().
		"""
		Editor.database.load_config(config)
		options = (
			("userword", "/IMEngine/Python/ZhengJu/CreateUserWords", True),
			("userphrase", "/IMEngine/Python/ZhengJu/CreateUserPhrases", True),
			("adjustfreq", "/IMEngine/Python/ZhengJu/AdjustWordFreq", True),
			("logconverror", "/IMEngine/Python/ZhengJu/LogConvError", True),
			("splitpinyin", "/IMEngine/Python/ZhengJu/SplitPinyin", True),
			("enable_mohuyin", "/IMEngine/Python/ZhengJu/FuzzyPinyin", False),
			("mohuyin_s_sh", "/IMEngine/Python/ZhengJu/FuzzyS_Sh", True),
			("mohuyin_c_ch", "/IMEngine/Python/ZhengJu/FuzzyC_Ch", True),
			("mohuyin_z_zh", "/IMEngine/Python/ZhengJu/FuzzyZ_Zh", True),
			("mohuyin_l_n", "/IMEngine/Python/ZhengJu/FuzzyL_N", True),
			("mohuyin_in_ing", "/IMEngine/Python/ZhengJu/FuzzyIn_Ing", True),
			("mohuyin_en_eng", "/IMEngine/Python/ZhengJu/FuzzyEn_Eng", True),
			("mohuyin_an_ang", "/IMEngine/Python/ZhengJu/FuzzyAn_Ang", True),
		)
		for attr, key, default in options:
			setattr(self, attr, config.read (key, default))
		self.build_mohuyin()
 168	def build_mohuyin(self):
 169		self.shengmu_mohu = {}
 170		if self.mohuyin_s_sh:
 171			self.shengmu_mohu["s"]= MOHU_SHENGMU["s"]
 172			self.shengmu_mohu["sh"]= MOHU_SHENGMU["sh"]
 173		if self.mohuyin_z_zh:
 174			self.shengmu_mohu["z"]= MOHU_SHENGMU["z"]
 175			self.shengmu_mohu["zh"]= MOHU_SHENGMU["zh"]
 176		if self.mohuyin_c_ch:
 177			self.shengmu_mohu["c"]= MOHU_SHENGMU["c"]
 178			self.shengmu_mohu["ch"]= MOHU_SHENGMU["ch"]
 179		if self.mohuyin_l_n:
 180			self.shengmu_mohu["l"]= MOHU_SHENGMU["l"]
 181			self.shengmu_mohu["n"]= MOHU_SHENGMU["n"]
 182		self.yunmu_mohu = {}
 183		if self.mohuyin_an_ang:
 184			self.yunmu_mohu["an"] = MOHU_YUNMU["an"]
 185			self.yunmu_mohu["ang"] = MOHU_YUNMU["ang"]
 186		if self.mohuyin_en_eng:
 187			self.yunmu_mohu["en"] = MOHU_YUNMU["en"]
 188			self.yunmu_mohu["eng"] = MOHU_YUNMU["eng"]
 189		if self.mohuyin_in_ing:
 190			self.yunmu_mohu["in"] = MOHU_YUNMU["in"]
 191			self.yunmu_mohu["in"] = MOHU_YUNMU["ing"]
 192	def current (self):
 193		if self.pinyinlist:
 194			return self.pinyinlist[-1]
 195		else:
 196			return None
 197
 198	def is_empty (self):
 199		return (not self.pinyinlist) and (not self.wordlist)
 200
 201	def is_end (self):
 202		return self.is_empty() or (not self.pinyinlist) and self.cursor == len (self.wordlist)
 203
 204	def get_aux (self):
 205		return "".join ( u[PHRASE] for u in self.predict)
 206
 207	def get_screen_pinyin(self):
 208		if self.splitpinyin:
 209			s = ""
 210			if self.pinyinlist:
 211				for i in range(len(self.pinyinlist)-1):
 212					p = self.pinyinlist[i].get_screen_pinyin() + self.pinyinlist[i+1].get_screen_pinyin()[0]
 213					if p in PINYIN_LIST or p in PINYIN_PARTIAL_LIST:
 214						s += self.pinyinlist[i].get_screen_pinyin() + "'"
 215					else:
 216						s += self.pinyinlist[i].get_screen_pinyin()
 217				s += self.pinyinlist[-1].get_screen_pinyin()
 218			return s
 219		else:
 220			return u"".join( i.get_screen_pinyin() for i in self.pinyinlist)
 221
 222	def get_preedit (self):			
 223		return u"".join( [i.char for i in self.wordlist[0:self.cursor] ] ) +\
 224			self.get_screen_pinyin() + \
 225			u"".join ( [i.char for i in self.wordlist[self.cursor:]] )
 226
 227	def get_screen_cursor (self):
 228		if len(self.get_screen_pinyin())>0:
 229			return self.cursor + len(self.get_screen_pinyin())
 230		else:
 231			return self.cursor
 232	
	def pinyin_select (self, candidate, manual = False):
		"""Convert the leading pending syllables into ``candidate`` and move them into ``wordlist``.

		One syllable is consumed per character of candidate[PHRASE].  The real
		pinyin id of the first four characters comes from columns 1..4 of the
		candidate, later ones from the "'"-separated YX column.  The converted
		words are inserted at the cursor, which advances past them; a manual
		selection also triggers update().
		"""
		phrase = candidate[PHRASE]
		length = len(phrase)
		for i in range(length):
			word = self.pinyinlist[i]
			word.set_char(phrase[i])
			if i < 4:
				word.real_pinyin_id = candidate[i+1]
			else:
				py = candidate[YX].split("'")
				word.real_pinyin_id = PINYIN_DICT[py[i-4]]
			if self.enable_mohuyin:
				word.mohuyin()
			word.manual = manual
		converted = self.pinyinlist[:length]
		self.wordlist[self.cursor:self.cursor] = converted
		del self.pinyinlist[:length]
		self.cursor += length
		if manual:
			self.update()
 255	def reparse_backtrace(self):
 256		if self.cursor < len(self.wordlist):
 257			i = self.cursor
 258			while i >= 0:
 259				if self.wordlist[i].manual:
 260					break
 261				i-=1
 262			i += 1
 263			self.reparse(i)
 264		
	def convert_all (self):
		"""Accept every entry of the current prediction, then reparse and refresh."""
		for entry in self.predict:
			self.pinyin_select(entry)
		self.reparse_backtrace()
		self.update ()
 271
	def jump_to_next_word(self):
		"""Move the cursor to the first phrase boundary that lies beyond it.

		Boundaries come from split_phrase() applied to the current preedit.
		"""
		string = self.get_preedit ()
		position = 0
		for entry in self.split_phrase (string):
			if position > self.cursor:
				break
			position += entry[1]
		self.cursor = position
		self.update ()
 286
	def predict_len(self, predicts):
		"""Return the sum of the YLEN column over ``predicts``."""
		total = 0
		for entry in predicts:
			total += entry[YLEN]
		return total
 289
	def auto_convert (self):
		"""Commit leading predictions until the first two cover all pending syllables.

		The prediction is recomputed after every committed phrase and the
		candidate list is refreshed at the end.
		"""
		self.update_predict()
		while True:
			covered = self.predict_len(self.predict[:2])
			if covered >= len (self.pinyinlist):
				break
			self.pinyin_select(self.predict[0])
			self.update_predict()
		self.update_candidates()
 296
	def auto_convert_quanpin(self):
		"""Variant of auto_convert() that may leave one extra syllable pending.

		When the last syllable is neither a bare shengmu nor in
		PINYIN_PARTIAL_LIST but is a proper prefix of some entry of PINYIN_LIST,
		one more syllable is allowed to stay unconverted.  ``self.predict`` is
		used as found on entry; it is only recomputed after a phrase has been
		committed.
		"""
		last = self.pinyinlist[-1].get_pinyin()
		slack = 0
		if last not in SHENGMU_LIST and last not in PINYIN_PARTIAL_LIST:
			for t in PINYIN_LIST:
				if last != t and t.startswith(last):
					slack = 1
					break
		while self.predict_len(self.predict[:2]) + slack < len (self.pinyinlist):
			self.pinyin_select(self.predict[0])
			self.update_predict()
		self.update_candidates()
 312
	def update (self):
		"""Drop the candidate cursor, then recompute the prediction and the candidates."""
		self.candidate_cursor = None
		for refresh in (self.update_predict, self.update_candidates):
			refresh()
 317
 318	def update_predict (self):
 319		if self.pinyinlist:
 320			self.predict = self.get_predict_pinyinlist (self.pinyinlist) 
 321		else:
 322			self.predict = []
 323
	def reverse(self, phrase):
		"""Rebuild ``wordlist`` from already-converted text.

		The buffer is cleared, then the text is consumed front to back using the
		longest prefix for which the database's select_phrase() returns a record.
		Raises InputException when not even the first character can be found.
		"""
		self.clear()
		while phrase:
			for size in range(len(phrase), 0, -1):
				rows = self.database.select_phrase(phrase[:size])
				if rows:
					result = rows[0]
					break
			else:
				raise InputException()
			count = result[YLEN]
			for i in range(count):
				word = PinYinWord("'","")
				word.set_char(phrase[i])
				if i < 4:
					# columns 1..4 hold the ids of the first four syllables
					pinyin_id = result[i+1]
				else:
					pinyin_id = PINYIN_DICT[result[YX].split("'")[i-4]]
				word.set_pinyin_id (pinyin_id)
				word.real_pinyin_id = word._pinyin_id
				self.wordlist.append(word)
			phrase = phrase[count:]
	def split_phrase (self, string):
		"""Split ``string`` (the text of ``wordlist``) into known phrases.

		Returns a list of (start, length, text) tuples.  Positions index both
		``string`` and ``self.wordlist``.
		"""
		start = 0
		phrase_list = []
		while start < len(self.wordlist)-1:
			candidate = None
			if len(self.wordlist) - start >= 3:
				# At least three words left: query by the pinyin of the next three
				# and keep a result whose text matches ``string`` at this position.
				phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start + 3])
				#~ print len(phrase)
				for i in phrase:
					if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
						# NOTE(review): this compares the phrase texts, not their
						# lengths -- confirm which ordering is intended.
						if not candidate or candidate[PHRASE] < i[PHRASE]:
							candidate = i
				if candidate == None:
					# Fall back to a lookup on the next two words.
					phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
					if phrase:
						candidate = phrase[0]			
			else:
				phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
				if phrase:
					candidate = phrase[0]
			if candidate == None:
				# Nothing known starts here: emit a single character.
				phrase_list.append ( (start, 1, string[start]))
				start += 1
			else:
				phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
				start += len(candidate[PHRASE])
		# The loop stops one word early; emit a trailing single character if one is left.
		if start < len (self.wordlist):
			phrase_list.append ((start,1,string[-1]))
		return phrase_list
	def split_phrasev2 (self, string):
		"""Variant of split_phrase() that only queries by the pinyin of the next two words.

		Returns a list of (start, length, text) tuples.
		"""
		start = 0
		phrase_list = []
		while start < len(self.wordlist)-1:
			phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start+2])
			candidate = None
			for i in phrase:
				# Only results whose text matches ``string`` at this position qualify.
				if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
					# NOTE(review): compares phrase texts, not lengths -- confirm intent.
					if not candidate or candidate[PHRASE] < i[PHRASE]:
						candidate = i
			if candidate == None:
				phrase_list.append ( (start, 1, string[start]))
				start += 1
			else:
				phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
				start += len(candidate[PHRASE])
		# Emit the last character when the loop left one over.
		if start < len (self.wordlist):
			phrase_list.append ((start,1,string[-1]))
		return phrase_list
 397
 398	def learn_user_words(self, phrase_list, string, sentence):
 399		if not self.userword:
 400			return
 401		start = 0
 402		while start < len (phrase_list):
 403			tmp_phrase_start = phrase_list[start][0]
 404			tmp_phrase = ""
 405			while start < len (phrase_list) and phrase_list[start][1] == 1 \
 406				and string[phrase_list[start][0]] != sentence[phrase_list[start][0]]:
 407				tmp_phrase += phrase_list[start][2]
 408				del phrase_list[start]			
 409			if tmp_phrase:
 410				phrase_list.insert (start, (tmp_phrase_start, len(tmp_phrase), tmp_phrase) )
 411			if len (tmp_phrase) > 1:
 412				Editor.database.add_phrase (self.wordlist[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)], USER_WORD) 	
 413				self.log_conv_error( sentence, string, phrase_list, tmp_phrase_start, tmp_phrase_start, 0)
 414				string = string[:tmp_phrase_start] + sentence[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)] + string[tmp_phrase_start + len(tmp_phrase):]
 415			start += 1
 416		return string
 417		
 418
 419	def split_predict (self):
 420		predict = []
 421		start = 0
 422		while start < len (self.wordlist):
 423			p = self.get_predict (self.wordlist[start:])
 424			predict.append ( (start,len(p[0][PHRASE]), p[0][PHRASE]) )
 425			start += len (p[0][PHRASE])
 426		return predict
 427
 428	def addphrase (self, phrase_list, pstart, pend, freq):
 429		if pstart < 0:
 430			return
 431		if pend >= len (phrase_list):
 432			return
 433		Editor.database.add_phrase(\
 434			self.wordlist[phrase_list[pstart][0]:(phrase_list[pend][0]+phrase_list[pend][1])], freq)
 435
 436	def adjust_all_freq (self, phrase_list):
 437		if not self.userphrase:
 438			return
 439		p = [ self.wordlist[i[0]:i[0]+i[1]] for i in phrase_list]
 440		for i in p:
 441			Editor.database.adjust_phrase_freq (i)
 442
 443	def adjust_freq (self, phrase_list, phrase_begin):
 444		if not self.adjustfreq:
 445			return
 446		i = phrase_list[phrase_begin]
 447		p = self.wordlist[i[0]:i[0]+i[1]]
 448		Editor.database.adjust_phrase_freq (p)
 449				
	def delete_phrase(self, n):
		"""Remove the n-th candidate of the current lookup page from the database.

		Raises InputException when ``n`` is not on the page, or when the
		candidate's ADJ_FREQ is zero or a multiple of neither USER_PHRASE nor
		USER_WORD.
		"""
		table = self.lookup_table
		if n >= table.get_current_page_size():
			raise InputException()
		candidate = self.candidates[table.get_current_page_start() + n]
		freq = candidate[ADJ_FREQ]
		if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
			raise InputException()
		Editor.database.remove_phrase (candidate)
		self.update ()
 459	
	def delete_cursor_phrase(self):
		"""Remove the candidate under the lookup-table cursor from the database.

		When the candidate's ADJ_FREQ is zero or a multiple of neither
		USER_PHRASE nor USER_WORD, the candidate cursor is dropped and
		InputException is raised instead.
		"""
		candidate = self.candidates[self.lookup_table.get_cursor_pos() ]
		freq = candidate[ADJ_FREQ]
		if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
			self.candidate_cursor = None
			raise InputException()
		Editor.database.remove_phrase (candidate)
		self.update ()
 468
 469	def log_conv_error(self, predict, sentence, phrase_list, pstart, pend, type):
 470		if pstart < 0:
 471			return
 472		if pend >= len (phrase_list):
 473			return
 474		if self.logconverror:
 475			begin = phrase_list[pstart][0]
 476			end = phrase_list[pend][0]+phrase_list[pend][1]
 477			p = open(os.path.expanduser ("~/.scim/zhengju-conv-error.log"),'a')
 478			print >> p, sentence[begin:end].encode ("utf-8"), predict[begin:end].encode ("utf-8"), type
 479			p.close ()
 480			
	def learn (self):
		"""Compare the committed text with the automatic prediction and learn from the differences.

		Depending on the userword / userphrase / adjustfreq / logconverror
		options this adds user words and user phrases, adjusts a phrase
		frequency, and logs conversion errors.
		"""
		if not self.userword and not self.userphrase and not self.adjustfreq and not self.logconverror:
			return
		# ``predict``/``sentence``: what the predictor would have produced.
		predict = self.split_predict ()
		sentence = u"".join ([ i[2] for i in predict])
		for i in self.wordlist:
			i._pinyin_id = i.real_pinyin_id
		# ``string``/``phrase_list``: what is actually in the buffer.
		string = self.get_preedit ()
		phrase_list = self.split_phrase (string)
		string = self.learn_user_words(phrase_list, string, sentence)
		#~ print "out"
		#~ for i in phrase_list:
			#~ print i[1],i[2]
		#~ for i in predict:
			#~ print i[1],i[2]
		if not self.userphrase and not self.adjustfreq:
			return
		# Walk both segmentations in parallel; whenever their right edges
		# coincide, compare the text accumulated since the previous common edge.
		cur_phrase = 0
		cur_predict = 0
		phrase_begin = 0
		predict_begin = 0
		while cur_phrase < len(phrase_list):
			# Advance the prediction until its right edge reaches the phrase's.
			while predict[cur_predict][0]+ predict[cur_predict][1] < phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
				cur_predict += 1
			if predict[cur_predict][0]+ predict[cur_predict][1] > phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
				# The prediction overshoots: extend the phrase side instead.
				cur_phrase += 1
			else:
				#~ print string[phrase_list[phrase_begin][0]:phrase_list[cur_phrase][0]+phrase_list[cur_phrase][1]]
				#~ print sentence[predict[predict_begin][0]:predict[cur_predict][0]+predict[cur_predict][1]]
				if string[phrase_list[phrase_begin][0]:phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]]!=\
					sentence[predict[predict_begin][0]:predict[cur_predict][0] + predict[cur_predict][1]]:
					if cur_phrase - phrase_begin == 0:
						if cur_predict - predict_begin == 0:
							# One phrase against one prediction: learn it joined
							# with its left and with its right neighbour.
							self.addphrase(phrase_list, phrase_begin - 1, cur_phrase, USER_PHRASE)
							self.log_conv_error(sentence, string, phrase_list, phrase_begin - 1, cur_phrase, 1)
							self.addphrase(phrase_list, phrase_begin, cur_phrase + 1, USER_PHRASE)
							self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase + 1, 1)
							self.adjust_freq (phrase_list, phrase_begin)
					else:
						# Several phrases in the span: learn the whole span as one phrase.
						self.addphrase (phrase_list, phrase_begin, cur_phrase, USER_PHRASE)
						self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase, 2)
				phrase_begin = cur_phrase + 1
				predict_begin = cur_predict + 1
				cur_phrase += 1
		Editor.database.clean_useless_phrase()
 526	
	def freq_alg(self, phrase1, phrase2):
		"""Score a two-phrase split: the sum of both phrases' length-weighted frequencies."""
		return self._weighted_freq(phrase1) + self._weighted_freq(phrase2)

	def _weighted_freq(self, phrase):
		"""Return ADJ_FREQ scaled by a weight that depends on the phrase length.

		A single character weighs 10, lengths below 4 weigh their length, and
		anything longer weighs the square of its length.
		"""
		length = len(phrase[PHRASE])
		if length == 1:
			weight = 10
		elif length < 4:
			weight = length
		else:
			weight = pow(length, 2)
		return phrase[ADJ_FREQ] * weight
 546
 547
	def get_predict_pinyinlist (self, pinyinlist):
		"""Predict a phrase sequence for ``pinyinlist``.

		Tried in order:
		1. a record from select_words_by_pinyin_list() for the whole list;
		2. the first record from select_words_by_pinyin_list_all(), truncated
		   to the number of syllables;
		3. the best-scoring (freq_alg) split into two phrases over the longest
		   prefix for which any split exists, followed by a recursive
		   prediction for the syllables that prefix leaves over.

		Returns a list of phrase records; it is empty when nothing matches.
		"""
		db = Editor.database
		candidates = db.select_words_by_pinyin_list (pinyinlist)
		if candidates:
			return [candidates[0]]
		candidates = db.select_words_by_pinyin_list_all(pinyinlist)
		if candidates:
			p = list (candidates[0])
			p[YLEN] = len (pinyinlist)
			p[PHRASE] = p[PHRASE][:p[YLEN]]
			return [p]
		max_freq = 0
		predict = []
		for length in range(len (pinyinlist), 1, -1):
			for i in range (1, length):
				candidates = db.select_words_by_pinyin_list (pinyinlist[:i])
				if not candidates:
					continue
				tmp_phrase = candidates[0]
				candidates2 = db.select_words_by_pinyin_list(pinyinlist[i:length])
				if candidates2:
					tmp_phrase2 = candidates2[0]
				else:
					candidates2 = db.select_words_by_pinyin_list_all(pinyinlist[i:length])
					if not candidates2:
						continue
					# Only a longer phrase matches: cut it down to the syllables we have.
					p = list (candidates2[0])
					p[YLEN] = length - i
					p[PHRASE] = p[PHRASE][:p[YLEN]]
					tmp_phrase2 = p
				new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
				if new_freq >= max_freq:
					predict = [tmp_phrase, tmp_phrase2]
					max_freq = new_freq
			if predict:
				break
		consumed = self.predict_len(predict)
		if consumed <= 0:
			# Nothing matched: recursing on the unchanged list would never terminate.
			return predict
		if consumed < len (pinyinlist):
			return predict + self.get_predict_pinyinlist(pinyinlist[consumed:])
		return predict
 606
 607	def get_predict (self, pinyinlist):
 608		if not pinyinlist:
 609			return []
 610		candidates = Editor.database.select_words_by_pinyin_list(pinyinlist)
 611		if candidates:
 612			#~ print "phrase1",candidates[0][PHRASE],candidates[0][ADJ_FREQ]
 613			return [candidates[0]]
 614		else:
 615			candidates = Editor.database.select_words_by_pinyin_list_all(pinyinlist)
 616			if candidates:
 617				p = list (candidates[0]);
 618				p[YLEN] = len (pinyinlist)
 619				p[PHRASE] = p[PHRASE][:p[YLEN]]
 620				return [p]
 621		max_freq = 0
 622		max_length =0
 623		#~ print "try words"
 624		#~ if longest==1:
 625			#~ return [Editor.database.select_words_by_pinyin_list(pinyinlist[:1])[0][PHRASE]]
 626		#~ print longest
 627		for i in range (1, len(pinyinlist)):
 628			candidates = Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
 629			if not candidates:
 630				continue
 631			tmp_phrase = candidates[0]
 632			tmp_freq = tmp_phrase[ADJ_FREQ]
 633			longest2 = Editor.database.get_longest_phrase_length (pinyinlist[i:])
 634			#~ print "phrase1",tmp_phrase[PHRASE]
 635			#~ print i,longest2
 636			for p in range(i + longest2,i-1,-1):
 637				if p < max_length:
 638					continue
 639				candidates2 = Editor.database.select_words_by_pinyin_list(pinyinlist[i:p+1])
 640				#~ print len(candidates2)
 641				if candidates2:
 642					tmp_phrase2 = candidates2[0]
 643					#~ print "phrase2",tmp_phrase2[PHRASE]
 644					tmp_freq2 = tmp_phrase2[ADJ_FREQ]
 645					#~ print tmp_phrase, " ", candidates2[0][PYSQLiteDB.PHRASE]
 646					new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
 647					if p > max_length or \
 648						(new_freq >= max_freq and p == max_length):
 649						predict = [tmp_phrase, tmp_phrase2]
 650						#~ print tmp_phrase[PHRASE],tmp_phrase2[PHRASE], tmp_phrase[ADJ_FREQ],tmp_phrase2[ADJ_FREQ]
 651						max_freq = new_freq
 652						max_length = p
 653		#~ print "get_predict" + predict[0], max_length
 654		return predict
 655
 656	def reparse (self, start):
 657		#~ print "reparse"
 658		if start == len (self.wordlist):
 659			return
 660		predict = self.get_predict (self.wordlist[start:])
 661		phrase = predict[0][PHRASE]
 662		length = len (phrase) 
 663		#~ if len(phrase)<len(self.wordlist)-start \
 664					#~ else len(self.wordlist)-start
 665		#~ print string
 666		for i in range(0, length):
 667			if self.wordlist[start+i].manual:
 668				return
 669			self.wordlist[start+i].set_char(phrase[i])
 670		self.reparse (start+length)
 671
	def wordlist_manual_select (self, candidate):
		"""Replace the converted words at the cursor with ``candidate``.

		Each affected word gets its real pinyin id (columns 1..4 for the first
		four characters, the "'"-separated YX column afterwards), its character,
		and is marked manual.  The cursor moves past the phrase, the rest is
		reparsed and the state refreshed.
		"""
		phrase = candidate[PHRASE]
		for i, char in enumerate(phrase):
			word = self.wordlist[ self.cursor + i ]
			if i < 4:
				word.real_pinyin_id = candidate[ i + 1 ]
			else:
				py = candidate[YX].split("'")
				word.real_pinyin_id = PINYIN_DICT[ py[ i - 4 ] ]
			if self.enable_mohuyin:
				word.mohuyin()
			word.set_char( char )
			word.manual = True
		self.cursor += len (phrase)
		if self.cursor < len (self.wordlist):
			self.reparse (self.cursor)
		self.update ()
 690
	def commit (self):
		"""Convert anything still pending, learn from the result, clear the buffer and return the text."""
		if self.pinyinlist:
			self.convert_all ()
		committed = self.get_preedit ()
		self.learn ()
		self.clear ()
		return committed
 698
	def del_current (self):
		"""Delete the word before the cursor.

		Raises InputException while pinyin is pending, or when the cursor is at
		position 0 of a non-empty word list.
		"""
		if self.pinyinlist:
			raise InputException ()
		if self.cursor <= 0:
			if self.wordlist and self.cursor == 0:
				raise InputException()
			return
		self.cursor -= 1
		del self.wordlist[self.cursor]
		if not len (self.wordlist):
			self.clear ()
		self.reparse_backtrace ()
		self.update()
 711		
 712
	def del_next (self):
		"""Delete the word at the cursor.

		Raises InputException while pinyin is pending or when the cursor is
		already at the end of the word list.
		"""
		if self.pinyinlist or self.cursor == len (self.wordlist):
			raise InputException ()
		del self.wordlist[self.cursor]
		if not len (self.wordlist):
			self.clear ()
		self.reparse_backtrace ()
		self.update()
 722
	def move_cursor (self, move):
		"""Move the cursor by ``move`` words, wrapping around at both ends.

		A forward move while a candidate cursor is active selects that candidate
		instead.  Raises InputException on an empty buffer, and while pinyin is
		pending unless this is such a forward move.
		"""
		if self.is_empty():
			raise InputException()
		if self.pinyinlist and (move<0 or self.candidate_cursor == None):
			raise InputException()
		if move > 0 and self.candidate_cursor != None:
			self.select_cursor()
			return
		self.cursor += move
		limit = len (self.wordlist)
		if self.cursor < 0:
			self.cursor += limit + 1
		elif self.cursor > limit:
			self.cursor = 0
		self.update ()
 737
	def move_cursor_to (self, pos):
		"""Place the cursor before the ``pos``-th word (1-based); ``pos`` 0 means the end.

		Pending pinyin is converted first.  Raises InputException on an empty
		buffer or when ``pos`` is greater than the number of words plus one.
		"""
		if self.is_empty ():
			raise InputException ()
		if self.pinyinlist:
			self.convert_all ()
		count = len(self.wordlist)
		if pos == 0:
			target = count
		elif pos > count + 1:
			raise InputException ()
		else:
			target = pos - 1
		self.cursor = target
		self.update ()
 750
	def select (self, n):
		"""Select the n-th candidate of the current lookup page.

		The candidate is applied to the pending pinyin when there is any,
		otherwise to the converted words at the cursor.  Raises InputException
		when ``n`` is not on the page.
		"""
		table = self.lookup_table
		if n >= table.get_current_page_size():
			raise InputException()
		candidate = self.candidates[table.get_current_page_start()+n]
		if not self.pinyinlist:
			self.wordlist_manual_select(candidate)
		else:
			self.pinyin_select(candidate, True)
 760
	def select_cursor (self):
		"""Select the candidate under the lookup-table cursor, as select() does for a page index."""
		candidate = self.candidates[self.lookup_table.get_cursor_pos()]
		if not self.pinyinlist:
			self.wordlist_manual_select(candidate)
		else:
			self.pinyin_select(candidate, True)
 768
	def recursive_mohuyin_pinyinlist(self, pinyinlist):
		"""Yield every fuzzy variant of ``pinyinlist`` as a list of new PinYinWord objects.

		Each syllable is expanded with mohuyin(); the results are combined over
		all positions.
		"""
		rest = pinyinlist[1:]
		for spelling in self.mohuyin(pinyinlist[0].get_screen_pinyin()):
			if not rest:
				yield [PinYinWord(pinyin = spelling)]
				continue
			for tail in self.recursive_mohuyin_pinyinlist(rest):
				yield [PinYinWord(pinyin = spelling)] + tail
 776
 777	def recursive_mohuyin(self, strl):
 778		for i in self.mohuyin(strl[0]):
 779			if strl[1:]:
 780				for p in self.recursive_mohu(strl[1:]):
 781					yield [i] + p
 782			else:
 783				yield [i]
 784	
	def mohuyin(self, pinyin):
		"""Yield the fuzzy spellings of one syllable.

		The spelling is split into shengmu and yunmu, each is replaced by its
		alternatives from ``shengmu_mohu`` / ``yunmu_mohu`` when it has an entry,
		and every combination found in SHENGMU_LIST or PINYIN_LIST is yielded.
		When the typed spelling is in PINYIN_PARTIAL_LIST, combinations found in
		that list are accepted as well.  A leading "'" is kept on every result.
		"""
		# Slice rather than index so that an empty spelling does not raise.
		if pinyin[:1] == "'":
			spliter = "'"
			pinyin = pinyin[1:]
		else:
			spliter = ""
		if pinyin[:2] in SHENGMU_LIST:
			shengmu, yunmu = pinyin[:2], pinyin[2:]
		elif pinyin[:1] in SHENGMU_LIST:
			shengmu, yunmu = pinyin[:1], pinyin[1:]
		else:
			shengmu, yunmu = "", pinyin
		shengmu = self.shengmu_mohu.get(shengmu, [shengmu])
		yunmu = self.yunmu_mohu.get(yunmu, [yunmu])
		accept_partial = pinyin in PINYIN_PARTIAL_LIST
		# The partial branch used to loop over the yunmu only and referenced
		# the shengmu variable ``i`` before it was bound; both branches now
		# share the same double loop.
		for i in shengmu:
			for q in yunmu:
				spelling = i + q
				if spelling in SHENGMU_LIST or spelling in PINYIN_LIST \
					or (accept_partial and spelling in PINYIN_PARTIAL_LIST):
					yield spliter + spelling
	def parsr_mohuyin(self, pinyinlist):
		"""Collect the candidates for every prefix of ``pinyinlist``.

		Without fuzzy pinyin the results are concatenated from the longest
		prefix to the shortest.  With fuzzy pinyin the prefixes of every fuzzy
		variant are queried, duplicates are dropped and the result is ordered
		with candidate_sort.
		"""
		db = Editor.database
		if not self.enable_mohuyin:
			candidates = []
			for size in range (len (pinyinlist), 0, -1):
				candidates.extend(db.select_words_by_pinyin_list (pinyinlist[:size]))
			return candidates
		seen = Set()
		for variant in self.recursive_mohuyin_pinyinlist(pinyinlist):
			for size in range (len (variant), 0, -1):
				seen.update(db.select_words_by_pinyin_list (variant[:size]))
		candidates = list(seen)
		candidates.sort(cmp = candidate_sort)
		return candidates
 832				
	def update_candidates (self):
		"""Recompute ``self.candidates`` and refresh the lookup table.

		Candidates come from the pending pinyin when there is any, otherwise from
		the converted words at and after the cursor; the list is empty when the
		buffer is empty or the cursor is at the end.
		"""
		source = None
		if not self.is_empty():
			if self.pinyinlist:
				source = self.pinyinlist
			elif len(self.wordlist)>self.cursor:
				source = self.wordlist[self.cursor:]
		if source is None:
			self.candidates = []
		else:
			self.candidates = self.parsr_mohuyin(source)
		self.update_lookup_table()
 843
	def update_lookup_table (self):
		"""Refill the lookup table from ``self.candidates``.

		Candidates whose ADJ_FREQ is non-zero and a multiple of USER_PHRASE or
		USER_WORD get a foreground colour attribute over the whole phrase; all
		others are appended plain.
		"""
		table = self.lookup_table
		table.clear()
		table.show_cursor(False)
		for c in self.candidates:
			freq = c[ADJ_FREQ]
			if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
				table.append_candidate (c[PHRASE])
				continue
			attrs = [scim.Attribute (0, len(c[PHRASE]), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
			table.append_candidate (c[PHRASE], attrs)
 853
 854class Engine (IMEngine):
	def __init__ (self, factory, config, encoding, id):
		"""Set up the engine: its editor, lookup table, panel properties and log, then load ``config``."""
		IMEngine.__init__ (self, factory, config, encoding, id)
		self._editor = Editor ()
		self._lookup_table = scim.LookupTable (9)
		self._chinese_mode = True
		self._status_property = Property ("chinese", "CN")
		self._setup_property = Property ("setup", "", "/usr/share/scim/icons/setup.png")
		self.reload_config(config)
		self._log = scim.Log.Log ("ZhengJu")
 865	def clear(self):
 866		pass
 867	def reset(self):
 868		#~ print "reset"
 869		if self._editor.wordlist:
 870			self.commit_string (self._editor.commit())
 871		else:
 872			self._editor.clear()
 873		self.clear()
 874		self.origin_string = None
 875		self._double_quotation_state = False
 876		self._single_quotation_state = False
 877		self._prev_key = None
 878		self._shift_key = None	
 879		self.pipe = None	
 880		self.update ()	
 881		props = [self._status_property, self._setup_property]
 882		self.register_properties (props)
 883		self.update_properties ()
 884	def update_preedit (self):
 885		string = self._editor.get_preedit () + self.get_extra_string()
 886		if (string == u""):
 887			self.hide_preedit_string ()
 888		else:
 889			self.show_preedit_string ()
 890			self.update_preedit_string (string , [])
 891			self.update_preedit_caret (self._editor.get_screen_cursor())
 892
 893	def focus_out(self):
 894		#~ print "out reset"
 895		self.reset()
 896		IMEngine.focus_out (self)
 897		
 898	def focus_in (self):
 899		#~ print "in reset"
 900		self.reset()
 901		IMEngine.focus_in (self)
 902	
 903	def trigger_property (self, property):
 904		if property == "chinese":
 905			self.change_mode ()
 906		elif property == "setup":
 907			self.start_helper ("61af6de6-c29d-421e-9e1b-e34a29c68c76")
 908
 909	def update_candidate (self):
 910		if self._editor.candidates:
 911			self.update_lookup_table(self._editor.lookup_table)
 912			self.show_lookup_table()
 913		else:
 914			self.hide_lookup_table ()
 915
 916	def update_aux(self):
 917		if self._editor.predict:
 918			self.show_aux_string ()
 919			string = self._editor.get_aux ()
 920			attrs = [scim.Attribute (0, len (string), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
 921			self.update_aux_string (string, attrs)
 922		else:
 923			self.hide_aux_string ()
 924			self.update_aux_string (u"")
 925
 926	def update (self):
 927		self.update_preedit ()
 928		self.update_aux ()
 929		self.update_candidate ()
 930
 931	def update_properties (self):
 932		if self._chinese_mode: # refresh mode
 933			self._status_property.label = _("CN")
 934		else:
 935			self._status_property.label = _("EN")
 936		self.update_property(self._status_property)
 937
 938	def change_mode(self):
 939		if self._chinese_mode:
 940			self.commit_string (self._editor.commit())
 941			self.update()
 942		self._chinese_mode = not self._chinese_mode
 943		self.update_properties ()
 944		#~ print "change_mode", self._chinese_mode
 945	def reload_config (self, config):
 946		self._editor.load_config(config)
 947		self.progresivepromp = config.read ("/IMEngine/Python/ZhengJu/ProgressivePrompt", False)
 948
 949	def lookup_table_page_down (self):
 950		self._editor.lookup_table.page_down ();
 951		self.update()
 952		return True
 953
 954	def lookup_table_page_up (self):
 955		self._editor.lookup_table.page_up ();
 956		self.update()
 957		return True
 958		
 959	def process_key_event (self, key):
 960		#~ print key.code
 961		if self._chinese_mode and self._editor.is_end() and not self.get_extra_string():
 962			if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R)	\
 963				and key.mask & KeyMask.ReleaseMask:
 964				if self._shift_key:
 965					self.change_mode()
 966					return True
 967			if key.mask == KeyMask.ShiftMask and (key.code >= KeyCode.KEY_A and key.code <= KeyCode.KEY_Z) \
 968				or key.mask & KeyMask.CapsLockMask:
 969				self.change_mode()
 970		elif not self.get_extra_string():
 971			if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R)\
 972				and key.mask & KeyMask.ReleaseMask:
 973				if self._shift_key:
 974					self.change_mode()
 975					return True
 976		if (key.code == KeyCode.KEY_Shift_L or key.code == KeyCode.KEY_Shift_R)	\
 977			and key.mask == KeyMask.NullMask:
 978			self._shift_key = True;
 979		else:
 980			self._shift_key = None;
 981		if self.pipe and self.pipe.poll() != -1:
 982			try: 
 983				self.origin_string = unicode(self.pipe.fromchild.read()[:-1],"utf8")
 984				self._editor.reverse(self.origin_string)
 985				self._editor.move_cursor_to (1)
 986			except:
 987				self._editor.clear()
 988				self.beep ()
 989			else:
 990				self.commit_string(u"")
 991				self.update()			
 992			finally:
 993				self.pipe = None
 994				return True
 995		if key.mask & KeyMask.ReleaseMask:
 996			return False
 997		try:
 998			if self._chinese_mode:
 999				result = self.chinese_process_key_event (key)
1000			else:
1001				result = self.english_process_key_event (key)
1002			self._prev_key = key
1003		except InputException, e:
1004			self.beep ()
1005			return True
1006		except Exception, e:
1007			self.beep ()
1008			self._log.print_exc()
1009			self._log.log("DEBUG", self._editor.cursor)
1010			self._log.log("DEBUG", [i.char.encode("utf-8") for i in self._editor.wordlist] )
1011			self._log.log("DEBUG", [i.get_screen_pinyin() for i in self._editor.pinyinlist] )
1012			self._log.log("DEBUG", self._editor.get_preedit().encode ("utf-8"))
1013			return True
1014		else:
1015			return result	
1016		finally:
1017			self.update()
1018
1019	def english_process_key_event (self, key):
1020		return False
1021
1022	def chinese_process_key_event (self, key):
1023		if self._editor.is_empty() and not self.get_extra_string():
1024			if key.code <= 127 and ascii.ispunct (chr (key.code)):
1025				self.commit_string (self.convert_to_full_width (unichr (key.code)))
1026				return True
1027			elif key.code == KeyCode.KEY_r and key.mask == KeyMask.ControlMask:
1028				if not self.pipe:
1029					self.pipe = popen2.Popen3("python -c" +'"import gtk; print gtk.clipboard_get(selection=\\"PRIMARY\\").wait_for_text()"')
1030				return True
1031			else:
1032				return False
1033		#~ print unichr (key.code)
1034		if key.code in (KeyCode.KEY_Control_L,KeyCode.KEY_Control_R,
1035			KeyCode.KEY_Alt_L, KeyCode.KEY_Alt_R):
1036			return True
1037		elif key.code in (KeyCode.KEY_KP_Space, KeyCode.KEY_space):
1038			#~ print self._editor.get_candidate_cursor()
1039			if self._editor.candidates and self._editor.lookup_table.is_cursor_visible():
1040				self._editor.select_cursor()
1041				return True
1042			elif self._editor.pinyinlist:
1043				self._editor.convert_all ()
1044				return True
1045			elif self._editor.cursor < len (self._editor.wordlist):
1046				self._editor.jump_to_next_word()
1047				return True
1048			else:
1049				self.commit_string (self._editor.commit())
1050				return True
1051		elif key.code == KeyCode.KEY_BackSpace:
1052			if not self._editor.pinyinlist and self.get_extra_string():
1053				raise InputException()
1054			self._editor.del_current()
1055			return True
1056		elif key.code == KeyCode.KEY_Delete:
1057			if self._editor.lookup_table.is_cursor_visible():
1058				self._editor.delete_cursor_phrase ()
1059			else:
1060				self._editor.del_next ()
1061			return True
1062		elif key.code >= KeyCode.KEY_0 and key.code <= KeyCode.KEY_9 and key.mask & KeyMask.ControlMask:
1063			self._editor.delete_phrase (key.code - KeyCode.KEY_1)
1064			return True
1065		elif key.code >= KeyCode.KEY_0 and key.code <= KeyCode.KEY_9 and key.mask & KeyMask.AltMask:
1066			self._editor.move_cursor_to (key.code - KeyCode.KEY_0)
1067			return True
1068		elif key.code >= KeyCode.KEY_1 and key.code <= KeyCode.KEY_9:
1069			self._editor.select (key.code-KeyCode.KEY_1)
1070			return True
1071		elif key.code >= KeyCode.KEY_KP_1 and key.code <= KeyCode.KEY_KP_9:
1072			self._editor.select (key.code-KeyCode.KEY_KP_1)
1073			return True
1074		elif key.code == KeyCode.KEY_Shift_L:
1075			if not self._editor.is_end():
1076				self._editor.select (0)
1077				self._shift_key = None
1078			return True
1079		elif key.code == KeyCode.KEY_Shift_R:
1080			if not self._editor.is_end():
1081				self._editor.select (1)
1082				self._shift_key = None
1083			return True
1084		elif key.code in (KeyCode.KEY_equal, KeyCode.KEY_bracketright, KeyCode.KEY_Page_Down):
1085			if self._editor.candidates:
1086				self._editor.lookup_table.page_down ();
1087				return True
1088			else:
1089				raise InputException()				
1090		elif key.code in (KeyCode.KEY_minus, KeyCode.KEY_bracketleft, KeyCode.KEY_Page_Up):
1091			if self._editor.candidates:
1092				self._editor.lookup_table.page_up ();
1093				return True
1094			else:
1095				raise InputException()
1096		elif key.code==KeyCode.KEY_Up:
1097			if self._editor.candidates:
1098				self._editor.lookup_table.cursor_up()
1099				self._editor.lookup_table.show_cursor(True)
1100				return True
1101			else:
1102				raise InputException()
1103		elif key.code==KeyCode.KEY_Down:
1104			if self._editor.candidates:
1105				self._editor.lookup_table.cursor_down()
1106				self._editor.lookup_table.show_cursor(True)
1107				return True
1108			else:
1109				raise InputException()
1110		elif key.code == KeyCode.KEY_Left or key.code == KeyCode.KEY_b and key.mask & KeyMask.ControlMask:
1111			self._editor.move_cursor (-1)
1112			return True
1113		elif key.code == KeyCode.KEY_Right or key.code == KeyCode.KEY_f and key.mask & KeyMask.ControlMask:
1114			if self.get_extra_string():
1115				raise InputException()
1116			self._editor.move_cursor (1)
1117			return True
1118		elif key.code == KeyCode.KEY_h and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_Home:
1119			if self.get_extra_string():
1120				raise InputException()
1121			self._editor.move_cursor_to (1)
1122			return True
1123		elif key.code == KeyCode.KEY_e and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_End:
1124			if self.get_extra_string():
1125				raise InputException()
1126			self._editor.move_cursor_to (0)
1127			return True
1128		elif key.code in (KeyCode.KEY_Return, KeyCode.KEY_KP_Enter):
1129			self.commit_string (self._editor.commit() + self.get_extra_string())
1130			self.clear()
1131			return True
1132		elif key.code == KeyCode.KEY_Escape or key.code == KeyCode.KEY_c and key.mask & KeyMask.ControlMask:
1133			if self.origin_string:
1134				self.commit_string(self.origin_string)
1135				self._editor.clear()
1136				self.origin_string = None
1137			elif self._editor.lookup_table.is_cursor_visible():
1138				self._editor.lookup_table.show_cursor(False)
1139				self._editor.update()
1140			else:
1141				self.clear()
1142				self._editor.clear()
1143			return True
1144		elif key.code <= 127 and ascii.ispunct (chr (key.code)) and not self.get_extra_string():
1145			if not self._editor.is_empty ():
1146				self.commit_string (self._editor.commit ())
1147			self.commit_string (self.convert_to_full_width (unichr (key.code)))
1148			return True
1149		else:
1150			raise InputException ()
1151
1152	def convert_to_full_width (self, c):
1153		if c == u".":
1154			if self._prev_key and self._prev_key.code >= KeyCode.KEY_0 and self._prev_key.code <= KeyCode.KEY_9:
1155				return u"."
1156			else:
1157				return u"\u3002"
1158		elif c == u"\\":
1159			return u"\u3001"
1160		elif c == u"^":
1161			return u"\u2026\u2026"
1162		elif c == u"_":
1163			return u"\u2014\u2014"
1164		elif c == u"$":
1165			return u"\uffe5"
1166		elif c == u"\"":
1167			self._double_quotation_state = not self._double_quotation_state
1168			if self._double_quotation_state:
1169				return u"\u201c"
1170			else:
1171				return u"\u201d"
1172		elif c == u"'":
1173			self._single_quotation_state = not self._single_quotation_state
1174			if self._single_quotation_state:
1175				return u"\u2018"
1176			else:
1177				return u"\u2019"
1178		elif c == u"<":
1179			return u"\u300a"
1180		elif c == u">":
1181			return u"\u300b"
1182		return scim.unichar_half_to_full (c)
1183
class ZhengJuFactory (IMEngineFactory):
	"""Factory that describes the ZhengJu input method and creates its engines."""

	def __init__ (self, config):
		"""Fill in the factory metadata and keep config for create_instance()."""
		IMEngineFactory.__init__ (self, config)
		self.name = _(u"ZhengJu")
		self.uuid = "59e29ad8-3c95-4cd0-b02f-e21bf1317f7a"
		self.authors = u"Yu Fan <yufanyufan@gmail.com>"
		self.icon_file = "/usr/share/scim/icons/scim-python.png"
		self.credits = u"GPL"
		self.help = _(u"Help For ZhengJu")
		self.set_languages ("zh")
		self._config = config

	def create_instance (self, encoding, id):
		"""Build the engine selected by the PinYinSchema configuration key.

		"QuanPin" and "ShuangPin" select their own engines; "JianPin" (the
		default) and any other value select the JianPin engine.  Only the
		module of the selected scheme is imported.
		"""
		schema = self._config.read ("/IMEngine/Python/ZhengJu/PinYinSchema", "JianPin")
		if schema == "QuanPin":
			import QuanPin
			engine_class = QuanPin.QuanPinEngine
		elif schema == "ShuangPin":
			import ShuangPin
			engine_class = ShuangPin.ShuangPinEngine
		else:
			import JianPin
			engine_class = JianPin.JianPinEngine
		return engine_class (self, self._config, encoding, id)

	def reload_config (self, config):
		"""Replace the stored configuration used for engines created later."""
		self._config = config
1214		
class PseudoConfig:
	"""Configuration stand-in whose read() always answers with the default."""

	def read(self, string, default):
		"""Ignore the key given in string and return default unchanged."""
		return default
1218
1219def train(file_name):
1220	print "Training by " + file_name
1221	editor = Editor()
1222	import re
1223	ex = re.compile(ur"[\da-zA-Z\W]",re.UNICODE)
1224	for l in file(file_name):
1225		ll = unicode(l,"utf8")[:-1]
1226		t = ex.split(ll)
1227		for i in t:
1228			if i:
1229				try:
1230					editor.reverse(i)
1231					#~ print i
1232					editor.learn()
1233				except:
1234					print file
1235					traceback.print_exc ()
1236					raise Exception()				
1237
def print_usage():
	"""Print the two command-line forms accepted by this script."""
	# A single parenthesised argument prints the same text whether print is
	# a statement or a function.
	print ("ZhengJu -f FILE\tRead Sentence from file")
	print ("ZhengJu \tConvert parameter to pinyin")
1241
1242if __name__ == "__main__":
1243	editor = Editor()
1244	import sys
1245	try:
1246		if len(sys.argv) == 3:
1247			if sys.argv[1] == "-f":
1248				train(sys.argv[2])
1249			else:
1250				raise Exception()
1251		elif len(sys.argv) == 2:
1252			try:
1253				editor.reverse(unicode(sys.argv[1],"utf8"))
1254				for i in editor.wordlist:
1255					print ID_PINYIN_DICT[i.get_pinyin_id()],
1256					print i.char,
1257			except:
1258				print "Can't convert this to pinyin"
1259				raise Exception()
1260		else:
1261			raise Exception()
1262	except:
1263		traceback.print_exc ()
1264		print_usage()
1265		sys.exit(1)
1266	else:
1267		sys.exit(0)
1268