ZhengJu.py - This Python script is a Chinese input method e…

/python/engine/PinYin/ZhengJu.py

http://scim-python.googlecode.com/ · Python · 1268 lines · 1097 code · 83 blank · 88 comment · 403 complexity · 46afeedb53a388189acf7cb3c399f7df MD5 · raw file

# -*- coding: utf-8 -*-
# vim: set noet ts=4:
#
# scim-python
#
# Copyright (c) 2007-2008 Yu Fan <yufanyufan@gmail.com>
#
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA  02111-1307  USA
#
# $Id: $
#
import scim
import scim.Log
import os
from scim import KeyCode
from scim import KeyMask
from scim import Property
import traceback
from PYDict import *
from gettext import dgettext
from ZhengJuDB import *
import scim.ascii as ascii
from sets import Set
import popen2

def _(a):
	"""Translate *a* through the "scim-python" gettext domain."""
	return dgettext("scim-python", a)


def RGB(r, g, b):
	"""Pack three colour components into a single 0xRRGGBB integer.

	Each component is masked to its low 8 bits before being combined.
	"""
	red = (r & 0xff) << 16
	green = (g & 0xff) << 8
	blue = b & 0xff
	return red | green | blue
# Short module-level aliases for the scim base classes used below.
IMEngine, IMEngineFactory = scim.IMEngine, scim.IMEngineFactory

# Column indexes of a candidate row: the phrase length, the pinyin ids of the
# first four characters, the "'"-joined pinyin of the remaining characters,
# the phrase text and its adjusted frequency.
YLEN, Y0, Y1, Y2, Y3, YX, PHRASE, ADJ_FREQ = range(8)


def candidate_sort(x, y):
	"""cmp-style comparator: longer candidates first, then higher ADJ_FREQ."""
	if x[YLEN] == y[YLEN]:
		return cmp(y[ADJ_FREQ], x[ADJ_FREQ])
	return cmp(y[YLEN], x[YLEN])

class InputException(Exception):
	"""Raised by the editor when a requested operation cannot be carried out."""
	def __init__ (self):
		super(InputException, self).__init__()

class PinYinWord:
	"""One pinyin syllable of the input plus the character chosen for it.

	Instance attributes:
	char            -- character assigned to the syllable ("" when none)
	shengmu / yunmu -- the two parts the typed pinyin was split into
	spliter         -- "'" when the pinyin was given with a leading "'"
	manual          -- selection flag stored by the editor (None until set)
	real_pinyin_id  -- assigned from outside before mohuyin() is called
	"""
	def __init__ (self, shengmu = "", yunmu = "", pinyin = ""):
		"""Build the word from *pinyin*, or from *shengmu* + *yunmu* when
		*pinyin* is empty."""
		self.char = ""
		self._pinyin_id = None
		self.manual = None
		self.spliter = ""
		if pinyin:
			self.set_pinyin(pinyin)
		else:
			self.set_pinyin(shengmu + yunmu)

	def _split_pinyin (self, pinyin):
		"""Return (shengmu, yunmu) for *pinyin*, preferring a two-letter shengmu."""
		if pinyin[:2] in SHENGMU_LIST:
			return pinyin[:2], pinyin[2:]
		if pinyin[:1] in SHENGMU_LIST:
			return pinyin[:1], pinyin[1:]
		return "", pinyin

	def set_pinyin(self,pinyin):
		"""Split *pinyin* into shengmu/yunmu and refresh the cached ids.

		A leading "'" is remembered in self.spliter and stripped.  The pinyin
		id becomes None whenever the resulting spelling is not in PINYIN_LIST,
		so is_complete() never reports an id left over from an earlier
		spelling.  Raises KeyError if the shengmu is unknown to SHENGMU_DICT.
		"""
		# Slice instead of indexing: the default constructor passes "".
		if pinyin[:1] == "'":
			self.spliter = "'"
			pinyin = pinyin[1:]
		self.shengmu, self.yunmu = self._split_pinyin(pinyin)
		full = self.get_pinyin()
		if full in PINYIN_LIST:
			self._pinyin_id = PINYIN_DICT [full]
		else:
			self._pinyin_id = None
		self._sheng_mu_id = SHENGMU_DICT [self.get_shengmu()]

	def mohuyin(self):
		"""Re-spell the word after the pinyin identified by real_pinyin_id.

		The shengmu is always replaced; the yunmu is replaced only when the
		user had typed one, so a shengmu-only input stays shengmu-only.
		"""
		pinyin = ID_PINYIN_DICT[self.real_pinyin_id]
		self.shengmu, yunmu = self._split_pinyin(pinyin)
		if self.yunmu != "":
			self.yunmu = yunmu
		self.set_pinyin(self.get_pinyin())

	def get_sheng_mu_id (self):
		"""Return the SHENGMU_DICT id of the current shengmu."""
		return self._sheng_mu_id

	def get_pinyin_id (self):
		"""Return the PINYIN_DICT id of the full pinyin, or None if incomplete."""
		return self._pinyin_id

	def set_pinyin_id (self, id):
		"""Set the pinyin from its id via ID_PINYIN_DICT."""
		self.set_pinyin(ID_PINYIN_DICT[id])

	def get_shengmu (self):
		"""Return the shengmu part of the pinyin."""
		return self.shengmu

	def set_yunmu( self,yunmu):
		"""Replace the yunmu; an empty yunmu marks the word as incomplete.

		Raises KeyError when the resulting pinyin is not in PINYIN_DICT.
		"""
		self.yunmu = yunmu
		if yunmu != "":
			self._pinyin_id = PINYIN_DICT [ self.get_pinyin() ]
		else:
			self._pinyin_id = None

	def set_char (self,char):
		"""Store the character chosen for this syllable."""
		self.char = char

	def get_pinyin (self):
		"""Return shengmu + yunmu (without the "'" separator)."""
		return self.shengmu + self.yunmu

	def get_screen_pinyin (self):
		"""Return the pinyin as displayed, including a leading "'" if any."""
		return self.spliter + self.shengmu + self.yunmu

	def __str__ (self):
		return self.get_pinyin()

	def is_complete (self):
		"""Return True when the pinyin maps to a pinyin id."""
		return self._pinyin_id is not None
		
class Editor:
	database = None
	def __init__ (self, config = None):
		"""Create an editor; the phrase database is shared by all editors.

		A PseudoConfig is used when no *config* is given, and the class-level
		database is created on first use only.
		"""
		if config is None:
			config = PseudoConfig()
		if Editor.database is None:
			Editor.database = ZhengJuDB(config)
		table = scim.LookupTable (9)
		table.fix_page_size(True)
		self.lookup_table = table
		self.clear()
		self.config = config
		self.load_config(config)

	def clear(self):
		"""Drop all pending input and reset the lookup table and database cache."""
		self.cursor = 0
		self.wordlist, self.pinyinlist = [], []
		self.candidates, self.predict = [], []
		table = self.lookup_table
		table.clear()
		table.show_cursor(False)
		Editor.database.clear_cache()
	def load_config(self, config):
		"""Read every editor option from *config* and rebuild the fuzzy tables."""
		Editor.database.load_config(config)
		# (attribute name, configuration key, default value), read in this order.
		options = (
			("userword", "/IMEngine/Python/ZhengJu/CreateUserWords", True),
			("userphrase", "/IMEngine/Python/ZhengJu/CreateUserPhrases", True),
			("adjustfreq", "/IMEngine/Python/ZhengJu/AdjustWordFreq", True),
			("logconverror", "/IMEngine/Python/ZhengJu/LogConvError", True),
			("splitpinyin", "/IMEngine/Python/ZhengJu/SplitPinyin", True),
			("enable_mohuyin", "/IMEngine/Python/ZhengJu/FuzzyPinyin", False),
			("mohuyin_s_sh", "/IMEngine/Python/ZhengJu/FuzzyS_Sh", True),
			("mohuyin_c_ch", "/IMEngine/Python/ZhengJu/FuzzyC_Ch", True),
			("mohuyin_z_zh", "/IMEngine/Python/ZhengJu/FuzzyZ_Zh", True),
			("mohuyin_l_n", "/IMEngine/Python/ZhengJu/FuzzyL_N", True),
			("mohuyin_in_ing", "/IMEngine/Python/ZhengJu/FuzzyIn_Ing", True),
			("mohuyin_en_eng", "/IMEngine/Python/ZhengJu/FuzzyEn_Eng", True),
			("mohuyin_an_ang", "/IMEngine/Python/ZhengJu/FuzzyAn_Ang", True),
		)
		for attribute, key, default in options:
			setattr(self, attribute, config.read (key, default))
		self.build_mohuyin()
	def build_mohuyin(self):
		"""Build the fuzzy-pinyin tables from the mohuyin_* switches.

		self.shengmu_mohu and self.yunmu_mohu map a typed shengmu/yunmu to the
		alternatives listed in MOHU_SHENGMU / MOHU_YUNMU.  Both members of an
		enabled pair are registered; previously the in/ing pair stored the
		"ing" alternatives under "in", leaving "ing" without an entry.
		"""
		shengmu_pairs = (
			(self.mohuyin_s_sh, ("s", "sh")),
			(self.mohuyin_z_zh, ("z", "zh")),
			(self.mohuyin_c_ch, ("c", "ch")),
			(self.mohuyin_l_n, ("l", "n")),
		)
		yunmu_pairs = (
			(self.mohuyin_an_ang, ("an", "ang")),
			(self.mohuyin_en_eng, ("en", "eng")),
			(self.mohuyin_in_ing, ("in", "ing")),
		)
		self.shengmu_mohu = {}
		for enabled, keys in shengmu_pairs:
			if enabled:
				for key in keys:
					self.shengmu_mohu[key] = MOHU_SHENGMU[key]
		self.yunmu_mohu = {}
		for enabled, keys in yunmu_pairs:
			if enabled:
				for key in keys:
					self.yunmu_mohu[key] = MOHU_YUNMU[key]
	def current (self):
		if self.pinyinlist:
			return self.pinyinlist[-1]
		else:
			return None

	def is_empty (self):
		return (not self.pinyinlist) and (not self.wordlist)

	def is_end (self):
		return self.is_empty() or (not self.pinyinlist) and self.cursor == len (self.wordlist)

	def get_aux (self):
		return "".join ( u[PHRASE] for u in self.predict)

	def get_screen_pinyin(self):
		if self.splitpinyin:
			s = ""
			if self.pinyinlist:
				for i in range(len(self.pinyinlist)-1):
					p = self.pinyinlist[i].get_screen_pinyin() + self.pinyinlist[i+1].get_screen_pinyin()[0]
					if p in PINYIN_LIST or p in PINYIN_PARTIAL_LIST:
						s += self.pinyinlist[i].get_screen_pinyin() + "'"
					else:
						s += self.pinyinlist[i].get_screen_pinyin()
				s += self.pinyinlist[-1].get_screen_pinyin()
			return s
		else:
			return u"".join( i.get_screen_pinyin() for i in self.pinyinlist)

	def get_preedit (self):			
		return u"".join( [i.char for i in self.wordlist[0:self.cursor] ] ) +\
			self.get_screen_pinyin() + \
			u"".join ( [i.char for i in self.wordlist[self.cursor:]] )

	def get_screen_cursor (self):
		if len(self.get_screen_pinyin())>0:
			return self.cursor + len(self.get_screen_pinyin())
		else:
			return self.cursor
	
	def pinyin_select (self, candidate, manual = False):
		"""Convert the leading pending pinyin into the characters of *candidate*.

		Each consumed pinyin receives its character, its real pinyin id and
		the *manual* flag, and is moved into the word list at the cursor.
		The first four ids come from the candidate's id columns, the rest
		from its "'"-separated YX column.
		"""
		phrase = candidate[PHRASE]
		length = len(phrase)
		for idx in range(length):
			word = self.pinyinlist[idx]
			word.set_char(phrase[idx])
			if idx < 4:
				word.real_pinyin_id = candidate[idx + 1]
			else:
				rest = candidate[YX].split("'")
				word.real_pinyin_id = PINYIN_DICT[rest[idx - 4]]
			if self.enable_mohuyin:
				word.mohuyin()
			word.manual = manual
		converted = self.pinyinlist[:length]
		self.wordlist[self.cursor:self.cursor] = converted
		del self.pinyinlist[:length]
		self.cursor += length
		if manual:
			self.update()
	def reparse_backtrace(self):
		if self.cursor < len(self.wordlist):
			i = self.cursor
			while i >= 0:
				if self.wordlist[i].manual:
					break
				i-=1
			i += 1
			self.reparse(i)
		
	def convert_all (self):
		"""Accept every predicted phrase, then re-predict and refresh."""
		for prediction in self.predict:
			self.pinyin_select(prediction)
		self.reparse_backtrace()
		self.update ()

	def jump_to_next_word(self):
		"""Move the cursor to the end of the phrase that contains it."""
		preedit = self.get_preedit ()
		target = 0
		for entry in self.split_phrase (preedit):
			if target > self.cursor:
				break
			# entry is (start, length, text); accumulate the lengths.
			target += entry[1]
		self.cursor = target
		self.update ()

	def predict_len(self, predicts):
		return sum (u[YLEN] for u in predicts)

	def auto_convert (self):
		"""Convert leading predictions until the first two predicted phrases
		cover all of the pending pinyin, then refresh the candidates."""
		self.update_predict()
		while True:
			if self.predict_len(self.predict[:2]) >= len (self.pinyinlist):
				break
			self.pinyin_select(self.predict[0])
			self.update_predict()
		self.update_candidates()

	def auto_convert_quanpin(self):
		"""Like auto_convert, but uses the existing self.predict and keeps one
		extra pinyin unconverted while the last pinyin may still grow.

		The last pinyin "may still grow" when it is neither a shengmu nor a
		partial pinyin and is a proper prefix of some entry of PINYIN_LIST.
		"""
		last = self.pinyinlist[-1].get_pinyin()
		slack = 0
		if last not in SHENGMU_LIST and last not in PINYIN_PARTIAL_LIST:
			if any(last != full and full[:len(last)] == last for full in PINYIN_LIST):
				slack = 1
		while self.predict_len(self.predict[:2]) + slack < len (self.pinyinlist):
			self.pinyin_select(self.predict[0])
			self.update_predict()
		self.update_candidates()

	def update (self):
		"""Forget the candidate cursor and recompute prediction and candidates."""
		self.candidate_cursor = None
		for refresh in (self.update_predict, self.update_candidates):
			refresh()

	def update_predict (self):
		if self.pinyinlist:
			self.predict = self.get_predict_pinyinlist (self.pinyinlist) 
		else:
			self.predict = []

	def reverse(self, phrase):
		"""Rebuild the word list from already-converted text.

		The text is consumed from the left using the longest prefix that
		database.select_phrase() knows; InputException is raised when not
		even the first character can be found.
		"""
		self.clear()
		while phrase:
			for size in range(len(phrase), 0, -1):
				rows = self.database.select_phrase(phrase[:size])
				if rows:
					result = rows[0]
					break
			else:
				raise InputException()
			for idx in range(result[YLEN]):
				word = PinYinWord("'","")
				word.set_char(phrase[idx])
				if idx < 4:
					word.set_pinyin_id (result[idx + 1])
				else:
					# Characters past the fourth keep their pinyin in the YX column.
					rest = result[YX].split("'")
					word.set_pinyin_id (PINYIN_DICT[rest[idx - 4]])
				word.real_pinyin_id = word._pinyin_id
				self.wordlist.append(word)
			phrase = phrase[result[YLEN]:]
	def split_phrase (self, string):
		"""Cut the converted words into phrases found in the database.

		*string* is the text of self.wordlist.  Returns a list of
		(start, length, text) tuples covering the word list from left to right.
		"""
		start = 0
		phrase_list = []
		# The last word is handled after the loop, so stop one short of it.
		while start < len(self.wordlist)-1:
			candidate = None
			if len(self.wordlist) - start >= 3:
				# Look up by the next three words and keep, among the rows whose
				# text matches *string* at this position, the one whose text
				# compares greatest.
				phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start + 3])
				#~ print len(phrase)
				for i in phrase:
					if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
						if not candidate or candidate[PHRASE] < i[PHRASE]:
							candidate = i
				if candidate == None:
					# Fall back to a lookup on the next two words.
					phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
					if phrase:
						candidate = phrase[0]			
			else:
				# Fewer than three words left: only the two-word lookup applies.
				phrase = Editor.database.select_words_by_phrase (self.wordlist[start:start+2])
				if phrase:
					candidate = phrase[0]
			if candidate == None:
				# Nothing matched: emit the single character at this position.
				phrase_list.append ( (start, 1, string[start]))
				start += 1
			else:
				phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
				start += len(candidate[PHRASE])
		# A single trailing word that no phrase consumed.
		if start < len (self.wordlist):
			phrase_list.append ((start,1,string[-1]))
		return phrase_list
	def split_phrasev2 (self, string):
		start = 0
		phrase_list = []
		while start < len(self.wordlist)-1:
			phrase = Editor.database.select_words_by_pinyin_list_all (self.wordlist[start:start+2])
			candidate = None
			for i in phrase:
				if i[PHRASE] == string[start:start + len(i[PHRASE]) ]:
					if not candidate or candidate[PHRASE] < i[PHRASE]:
						candidate = i
			if candidate == None:
				phrase_list.append ( (start, 1, string[start]))
				start += 1
			else:
				phrase_list.append ( (start, len(candidate[PHRASE]), candidate[PHRASE]))
				start += len(candidate[PHRASE])
		if start < len (self.wordlist):
			phrase_list.append ((start,1,string[-1]))
		return phrase_list

	def learn_user_words(self, phrase_list, string, sentence):
		if not self.userword:
			return
		start = 0
		while start < len (phrase_list):
			tmp_phrase_start = phrase_list[start][0]
			tmp_phrase = ""
			while start < len (phrase_list) and phrase_list[start][1] == 1 \
				and string[phrase_list[start][0]] != sentence[phrase_list[start][0]]:
				tmp_phrase += phrase_list[start][2]
				del phrase_list[start]			
			if tmp_phrase:
				phrase_list.insert (start, (tmp_phrase_start, len(tmp_phrase), tmp_phrase) )
			if len (tmp_phrase) > 1:
				Editor.database.add_phrase (self.wordlist[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)], USER_WORD) 	
				self.log_conv_error( sentence, string, phrase_list, tmp_phrase_start, tmp_phrase_start, 0)
				string = string[:tmp_phrase_start] + sentence[tmp_phrase_start:tmp_phrase_start + len(tmp_phrase)] + string[tmp_phrase_start + len(tmp_phrase):]
			start += 1
		return string
		

	def split_predict (self):
		predict = []
		start = 0
		while start < len (self.wordlist):
			p = self.get_predict (self.wordlist[start:])
			predict.append ( (start,len(p[0][PHRASE]), p[0][PHRASE]) )
			start += len (p[0][PHRASE])
		return predict

	def addphrase (self, phrase_list, pstart, pend, freq):
		if pstart < 0:
			return
		if pend >= len (phrase_list):
			return
		Editor.database.add_phrase(\
			self.wordlist[phrase_list[pstart][0]:(phrase_list[pend][0]+phrase_list[pend][1])], freq)

	def adjust_all_freq (self, phrase_list):
		if not self.userphrase:
			return
		p = [ self.wordlist[i[0]:i[0]+i[1]] for i in phrase_list]
		for i in p:
			Editor.database.adjust_phrase_freq (i)

	def adjust_freq (self, phrase_list, phrase_begin):
		if not self.adjustfreq:
			return
		i = phrase_list[phrase_begin]
		p = self.wordlist[i[0]:i[0]+i[1]]
		Editor.database.adjust_phrase_freq (p)
				
	def delete_phrase(self, n):
		"""Remove the n-th candidate of the current page from the database.

		Raises InputException when *n* is beyond the page, or when the
		candidate's frequency is 0 or a multiple of neither USER_PHRASE nor
		USER_WORD.
		"""
		table = self.lookup_table
		if n >= table.get_current_page_size():
			raise InputException()
		candidate = self.candidates[table.get_current_page_start() + n]
		freq = candidate[ADJ_FREQ]
		if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
			raise InputException()
		Editor.database.remove_phrase (candidate)
		self.update ()
	
	def delete_cursor_phrase(self):
		"""Remove the candidate under the lookup-table cursor from the database.

		When the candidate's frequency is 0 or a multiple of neither
		USER_PHRASE nor USER_WORD, the candidate cursor is reset and
		InputException is raised.
		"""
		candidate = self.candidates[self.lookup_table.get_cursor_pos() ]
		freq = candidate[ADJ_FREQ]
		if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
			self.candidate_cursor = None
			raise InputException()
		Editor.database.remove_phrase (candidate)
		self.update ()

	def log_conv_error(self, predict, sentence, phrase_list, pstart, pend, type):
		if pstart < 0:
			return
		if pend >= len (phrase_list):
			return
		if self.logconverror:
			begin = phrase_list[pstart][0]
			end = phrase_list[pend][0]+phrase_list[pend][1]
			p = open(os.path.expanduser ("~/.scim/zhengju-conv-error.log"),'a')
			print >> p, sentence[begin:end].encode ("utf-8"), predict[begin:end].encode ("utf-8"), type
			p.close ()
			
	def learn (self):
		"""Learn from the difference between the committed text and what the
		predictor would have produced for the same words.

		Does nothing when every learning/logging option is off.  User words
		are learned first (learn_user_words); afterwards the user's phrase
		split and the predictor's split are walked in parallel and, wherever
		they differ, phrases are added and frequencies adjusted.
		"""
		if not self.userword and not self.userphrase and not self.adjustfreq and not self.logconverror:
			return
		# predict: the predictor's own (start, length, text) split of the words;
		# sentence: the text that split spells out.
		predict = self.split_predict ()
		sentence = u"".join ([ i[2] for i in predict])
		# From here on the words use the pinyin of the character actually chosen.
		for i in self.wordlist:
			i._pinyin_id = i.real_pinyin_id
		string = self.get_preedit ()
		phrase_list = self.split_phrase (string)
		string = self.learn_user_words(phrase_list, string, sentence)
		#~ print "out"
		#~ for i in phrase_list:
			#~ print i[1],i[2]
		#~ for i in predict:
			#~ print i[1],i[2]
		if not self.userphrase and not self.adjustfreq:
			return
		# cur_* walk the two splits; *_begin mark the first entry after the
		# last position where both splits ended at the same offset.
		cur_phrase = 0
		cur_predict = 0
		phrase_begin = 0
		predict_begin = 0
		while cur_phrase < len(phrase_list):
			# Advance the predictor side until it ends at or after the current phrase.
			while predict[cur_predict][0]+ predict[cur_predict][1] < phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
				cur_predict += 1
			if predict[cur_predict][0]+ predict[cur_predict][1] > phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]:
				# The predicted phrase reaches further: take in the next user phrase.
				cur_phrase += 1
			else:
				# Both sides end at the same offset: compare the text since the
				# previous common boundary.
				#~ print string[phrase_list[phrase_begin][0]:phrase_list[cur_phrase][0]+phrase_list[cur_phrase][1]]
				#~ print sentence[predict[predict_begin][0]:predict[cur_predict][0]+predict[cur_predict][1]]
				if string[phrase_list[phrase_begin][0]:phrase_list[cur_phrase][0] + phrase_list[cur_phrase][1]]!=\
					sentence[predict[predict_begin][0]:predict[cur_predict][0] + predict[cur_predict][1]]:
					if cur_phrase - phrase_begin == 0:
						if cur_predict - predict_begin == 0:
							# One phrase on each side but different text: learn the
							# phrase joined with its left and with its right
							# neighbour, and adjust its own frequency.
							self.addphrase(phrase_list, phrase_begin - 1, cur_phrase, USER_PHRASE)
							self.log_conv_error(sentence, string, phrase_list, phrase_begin - 1, cur_phrase, 1)
							self.addphrase(phrase_list, phrase_begin, cur_phrase + 1, USER_PHRASE)
							self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase + 1, 1)
							self.adjust_freq (phrase_list, phrase_begin)
					else:
						# Several user phrases in the span: learn them as one phrase.
						self.addphrase (phrase_list, phrase_begin, cur_phrase, USER_PHRASE)
						self.log_conv_error(sentence, string, phrase_list, phrase_begin, cur_phrase, 2)
				phrase_begin = cur_phrase + 1
				predict_begin = cur_predict + 1
				cur_phrase += 1
		Editor.database.clean_useless_phrase()
	
	def freq_alg(self, phrase1, phrase2):
		"""Return the combined score of two candidate rows.

		Each row contributes its ADJ_FREQ multiplied by a weight that depends
		on the length of its phrase: 10 for a single character, the length
		itself for lengths below 4, and the squared length otherwise.
		"""
		def weighted(phrase):
			length = len(phrase[PHRASE])
			if length == 1:
				return phrase[ADJ_FREQ] * 10
			if length < 4:
				return phrase[ADJ_FREQ] * length
			return phrase[ADJ_FREQ] * pow(length, 2)
		return weighted(phrase1) + weighted(phrase2)


	def get_predict_pinyinlist (self, pinyinlist):
		"""Predict a list of candidate rows that together cover *pinyinlist*.

		A row matching the whole list is preferred.  Otherwise the best pair of
		phrases covering the longest possible prefix is chosen and the rest of
		the list is predicted recursively.
		"""
		#~ print "Dd", u" ".join( i.get_screen_pinyin() for i in pinyinlist), len(pinyinlist)
		candidates = Editor.database.select_words_by_pinyin_list (pinyinlist)
		if candidates:
			#~ print "phrase1",candidates[0][PHRASE],candidates[0][ADJ_FREQ]
			return [candidates[0]]
		else:
			candidates = Editor.database.select_words_by_pinyin_list_all(pinyinlist)
			if candidates:
				# Use a copy of the first row, cut down to the number of pinyin given.
				#~ print candidates[0][PHRASE]
				p = list (candidates[0]);
				p[YLEN] = len (pinyinlist)
				p[PHRASE] = p[PHRASE][:p[YLEN]]
				return [p]
		max_freq = 0
		predict = []
		# Try to cover the first `length` pinyin with two phrases split at i,
		# starting with the longest prefix and stopping at the first length
		# for which any pair exists.
		for length in range(len (pinyinlist), 1, -1):
			for i in range (1, length):
				candidates = Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
				if not candidates:
					continue
				candidates2 = Editor.database.select_words_by_pinyin_list(pinyinlist[i:length])
				if not candidates2:
					candidates2 = Editor.database.select_words_by_pinyin_list_all(pinyinlist[i:length])
					if candidates2:
						# Second half only matches a longer row: truncate a copy of it.
						p = list (candidates2[0]);
						p[YLEN] = length - i
						p[PHRASE] = p[PHRASE][:p[YLEN]]
						tmp_phrase = candidates[0]
						tmp_phrase2 = p
					else:
						continue
				else:
					tmp_phrase = candidates[0]
					tmp_phrase2 = candidates2[0]
				new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
				#~ print tmp_phrase[PHRASE].encode ("utf-8"),tmp_phrase2[PHRASE].encode ("utf-8"), tmp_phrase[ADJ_FREQ],tmp_phrase2[ADJ_FREQ], new_freq
				#~ if tmp_phrase[ADJ_FREQ] + tmp_phrase2[ADJ_FREQ] >= max_freq:
				# ">=" lets a later split win a tie.
				if new_freq >= max_freq:
					predict = [tmp_phrase, tmp_phrase2]
					max_freq = new_freq
			if predict:
				break
		if self.predict_len(predict) < len (pinyinlist):
			# NOTE(review): when no pair was found, predict is empty and the
			# recursive call below receives the unchanged list — presumably the
			# database always matches a single pinyin; confirm.
			#~ return
			#~ for i in range(1, predict[0][YLEN]):
				#~ candidates =  Editor.database.select_words_by_pinyin_list(pinyinlist[:i])
				#~ if candidates and candidates[0][PHRASE] == predict[0][PHRASE][:i]:
					#~ print "try", i, candidates[0][PHRASE]
					#~ temp = self.get_predict_pinyinlist(pinyinlist[i:self.predict_len(predict)])
					#~ print "resule", temp[0][PHRASE] +  temp[1][PHRASE]
					#~ print "match", predict[0][PHRASE][i:] + predict[1][PHRASE]
					#~ if predict[0][PHRASE][i:] + predict[1][PHRASE] == (temp[0][PHRASE] + temp[1][PHRASE]):
						#~ print "go", candidates[0][PHRASE]
						#~ return [candidates[0]] + temp
			return predict + self.get_predict_pinyinlist(pinyinlist[self.predict_len(predict):])
		else:
			return predict

	def get_predict (self, pinyinlist):
		"""Predict the leading phrase(s) for *pinyinlist*.

		Returns [] for an empty list, a single row when the whole list matches
		a database entry, and otherwise the pair of phrases that reaches
		furthest into the list (ties broken by freq_alg score).
		"""
		if not pinyinlist:
			return []
		candidates = Editor.database.select_words_by_pinyin_list(pinyinlist)
		if candidates:
			#~ print "phrase1",candidates[0][PHRASE],candidates[0][ADJ_FREQ]
			return [candidates[0]]
		else:
			candidates = Editor.database.select_words_by_pinyin_list_all(pinyinlist)
			if candidates:
				# Use a copy of the first row, cut down to the number of pinyin given.
				p = list (candidates[0]);
				p[YLEN] = len (pinyinlist)
				p[PHRASE] = p[PHRASE][:p[YLEN]]
				return [p]
		max_freq = 0
		max_length =0
		#~ print "try words"
		#~ if longest==1:
			#~ return [Editor.database.select_words_by_pinyin_list(pinyinlist[:1])[0][PHRASE]]
		#~ print longest
		# i is the length of the first phrase; p is the index of the last pinyin
		# covered by the second phrase.
		for i in range (1, len(pinyinlist)):
			candidates = Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
			if not candidates:
				continue
			tmp_phrase = candidates[0]
			tmp_freq = tmp_phrase[ADJ_FREQ]
			longest2 = Editor.database.get_longest_phrase_length (pinyinlist[i:])
			#~ print "phrase1",tmp_phrase[PHRASE]
			#~ print i,longest2
			for p in range(i + longest2,i-1,-1):
				# Pairs ending before the best end found so far cannot win.
				if p < max_length:
					continue
				candidates2 = Editor.database.select_words_by_pinyin_list(pinyinlist[i:p+1])
				#~ print len(candidates2)
				if candidates2:
					tmp_phrase2 = candidates2[0]
					#~ print "phrase2",tmp_phrase2[PHRASE]
					tmp_freq2 = tmp_phrase2[ADJ_FREQ]
					#~ print tmp_phrase, " ", candidates2[0][PYSQLiteDB.PHRASE]
					new_freq = self.freq_alg(tmp_phrase, tmp_phrase2)
					# Prefer the pair that reaches further; on equal reach the
					# higher (or equal, later) score wins.
					if p > max_length or \
						(new_freq >= max_freq and p == max_length):
						predict = [tmp_phrase, tmp_phrase2]
						#~ print tmp_phrase[PHRASE],tmp_phrase2[PHRASE], tmp_phrase[ADJ_FREQ],tmp_phrase2[ADJ_FREQ]
						max_freq = new_freq
						max_length = p
		#~ print "get_predict" + predict[0], max_length
		# NOTE(review): `predict` is only assigned inside the loop above, so this
		# line raises UnboundLocalError when no pair was found.
		return predict

	def reparse (self, start):
		#~ print "reparse"
		if start == len (self.wordlist):
			return
		predict = self.get_predict (self.wordlist[start:])
		phrase = predict[0][PHRASE]
		length = len (phrase) 
		#~ if len(phrase)<len(self.wordlist)-start \
					#~ else len(self.wordlist)-start
		#~ print string
		for i in range(0, length):
			if self.wordlist[start+i].manual:
				return
			self.wordlist[start+i].set_char(phrase[i])
		self.reparse (start+length)

	def wordlist_manual_select (self, candidate):
		"""Apply *candidate* to the already converted words at the cursor.

		Every word covered by the phrase gets its real pinyin id, its
		character and manual = True; the cursor then moves past the phrase and
		the words after it are re-predicted.
		"""
		phrase = candidate[PHRASE]
		base = self.cursor
		for offset in range (len (phrase)):
			word = self.wordlist[base + offset]
			if offset < 4:
				word.real_pinyin_id = candidate[offset + 1]
			else:
				# Characters past the fourth keep their pinyin in the YX column.
				rest = candidate[YX].split("'")
				word.real_pinyin_id = PINYIN_DICT[rest[offset - 4]]
			if self.enable_mohuyin:
				word.mohuyin()
			word.set_char(phrase[offset])
			word.manual = True
		self.cursor += len (phrase)
		if self.cursor < len (self.wordlist):
			self.reparse (self.cursor)
		self.update ()

	def commit (self):
		"""Convert any pending pinyin, learn from the result, clear the editor
		and return the committed text."""
		if self.pinyinlist:
			self.convert_all ()
		committed = self.get_preedit ()
		self.learn ()
		self.clear ()
		return committed

	def del_current (self):
		if self.pinyinlist:
			raise InputException ()
		if self.cursor > 0:
			del self.wordlist[self.cursor-1]
			self.cursor -= 1
			if len (self.wordlist) == 0:
				self.clear ()
			self.reparse_backtrace ();
			self.update()
		elif self.wordlist and self.cursor == 0:
			raise InputException()
		

	def del_next (self):
		"""Delete the word at the cursor.

		Raises InputException while pinyin is pending or when the cursor is
		at the end of the word list.
		"""
		if self.pinyinlist or self.cursor == len (self.wordlist):
			raise InputException ()
		del self.wordlist[self.cursor]
		if not len (self.wordlist):
			self.clear ()
		self.reparse_backtrace ()
		self.update()

	def move_cursor (self, move):
		"""Move the cursor by *move* words, wrapping around at both ends.

		Moving forward while a candidate is highlighted selects that candidate
		instead.  Raises InputException when the editor is empty, or when
		pinyin is pending and the move is backwards or no candidate is
		highlighted.
		"""
		if self.is_empty():
			raise InputException()
		if self.pinyinlist:
			if move < 0 or self.candidate_cursor is None:
				raise InputException()
		if move > 0 and self.candidate_cursor is not None:
			self.select_cursor()
			return
		position = self.cursor + move
		if position < 0:
			position += len (self.wordlist) + 1
		elif position > len (self.wordlist):
			position = 0
		self.cursor = position
		self.update ()

	def move_cursor_to (self, pos):
		"""Place the cursor before the pos-th word (1-based); 0 means the end.

		Pending pinyin is converted first.  Raises InputException when the
		editor is empty or *pos* is more than one past the number of words.
		"""
		if self.is_empty ():
			raise InputException ()
		if self.pinyinlist:
			self.convert_all ()
		if pos != 0 and pos > len(self.wordlist) + 1:
			raise InputException ()
		if pos == 0:
			self.cursor = len(self.wordlist)
		else:
			self.cursor = pos - 1
		self.update ()

	def select (self, n):
		"""Select the n-th candidate of the current lookup-table page.

		Raises InputException when *n* is beyond the page.
		"""
		table = self.lookup_table
		if n >= table.get_current_page_size():
			raise InputException()
		candidate = self.candidates[table.get_current_page_start() + n]
		if not self.pinyinlist:
			self.wordlist_manual_select(candidate)
			return
		self.pinyin_select(candidate, True)

	def select_cursor (self):
		"""Select the candidate under the lookup-table cursor."""
		candidate = self.candidates[self.lookup_table.get_cursor_pos()]
		if not self.pinyinlist:
			self.wordlist_manual_select(candidate)
			return
		self.pinyin_select(candidate, True)

	def recursive_mohuyin_pinyinlist(self, pinyinlist):
		"""Yield every fuzzy variant of *pinyinlist* as a list of new
		PinYinWord objects (all combinations of the per-word spellings
		produced by mohuyin())."""
		head = pinyinlist[0]
		tail = pinyinlist[1:]
		for spelling in self.mohuyin(head.get_screen_pinyin()):
			if not tail:
				yield [PinYinWord(pinyin = spelling)]
				continue
			for rest in self.recursive_mohuyin_pinyinlist(tail):
				yield [PinYinWord(pinyin = spelling)] + rest

	def recursive_mohuyin(self, strl):
		"""Yield every fuzzy variant of the pinyin strings in *strl*.

		Each result is a list with one spelling per input string, covering all
		combinations produced by mohuyin().  The recursion previously called a
		non-existent method (recursive_mohu) and failed for any input longer
		than one element.
		"""
		rest = strl[1:]
		for i in self.mohuyin(strl[0]):
			if rest:
				for p in self.recursive_mohuyin(rest):
					yield [i] + p
			else:
				yield [i]
	
	def mohuyin(self, pinyin):
		"""Yield the fuzzy spellings of *pinyin* that are valid.

		The pinyin (with an optional leading "'", which is preserved on every
		result) is split into shengmu and yunmu; each part is replaced by its
		alternatives from self.shengmu_mohu / self.yunmu_mohu when it has an
		entry there.  A combination is yielded when it is a shengmu or a full
		pinyin, and additionally when it is a partial pinyin if the input
		itself is one.
		"""
		#~ print pinyin
		if pinyin[0] == "'":
			spliter = "'"
			pinyin = pinyin[1:]
		else:
			spliter = ""
		if pinyin[:2] in SHENGMU_LIST:
			shengmu, yunmu = pinyin[:2], pinyin[2:]
		elif pinyin[:1] in SHENGMU_LIST:
			shengmu, yunmu = pinyin[:1], pinyin[1:]
		else:
			shengmu, yunmu = "", pinyin
		shengmu_options = self.shengmu_mohu.get(shengmu, [shengmu])
		yunmu_options = self.yunmu_mohu.get(yunmu, [yunmu])
		allow_partial = pinyin in PINYIN_PARTIAL_LIST
		# The partial-pinyin case used to skip the loop over the shengmu
		# options and failed on the unbound loop variable; one shared double
		# loop now serves both cases.
		for i in shengmu_options:
			for q in yunmu_options:
				combined = i + q
				if combined in SHENGMU_LIST or combined in PINYIN_LIST \
					or (allow_partial and combined in PINYIN_PARTIAL_LIST):
					yield spliter + combined
	def parsr_mohuyin(self, pinyinlist):
		candidates = []
		if self.enable_mohuyin:
			ss = Set()
			for p in self.recursive_mohuyin_pinyinlist(pinyinlist):
				#~ print u" ".join( i.get_pinyin() for i in p) 
				for i in range (len (p), 0, -1):
					ss.update(Editor.database.select_words_by_pinyin_list (p[:i]))
			candidates = list(ss)
			candidates.sort(cmp = candidate_sort)
		else:
			for i in range (len (pinyinlist), 0, -1):
				candidates += Editor.database.select_words_by_pinyin_list (pinyinlist[:i])
		return candidates
				
	def update_candidates (self):
		"""Recompute self.candidates and refill the lookup table.

		Candidates come from the pending pinyin if there is any, otherwise
		from the words at and after the cursor; in every other case the list
		is empty.
		"""
		source = None
		if not self.is_empty():
			if self.pinyinlist:
				source = self.pinyinlist
			elif len(self.wordlist) > self.cursor:
				source = self.wordlist[self.cursor:]
		if source is None:
			self.candidates = []
		else:
			self.candidates = self.parsr_mohuyin(source)
		self.update_lookup_table()

	def update_lookup_table (self):
		"""Refill the lookup table from self.candidates.

		A candidate whose frequency is non-zero and a multiple of USER_PHRASE
		or USER_WORD is shown with a blue foreground attribute; all others are
		added without attributes.
		"""
		table = self.lookup_table
		table.clear()
		table.show_cursor(False)
		for cand in self.candidates:
			freq = cand[ADJ_FREQ]
			if freq == 0 or (freq % USER_PHRASE and freq % USER_WORD):
				table.append_candidate (cand[PHRASE])
				continue
			attrs = [scim.Attribute (0, len(cand[PHRASE]), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
			table.append_candidate (cand[PHRASE], attrs)

class Engine (IMEngine):
	"""SCIM engine that feeds key events to an Editor for pinyin input.

	The engine owns one Editor, turns key events into editor operations and
	mirrors the editor state into the preedit string, the aux string and the
	lookup table.  It calls self.get_extra_string(), which is not defined in
	this class and therefore has to be provided by a subclass.
	"""

	# Punctuation with a single, state-independent full-width replacement.
	_FIXED_FULL_WIDTH = {
		u"\\": u"\u3001",
		u"^": u"\u2026\u2026",
		u"_": u"\u2014\u2014",
		u"$": u"\uffe5",
		u"<": u"\u300a",
		u">": u"\u300b",
	}

	def __init__ (self, factory, config, encoding, id):
		"""Create the editor, lookup table and panel properties, then load config."""
		IMEngine.__init__ (self, factory, config, encoding, id)
		self._editor = Editor ()
		self._lookup_table = scim.LookupTable (9)
		self._status_property = Property ("chinese", "CN")
		self._setup_property = Property ("setup", "", "/usr/share/scim/icons/setup.png")
		self._chinese_mode = True
		# Session state.  reset() sets all of these again, but they are also
		# initialised here so that a key event arriving before the first
		# reset() cannot fail with AttributeError.
		self.origin_string = None
		self._double_quotation_state = False
		self._single_quotation_state = False
		self._prev_key = None
		self._shift_key = None
		self.pipe = None
		self.reload_config(config)
		self._log = scim.Log.Log ("ZhengJu")

	def clear(self):
		"""Do nothing here; called wherever the engine discards its input."""
		# presumably overridden by the schema-specific subclasses -- confirm
		pass

	def reset(self):
		"""Flush the editor, reset the session state and re-register properties.

		Converted words are committed; if there are none the editor is simply
		cleared.
		"""
		if self._editor.wordlist:
			self.commit_string (self._editor.commit())
		else:
			self._editor.clear()
		self.clear()
		self.origin_string = None
		self._double_quotation_state = False
		self._single_quotation_state = False
		self._prev_key = None
		self._shift_key = None
		self.pipe = None
		self.update ()
		props = [self._status_property, self._setup_property]
		self.register_properties (props)
		self.update_properties ()

	def update_preedit (self):
		"""Show the editor preedit plus the extra string, or hide it when empty."""
		string = self._editor.get_preedit () + self.get_extra_string()
		if string == u"":
			self.hide_preedit_string ()
		else:
			self.show_preedit_string ()
			self.update_preedit_string (string , [])
			self.update_preedit_caret (self._editor.get_screen_cursor())

	def focus_out(self):
		"""Reset the engine before handing focus-out to the base class."""
		self.reset()
		IMEngine.focus_out (self)

	def focus_in (self):
		"""Reset the engine before handing focus-in to the base class."""
		self.reset()
		IMEngine.focus_in (self)

	def trigger_property (self, property):
		"""React to a panel property: "chinese" toggles the mode, "setup" starts the helper."""
		if property == "chinese":
			self.change_mode ()
		elif property == "setup":
			self.start_helper ("61af6de6-c29d-421e-9e1b-e34a29c68c76")

	def update_candidate (self):
		"""Show the editor's lookup table when it has candidates, else hide it."""
		if self._editor.candidates:
			self.update_lookup_table(self._editor.lookup_table)
			self.show_lookup_table()
		else:
			self.hide_lookup_table ()

	def update_aux(self):
		"""Show the editor's aux text in blue while it has a prediction, else hide it."""
		if self._editor.predict:
			self.show_aux_string ()
			string = self._editor.get_aux ()
			attrs = [scim.Attribute (0, len (string), scim.ATTR_FOREGROUND, RGB (0, 0, 0xef))]
			self.update_aux_string (string, attrs)
		else:
			self.hide_aux_string ()
			self.update_aux_string (u"")

	def update (self):
		"""Refresh preedit, aux string and lookup table, in that order."""
		self.update_preedit ()
		self.update_aux ()
		self.update_candidate ()

	def update_properties (self):
		"""Set the status label to "CN" or "EN" according to the current mode."""
		if self._chinese_mode:
			self._status_property.label = _("CN")
		else:
			self._status_property.label = _("EN")
		self.update_property(self._status_property)

	def change_mode(self):
		"""Toggle Chinese/English mode, committing the editor when leaving Chinese."""
		if self._chinese_mode:
			self.commit_string (self._editor.commit())
			self.update()
		self._chinese_mode = not self._chinese_mode
		self.update_properties ()

	def reload_config (self, config):
		"""Pass config to the editor and read the ProgressivePrompt flag (default False)."""
		self._editor.load_config(config)
		self.progresivepromp = config.read ("/IMEngine/Python/ZhengJu/ProgressivePrompt", False)

	def lookup_table_page_down (self):
		"""Page the editor's lookup table down, refresh the UI and return True."""
		self._editor.lookup_table.page_down ()
		self.update()
		return True

	def lookup_table_page_up (self):
		"""Page the editor's lookup table up, refresh the UI and return True."""
		self._editor.lookup_table.page_up ()
		self.update()
		return True

	def process_key_event (self, key):
		"""Handle one key event; return True when the event was consumed.

		Steps, in order:
		1. With no extra string, releasing Shift right after an unmodified
		   Shift press toggles the mode and consumes the event.
		2. In Chinese mode with the editor at its end, Shift+A..Z or an active
		   CapsLock mask switches mode; the key is then handled in that mode.
		3. If the clipboard helper started by Ctrl+R has exited, its output is
		   loaded into the editor and the event is consumed.
		4. Other key releases are not handled (False).
		5. The key goes to the handler of the current mode.  InputException
		   only beeps; any other exception beeps and is logged.  The UI is
		   refreshed in every case.
		"""
		is_shift = key.code in (KeyCode.KEY_Shift_L, KeyCode.KEY_Shift_R)
		shift_released = is_shift and key.mask & KeyMask.ReleaseMask
		if self._chinese_mode and self._editor.is_end() and not self.get_extra_string():
			if shift_released and self._shift_key:
				self.change_mode()
				return True
			if key.mask == KeyMask.ShiftMask and (key.code >= KeyCode.KEY_A and key.code <= KeyCode.KEY_Z) \
				or key.mask & KeyMask.CapsLockMask:
				self.change_mode()
		elif not self.get_extra_string():
			if shift_released and self._shift_key:
				self.change_mode()
				return True
		# Remember a bare Shift press so that the matching release can toggle.
		if is_shift and key.mask == KeyMask.NullMask:
			self._shift_key = True
		else:
			self._shift_key = None
		if self.pipe and self.pipe.poll() != -1:
			try:
				# [:-1] drops the newline that the helper's print appends.
				self.origin_string = unicode(self.pipe.fromchild.read()[:-1],"utf8")
				self._editor.reverse(self.origin_string)
				self._editor.move_cursor_to (1)
			except Exception:
				self._editor.clear()
				self.beep ()
			else:
				self.commit_string(u"")
				self.update()
			finally:
				self.pipe = None
			return True
		if key.mask & KeyMask.ReleaseMask:
			return False
		try:
			if self._chinese_mode:
				result = self.chinese_process_key_event (key)
			else:
				result = self.english_process_key_event (key)
			self._prev_key = key
		except InputException:
			self.beep ()
			return True
		except Exception:
			self.beep ()
			self._log.print_exc()
			self._log.log("DEBUG", self._editor.cursor)
			self._log.log("DEBUG", [i.char.encode("utf-8") for i in self._editor.wordlist] )
			self._log.log("DEBUG", [i.get_screen_pinyin() for i in self._editor.pinyinlist] )
			self._log.log("DEBUG", self._editor.get_preedit().encode ("utf-8"))
			return True
		else:
			return result
		finally:
			self.update()

	def english_process_key_event (self, key):
		"""Leave every key unhandled in English mode."""
		return False

	def chinese_process_key_event (self, key):
		"""Apply one key press to the editor in Chinese mode.

		With nothing being composed, ASCII punctuation is committed in its
		full-width form, Ctrl+R starts the clipboard helper and every other
		key is left unhandled (False).  Otherwise the key is matched against
		the editing bindings below and True is returned.

		Raises InputException when the key is not usable in the current state.
		"""
		if self._editor.is_empty() and not self.get_extra_string():
			if key.code <= 127 and ascii.ispunct (chr (key.code)):
				self.commit_string (self.convert_to_full_width (unichr (key.code)))
				return True
			elif key.code == KeyCode.KEY_r and key.mask == KeyMask.ControlMask:
				# Fetch the PRIMARY selection in a child process; the result
				# is picked up by process_key_event once the child has exited.
				if not self.pipe:
					self.pipe = popen2.Popen3("python -c" +'"import gtk; print gtk.clipboard_get(selection=\\"PRIMARY\\").wait_for_text()"')
				return True
			else:
				return False
		if key.code in (KeyCode.KEY_Control_L,KeyCode.KEY_Control_R,
			KeyCode.KEY_Alt_L, KeyCode.KEY_Alt_R):
			return True
		elif key.code in (KeyCode.KEY_KP_Space, KeyCode.KEY_space):
			if self._editor.candidates and self._editor.lookup_table.is_cursor_visible():
				self._editor.select_cursor()
			elif self._editor.pinyinlist:
				self._editor.convert_all ()
			elif self._editor.cursor < len (self._editor.wordlist):
				self._editor.jump_to_next_word()
			else:
				self.commit_string (self._editor.commit())
			return True
		elif key.code == KeyCode.KEY_BackSpace:
			if not self._editor.pinyinlist and self.get_extra_string():
				raise InputException()
			self._editor.del_current()
			return True
		elif key.code == KeyCode.KEY_Delete:
			if self._editor.lookup_table.is_cursor_visible():
				self._editor.delete_cursor_phrase ()
			else:
				self._editor.del_next ()
			return True
		elif KeyCode.KEY_0 <= key.code <= KeyCode.KEY_9 and key.mask & KeyMask.ControlMask:
			# NOTE(review): Ctrl+0 passes index -1 here, while plain digit
			# selection below starts at KEY_1 -- confirm this is intended.
			self._editor.delete_phrase (key.code - KeyCode.KEY_1)
			return True
		elif KeyCode.KEY_0 <= key.code <= KeyCode.KEY_9 and key.mask & KeyMask.AltMask:
			self._editor.move_cursor_to (key.code - KeyCode.KEY_0)
			return True
		elif KeyCode.KEY_1 <= key.code <= KeyCode.KEY_9:
			self._editor.select (key.code-KeyCode.KEY_1)
			return True
		elif KeyCode.KEY_KP_1 <= key.code <= KeyCode.KEY_KP_9:
			self._editor.select (key.code-KeyCode.KEY_KP_1)
			return True
		elif key.code == KeyCode.KEY_Shift_L:
			# Left Shift picks the first candidate; clearing _shift_key keeps
			# this press from also counting as a mode-toggle tap.
			if not self._editor.is_end():
				self._editor.select (0)
				self._shift_key = None
			return True
		elif key.code == KeyCode.KEY_Shift_R:
			# Right Shift picks the second candidate.
			if not self._editor.is_end():
				self._editor.select (1)
				self._shift_key = None
			return True
		elif key.code in (KeyCode.KEY_equal, KeyCode.KEY_bracketright, KeyCode.KEY_Page_Down):
			if not self._editor.candidates:
				raise InputException()
			self._editor.lookup_table.page_down ()
			return True
		elif key.code in (KeyCode.KEY_minus, KeyCode.KEY_bracketleft, KeyCode.KEY_Page_Up):
			if not self._editor.candidates:
				raise InputException()
			self._editor.lookup_table.page_up ()
			return True
		elif key.code==KeyCode.KEY_Up:
			if not self._editor.candidates:
				raise InputException()
			self._editor.lookup_table.cursor_up()
			self._editor.lookup_table.show_cursor(True)
			return True
		elif key.code==KeyCode.KEY_Down:
			if not self._editor.candidates:
				raise InputException()
			self._editor.lookup_table.cursor_down()
			self._editor.lookup_table.show_cursor(True)
			return True
		elif key.code == KeyCode.KEY_Left or key.code == KeyCode.KEY_b and key.mask & KeyMask.ControlMask:
			self._editor.move_cursor (-1)
			return True
		elif key.code == KeyCode.KEY_Right or key.code == KeyCode.KEY_f and key.mask & KeyMask.ControlMask:
			if self.get_extra_string():
				raise InputException()
			self._editor.move_cursor (1)
			return True
		elif key.code == KeyCode.KEY_h and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_Home:
			if self.get_extra_string():
				raise InputException()
			self._editor.move_cursor_to (1)
			return True
		elif key.code == KeyCode.KEY_e and key.mask & KeyMask.ControlMask or key.code == KeyCode.KEY_End:
			if self.get_extra_string():
				raise InputException()
			self._editor.move_cursor_to (0)
			return True
		elif key.code in (KeyCode.KEY_Return, KeyCode.KEY_KP_Enter):
			self.commit_string (self._editor.commit() + self.get_extra_string())
			self.clear()
			return True
		elif key.code == KeyCode.KEY_Escape or key.code == KeyCode.KEY_c and key.mask & KeyMask.ControlMask:
			if self.origin_string:
				# Cancelling a clipboard conversion puts the original text back.
				self.commit_string(self.origin_string)
				self._editor.clear()
				self.origin_string = None
			elif self._editor.lookup_table.is_cursor_visible():
				self._editor.lookup_table.show_cursor(False)
				self._editor.update()
			else:
				self.clear()
				self._editor.clear()
			return True
		elif key.code <= 127 and ascii.ispunct (chr (key.code)) and not self.get_extra_string():
			if not self._editor.is_empty ():
				self.commit_string (self._editor.commit ())
			self.commit_string (self.convert_to_full_width (unichr (key.code)))
			return True
		else:
			raise InputException ()

	def convert_to_full_width (self, c):
		"""Return the Chinese (full-width) replacement for the character c.

		"." stays an ASCII full stop when the previously handled key was a
		digit and becomes the ideographic full stop otherwise.  Double and
		single quotes alternate between their opening and closing forms, so
		calling this method changes the stored quotation state.  Characters
		without a special rule go through scim.unichar_half_to_full().
		"""
		if c == u".":
			prev = self._prev_key
			if prev and KeyCode.KEY_0 <= prev.code <= KeyCode.KEY_9:
				return u"."
			return u"\u3002"
		if c == u"\"":
			self._double_quotation_state = not self._double_quotation_state
			if self._double_quotation_state:
				return u"\u201c"
			return u"\u201d"
		if c == u"'":
			self._single_quotation_state = not self._single_quotation_state
			if self._single_quotation_state:
				return u"\u2018"
			return u"\u2019"
		if c in self._FIXED_FULL_WIDTH:
			return self._FIXED_FULL_WIDTH[c]
		return scim.unichar_half_to_full (c)

class ZhengJuFactory (IMEngineFactory):
	"""Factory that describes ZhengJu to SCIM and builds its engines."""

	def __init__ (self, config):
		"""Fill in the factory metadata and keep config for later use."""
		IMEngineFactory.__init__ (self, config)
		self.name = _(u"ZhengJu")
		self.uuid = "59e29ad8-3c95-4cd0-b02f-e21bf1317f7a"
		self.authors = u"Yu Fan <yufanyufan@gmail.com>"
		self.icon_file = "/usr/share/scim/icons/scim-python.png"
		self.credits = u"GPL"
		self.help = _(u"Help For ZhengJu")
		self.set_languages ("zh")
		self._config = config

	def create_instance (self, encoding, id):
		"""Build the engine selected by the PinYinSchema setting.

		"QuanPin" and "ShuangPin" pick their own engines; "JianPin" (the
		default) and any unrecognised value pick JianPinEngine.  The engine
		module is imported only when it is needed.
		"""
		schema = self._config.read ("/IMEngine/Python/ZhengJu/PinYinSchema", "JianPin")
		if schema == "QuanPin":
			import QuanPin
			engine_class = QuanPin.QuanPinEngine
		elif schema == "ShuangPin":
			import ShuangPin
			engine_class = ShuangPin.ShuangPinEngine
		else:
			import JianPin
			engine_class = JianPin.JianPinEngine
		return engine_class (self, self._config, encoding, id)

	def reload_config (self, config):
		"""Replace the stored config; it is used by later create_instance() calls."""
		self._config = config
		
class PseudoConfig:
	"""Config stand-in whose read() always answers with the supplied default."""

	def read(self, string, default):
		"""Ignore the key given in string and return default unchanged."""
		return default

def train(file_name):
	print "Training by " + file_name
	editor = Editor()
	import re
	ex = re.compile(ur"[\da-zA-Z\W]",re.UNICODE)
	for l in file(file_name):
		ll = unicode(l,"utf8")[:-1]
		t = ex.split(ll)
		for i in t:
			if i:
				try:
					editor.reverse(i)
					#~ print i
					editor.learn()
				except:
					print file
					traceback.print_exc ()
					raise Exception()				

def print_usage():
	print "ZhengJu -f FILE\tRead Sentenc from file"
	print "ZhengJu \tConvert parameter to pinyin"

if __name__ == "__main__":
	editor = Editor()
	import sys
	try:
		if len(sys.argv) == 3:
			if sys.argv[1] == "-f":
				train(sys.argv[2])
			else:
				raise Exception()
		elif len(sys.argv) == 2:
			try:
				editor.reverse(unicode(sys.argv[1],"utf8"))
				for i in editor.wordlist:
					print ID_PINYIN_DICT[i.get_pinyin_id()],
					print i.char,
			except:
				print "Can't convert this to pinyin"
				raise Exception()
		else:
			raise Exception()
	except:
		traceback.print_exc ()
		print_usage()
		sys.exit(1)
	else:
		sys.exit(0)
Summary ✨

This Python script implements ZhengJu, a Chinese pinyin input method engine (IME) for SCIM. The engine reads its settings, such as the pinyin schema, from the SCIM configuration. Run from the command line, the script can either convert a Chinese string given as its argument to pinyin, or train itself on a UTF-8 text file passed with `-f`.
Tech Fingerprint

Standard Library: OS Interaction
Alerts (111)

'import *' Avoid wildcard imports to prevent namespace pollution; import specific names or use aliases
33 35
'lambda' Avoid complex 'lambda' functions; prefer named functions for clarity and debugging
41 46
'def' Ensure functions have docstrings for documentation
63 81 95 98 101 104 107 114 117 120 126 143 152 168 192 198 201 204 207 222 227 233 255 265 272 287 290 297 313 318 324 349 378 398 419 428 436 443 450 460 469 481 527 548 607 656 672 691 699 713 723 738 751 761 769 777 785 818 833 844 865 867 884 893 898 903 909 916 926 931 938 945 949 954 959 1019 1022 1152 1196 1212 1216 1219 1238
'== None' Use 'is' for None comparisons (e.g., x is None)
133 135 361 369 388 726
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
251 408 703 717
'open(' Use 'with open()' to ensure Files are properly closed
477
'list(' Avoid unnecessary list conversions; use generators where possible
826
Complexity hotspot; lines 961 to 964 (total complexity: 7)
961 962 963 964
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
986 1233 1257 1262
'raise Exception(' Raise specific exception types for better error handling
1236 1250 1259 1261