PageRenderTime 83ms CodeModel.GetById 11ms app.highlight 63ms RepoModel.GetById 1ms app.codeStats 0ms

/python/engine/XingMa/XMSQLiteDB.py

http://scim-python.googlecode.com/
Python | 914 lines | 872 code | 6 blank | 36 comment | 7 complexity | f7a859d700e8564df830669c483322d7 MD5 | raw file
  1# -*- coding: utf-8 -*-
  2# vim: set noet ts=4:
  3#
  4# scim-python
  5#
  6# Copyright (c) 2008-2008 Yu Yuwei <acevery@gmail.com>
  7#
  8#
  9# This library is free software; you can redistribute it and/or
 10# modify it under the terms of the GNU Lesser General Public
 11# License as published by the Free Software Foundation; either
 12# version 2 of the License, or (at your option) any later version.
 13#
 14# This library is distributed in the hope that it will be useful,
 15# but WITHOUT ANY WARRANTY; without even the implied warranty of
 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17# GNU Lesser General Public License for more details.
 18#
 19# You should have received a copy of the GNU Lesser General Public
 20# License along with this program; if not, write to the
 21# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 22# Boston, MA  02111-1307  USA
 23#
 24# $Id: $
 25#
 26
 27import os
 28import os.path as path
 29import sqlite3
 30import XMDict
 31import uuid
 32import time
 33import re
 34
 35patt_r = re.compile(r'c([ea])(\d):(.*)')
 36patt_p = re.compile(r'p(-{0,1}\d)(\d)')
 37
 38# first make some number index we will used :)
 39#(MLEN, CLEN, M0, M1, M2, M3, M4, PHRASE, FREQ, USER_FREQ) = range (0,10)
 40
 41
 42class XMSQLiteDB:
 43	'''Phrase database for XingMa'''
 44	def __init__(self, name = 'xm.db', user_db = None, filename = None ):
 45		# first we use the Parse in XMDict, which transform the char(a,b,c,...) to int(1,2,3,...) to fasten the sql enquiry
 46		self.parse = XMDict.Parse
 47		self.deparse = XMDict.Deparse
 48		if filename:
 49			self.db = sqlite3.connect( filename )
 50		else:
 51			try:
 52				os.system('cat %s > /dev/null' % name)
 53			except:
 54				pass
 55			# open system phrase db
 56			self.db = sqlite3.connect(  name )
 57		try:
 58			self.db.execute( 'PRAGMA page_size = 4096; ' )
 59			self.db.execute( 'PRAGMA cache_size = 40000; ' )
 60			# increase the cache size to speedup sqlite enquiry
 61			self.db.execute( 'PRAGMA temp_store = MEMORY; ' )
 62			self.db.execute( 'PRAGMA synchronous = OFF; ' )
 63		except:
 64			print 'encountering error when init db'
 65			pass
 66		# create IME property table
 67		sqlstr = 'CREATE TABLE IF NOT EXISTS main.ime (attr TEXT, val TEXT);' 
 68		self.db.executescript( sqlstr )
 69		# make sure we have values in ime table.
 70		if not self.db.execute('SELECT * FROM main.ime;').fetchall():
 71			ime_keys={'name':'',
 72					  'name.zh_cn':'',
 73					  'name.zh_hk':'',
 74					  'name.zh_tw':'',
 75					  'author':'somebody', 
 76					  'uuid':'%s' % uuid.uuid4(),
 77					  'serial_number':'%s' % time.strftime('%Y%m%d'),
 78					  'icon':'/usr/share/scim/icons/scim-python.png',
 79					  'credit':'GPL',
 80					  'languages':'zh',
 81					  'valid_input_chars':'abcdefghijklmnopqrstuvwxyz',
 82					  'max_key_length':'4',
 83			#		  'commit_keys':'space',
 84			#		  'forward_keys':'Return',
 85			#		  'select_keys':'1,2,3,4,5,6,7,8,9,0',
 86			#		  'page_up_keys':'Page_Up,minus',
 87			#		  'page_down_keys':'Page_Down,equal',
 88					  'status_prompt':'CN',
 89					  'def_full_width_punct':'TRUE',
 90					  'def_full_width_letter':'FALSE',
 91					  'user_can_define_phrase':'FALSE',
 92					  'pinyin_mode':'FALSE',
 93					  'dynamic_adjust':'FALSE',
 94					  'no_check_chars':u'',
 95					  'rules':''}
 96					  #'rules':'ce2:p11+p12+p21+p22;ce3:p11+p21+p22+p31;ca4:p11+p21+p31+p41'}
 97			# inital the attribute in ime table, which should be updated from mabiao
 98			for _name in ime_keys:
 99				sqlstr = 'INSERT INTO main.ime (attr,val) VALUES (?,?);'
100				self.db.execute( sqlstr, (_name,ime_keys[_name]) )
101		# share variables in this class:
102		self._mlen = int ( self.get_ime_property ("max_key_length") )
103		#(MLEN, CLEN, M0, M1, M2, M3, M4, PHRASE, FREQ, USER_FREQ) = range (0,10)
104		self._pt_index = ['mlen','clen']
105		for i in range(self._mlen):
106			self._pt_index.append ('m%d' %i)
107		self._pt_index += ['phrase','freq','user_freq']
108		self.user_can_define_phrase = self.get_ime_property('user_can_define_phrase')
109		if self.user_can_define_phrase:
110			if self.user_can_define_phrase.lower() == u'true' :
111				self.user_can_define_phrase = True
112				# cache goucima to memory to speedup gouci
113				try:
114					self.cache_goucima()
115				except:
116					pass
117			else:
118				self.user_can_define_phrase = False
119		else:
120			print 'Could not find "user_can_define_phrase" entry from database, is it a outdated database?'
121			self.user_can_define_phrase = False
122		
123		self.dynamic_adjust = self.get_ime_property('dynamic_adjust')
124		if self.dynamic_adjust:
125			if self.dynamic_adjust.lower() == u'true' :
126				self.dynamic_adjust = True
127			else:
128				self.dynamic_adjust = False
129		else:
130			print 'Could not find "dynamic_adjust" entry from database, is it a outdated database?'
131			self.dynamic_adjust = False
132
133		self._no_check_chars = self.get_no_check_chars()	
134		self.rules = self.get_rules ()
135		self._goucima = {}
136		
137		if filename:
138			return
139		
140		# user database:
141		if user_db != None:
142			home_path = os.getenv ("HOME")
143			xingma_path = path.join (home_path, ".scim", "scim-python", "xingma")
144			user_db = path.join (xingma_path, user_db)
145			if not path.isdir (xingma_path):
146				os.makedirs (xingma_path)
147			try:
148				desc = self.get_database_desc (user_db)
149				if desc == None :
150					self.init_user_db (user_db)
151				elif desc["version"] != "0.1":
152					new_name = "%s.%d" %(user_db, os.getpid())
153					print >> sys.stderr, "Can not support the user db. We will rename it to %s" % new_name
154					os.rename (user_db, new_name)
155					self.init_user_db (user_db)
156			except:
157				import traceback
158				traceback.print_exc()
159		else:
160			user_db = ":memory:"
161		
162		# open user phrase database
163		try:
164			self.db.execute ('ATTACH DATABASE "%s" AS user_db;' % user_db)
165		except:
166			print >> sys.stderr, "The user database was damaged. We will recreate it!"
167			os.rename (user_db, "%s.%d" % (user_db, os.getpid ()))
168			self.init_user_db (user_db)
169			self.db.execute ('ATTACH DATABASE "%s" AS user_db;' % user_db)
170
171		# try create all tables in user database
172		self.create_tables ("user_db")
173		self.create_indexes ("user_db")
174		self.generate_userdb_desc ()
175		
176		# attach mudb for working process
177		mudb = ":memory:"  
178		self.db.execute ('ATTACH DATABASE "%s" AS mudb;' % mudb )
179		self.create_tables ("mudb")
180	
181	def update_phrase (self, entry, database='user_db'):
182		'''update phrase freqs'''
183		#print entry
184		_con = [ entry[-1] ] + list(entry[0:2+entry[0]]) + [entry[-3]]
185		#print _con
186		_condition = u''.join( map(lambda x: 'AND m%d = ? ' % x, range(entry[0]) )	)
187		#print _condition
188		sqlstr = 'UPDATE %s.phrases SET user_freq = ? WHERE mlen = ? AND clen = ? %s AND phrase = ?;' % (database, _condition)
189		#print sqlstr
190		self.db.execute ( sqlstr , _con )
191		# because we may update different db, we'd better commit every time.
192		self.db.commit()
193
194	def sync_usrdb (self):
195		# we need to update the user_db
196		#print 'sync userdb'
197		mudata = self.db.execute ('SELECT * FROM mudb.phrases;').fetchall()
198		data_u = filter ( lambda x: x[-2] in [1,-3], mudata)
199		data_a = filter ( lambda x: x[-2]==2, mudata)
200		data_n = filter ( lambda x: x[-2]==-2, mudata)
201		#print data_a
202		data_a = map (lambda x: (u''.join ( map(self.deparse, x[2:2+x[0]])),x[-3],0,x[-1] ), data_a)
203		data_n = map (lambda x: (u''.join ( map(self.deparse, x[2:2+x[0]])),x[-3],-1,x[-1] ), data_n)
204		#print data_u
205		map (self.update_phrase, data_u)
206		#print self.db.execute('select * from user_db.phrases;').fetchall()
207		map (self.u_add_phrase,data_a)
208		map (self.u_add_phrase,data_n)
209
210	def create_tables (self, database):
211		'''Create tables that contain all phrase'''
212
213		try:
214			self.db.execute( 'PRAGMA cache_size = 20000; ' )
215			# increase the cache size to speedup sqlite enquiry
216		except:
217			pass
218		if database == 'main':
219			# create  ikeys table
220			sqlstr = 'CREATE TABLE IF NOT EXISTS %s.ikeys (ikey TEXT PRIMARY KEY, id INTEGER);' % database
221			self.db.execute ( sqlstr )
222		
223			# create goucima table, this table is used in construct new phrases
224			sqlstr = 'CREATE TABLE IF NOT EXISTS %s.goucima (zi TEXT PRIMARY KEY' % database
225			#for i in range(self._mlen):
226			#	sqlstr += ', g%d INTEGER' % i 
227			sqlstr += ''.join(map (lambda x: ', g%d INTEGER' % x, range(self._mlen)) )
228			sqlstr += ');'
229			self.db.execute ( sqlstr )
230
231			# create pinyin table, this table is used in search single character for user handly
232			sqlstr = 'CREATE TABLE IF NOT EXISTS %s.pinyin ( plen INTEGER, ' % database
233			#for i in range(6):
234			#	sqlstr += 'p%d INTEGER, ' % i 
235			sqlstr += ''.join( map (lambda x: 'p%d INTEGER, ' % x, range(6) ) )
236			sqlstr += 'zi TEXT, freq INTEGER);'
237			self.db.execute ( sqlstr )
238
239		# create phrase table (mabiao)
240		sqlstr = 'CREATE TABLE IF NOT EXISTS %s.phrases ( mlen INTEGER, clen INTEGER, ' % database
241		#for i in range(self._mlen):
242		#	sqlstr += 'm%d INTEGER, ' % i 
243		sqlstr += ''.join ( map (lambda x: 'm%d INTEGER, ' % x, range(self._mlen)) )
244		sqlstr += 'phrase TEXT, freq INTEGER, user_freq INTEGER);'
245		self.db.execute ( sqlstr )
246		self.db.commit()
247	
248	def update_ime (self, attrs):
249		'''Update attributes in ime table, attrs is a iterable object
250		Like [(attr,val), (attr,val), ...]
251		'''
252		sqlstr = 'UPDATE main.ime SET val = ? WHERE attr = ?;' 
253		for attr,val in attrs:
254			_sqlstr = 'SELECT * from main.ime WHERE attr = ?' 
255			res = self.db.execute( _sqlstr, (attr,) ).fetchall()
256			if res:
257				self.db.execute(sqlstr,(val,attr))
258			else:
259				#print '"',attr,'"'," didn't in ime property now!"
260				pass
261		# we need to update some self variables now.
262		self._mlen = int (self.get_ime_property ('max_key_length' ))
263		self._pt_index = ['mlen','clen']
264		for i in range(self._mlen):
265			self._pt_index.append ('m%d' %i)
266		self._pt_index += ['phrase','freq','user_freq']
267		self.user_can_define_phrase = self.get_ime_property('user_can_define_phrase')
268		if self.user_can_define_phrase:
269			if self.user_can_define_phrase.lower() == u'true' :
270				self.user_can_define_phrase = True
271			else:
272				self.user_can_define_phrase = False
273		else:
274			print 'Could not find "user_can_define_phrase" entry from database, is it a outdated database?'
275			self.user_can_define_phrase = False
276		self.rules = self.get_rules ()
277
278		self.db.commit()
279
280	def get_rules (self):
281		'''Get phrase construct rules'''
282		rules={'above':4}
283		if self.user_can_define_phrase:
284			try:
285				_rules = self.get_ime_property ('rules')
286				if _rules:
287					_rules = _rules.strip().split(';')
288				for rule in _rules:
289					res = patt_r.match (rule)
290					if res:
291						cms = []
292						if res.group(1) == 'a':
293							rules['above'] = int(res.group(2))
294						_cms = res.group(3).split('+')
295						if len(_cms) > int(self.get_ime_property('max_key_length')):
296							print 'rule: "%s" over max key length' % rule
297							break
298						for _cm in _cms:
299							cm_res = patt_p.match(_cm)
300							cms.append(( int(cm_res.group(1)),int(cm_res.group(2)) ))
301						rules[int(res.group(2))]=cms
302					else:
303						print 'not a legal rule: "%s"'  % rule 
304			except Exception:
305				import traceback
306				traceback.print_exc ()
307			return rules
308		else:
309			return ""
310
311	def get_no_check_chars (self):
312		'''Get the characters which engine should not change freq'''
313		_chars= self.get_ime_property('no_check_chars')
314		try:
315			_chars = _chars.decode('utf-8')
316		except:
317			pass
318		return _chars
319
320	def add_phrases (self, phrases, database = 'main'):
321		'''Add phrases to database, phrases is a iterable object
322		Like: [(xingma, phrase, freq ,user_freq), (xingma, phrase, freq, user_freq), ...]
323		'''
324		if database == 'main':
325			map (self.add_phrase, phrases)
326		else:
327			map (self.add_phrase, phrases, [database]*len(phrases),[False]*len(phrases) )
328		self.db.commit()
329	
330	def add_new_phrases (self, nphrases, database='main'):
331		'''Add new phrases into db, new phrases is a object
332		of [(phrase,freq), (phrase,freq),...]'''
333		n_phrases=[]
334		for _ph, _freq in nphrases:
335			try:
336				_xm = self.parse_phrase_to_xm (_ph)
337				if not self.check_phrase_internal (_ph, _xm, database):
338					# we don't have this phrase
339					n_phrases.append ( (_xm, _ph, _freq, 0) )
340			except:
341				print '\"%s\" would not been added' % _ph
342		if n_phrases:
343			self.add_phrases ( n_phrases, database )
344	
345
346	def u_add_phrase (self,nphrase):
347		'''Add a phrase to userdb'''
348		self.add_phrase (nphrase,database='user_db')
349
350	def add_phrase (self, aphrase, database = 'main', commit=True):
351		'''Add phrase to database, phrase is a object of
352		(xingma, phrase, freq ,user_freq)
353		'''
354		sqlstr = 'INSERT INTO %s.phrases ( mlen, clen, '
355		sql_suffix = 'VALUES ( ?, ?, '
356		mmlen = range(self._mlen)
357		sqlstr += ''.join ( map(lambda x: 'm%d, ' %x , mmlen) )
358		sql_suffix += ''.join ( map (lambda x: '?, ' , mmlen) )   
359		sqlstr += 'phrase, freq, user_freq) '
360		sql_suffix += '?, ?, ? );'
361		sqlstr += sql_suffix
362		
363		try:
364			xingma,phrase,freq,user_freq = aphrase
365		except:
366			xingma,phrase,freq = aphrase
367			user_freq = 0
368		try:
369			xm = self.parse(xingma)
370			if len(xm) != len(xingma):
371				print 'In %s %s: we parse xingma fail' % (phrase, xingma )
372				return
373			record = [None, None, None, None, None]
374			map( lambda x: record.append(None), range(self._mlen))
375			record [0] = len (xingma)
376			record [1] = len (phrase)
377			record [2: 2+len(xingma)] = map (lambda x: xm[x].get_key_id(), range(0,len(xingma)))
378			record [2+self._mlen] = phrase
379			record [2+self._mlen+1] = freq
380			record [2+self._mlen+2] = user_freq
381			self.db.execute (sqlstr % database, record)
382			if commit:
383				self.db.commit()	
384		except Exception:
385			import traceback
386
387	def add_goucima (self, gcms):
388		'''Add goucima into database, gcms is iterable object
389		Like gcms = [(zi,goucima),(zi,goucima), ...]
390		'''
391		count = 1
392		for zi,gcm in gcms:
393			_con = ''
394			_val = ''
395			_len = min ( len(gcm),self._mlen)
396			for i in range( _len ):
397				_con += ', g%d' % i
398				_val += ', ?' 
399			sqlstr = '''INSERT INTO main.goucima ( zi %s )
400			VALUES ( ? %s );''' % (_con, _val)
401			try:
402				gc = self.parse(gcm)
403				if len(gc) != len(gcm):
404					error_m = u'%s %s: Can not parse goucima' % (zi, gcm )
405					raise Exception ( error_m.encode ('utf8') )
406				record = [zi]
407				for i in range(_len):
408					record.append( gc[i].get_key_id())
409				self.db.execute (sqlstr , record)
410			
411			except Exception:
412				import traceback
413				traceback.print_exc()
414			count += 1
415		self.db.commit()
416
417	def add_pinyin (self, pinyins, database = 'main'):
418		'''Add pinyin to database, pinyins is a iterable object
419		Like: [(zi,pinyin, freq), (zi, pinyin, freq), ...]
420		'''
421		sqlstr = 'INSERT INTO %s.pinyin ( plen, '
422		sql_suffix = 'VALUES ( ?, '
423		for i in range(6):
424			sqlstr += 'p%d, ' % i
425			sql_suffix += '?, '
426		sqlstr += 'zi, freq ) '
427		sql_suffix += '?, ? );'
428		sqlstr += sql_suffix
429		
430		count = 1
431		for pinyin,zi,freq in pinyins:
432			try:
433				py = self.parse(pinyin)
434				if len(py) != len(pinyin):
435					error_m = u'%s %s: Can not parse pinyin' % (zi, pinyin )
436					raise Exception ( error_m.encode ('utf8') )
437				record = [None, None, None, None, None, None, None, None, None]
438				record [0] = len (pinyin)
439				for i in range(0,len(pinyin)):
440					record [ 1+i ] = py[i].get_key_id()
441				record [-2] = zi
442				record [-1] = freq
443				self.db.execute (sqlstr % database, record)
444			except Exception:
445				print count, ': ', zi.encode('utf8'), ' ', pinyin
446				import traceback
447				traceback.print_exc()
448			count += 1
449
450		self.db.commit()	
451	
452	def optimize_database (self, database='main'):
453		sqlstr = '''
454			CREATE TABLE tmp AS SELECT * FROM %(database)s.phrases;
455			DELETE FROM %(database)s.phrases;
456			INSERT INTO %(database)s.phrases SELECT * FROM tmp ORDER BY %(xmstr)s mlen ASC, freq DESC;
457			DROP TABLE tmp;
458			CREATE TABLE tmp AS SELECT * FROM %(database)s.goucima;
459			DELETE FROM %(database)s.goucima;
460			INSERT INTO %(database)s.goucima SELECT * FROM tmp ORDER BY zi,g0,g1;
461			DROP TABLE tmp;
462			CREATE TABLE tmp AS SELECT * FROM %(database)s.pinyin;
463			DELETE FROM %(database)s.pinyin;
464			INSERT INTO %(database)s.pinyin SELECT * FROM tmp ORDER BY p0,p1,p2,p3,p4,p5,plen ASC;
465			DROP TABLE tmp;
466			'''
467		xmstr = ''
468		for i in range(self._mlen):
469			xmstr +='m%d, ' % i
470		self.db.executescript (sqlstr % {'database':database,'xmstr':xmstr })
471		self.db.executescript ("VACUUM;")
472		self.db.commit()
473	
474	def drop_indexes(self, database):
475		'''Drop the index in database to reduce it's size'''
476		sqlstr = '''
477			DROP INDEX IF EXISTS %(database)s.goucima_index_z;
478			DROP INDEX IF EXISTS %(database)s.pinyin_index_i;
479			DROP INDEX IF EXISTS %(database)s.phrases_index_p;
480			DROP INDEX IF EXISTS %(database)s.phrases_index_i;
481			VACUUM; 
482			''' % { 'database':database }
483		
484		self.db.executescript (sqlstr)
485		self.db.commit()
486	
487	def create_indexes(self, database):
488		sqlstr = '''
489			DROP INDEX IF EXISTS %(database)s.goucima_index_z;
490			CREATE INDEX IF NOT EXISTS %(database)s.goucima_index_z ON goucima (zi);
491			DROP INDEX IF EXISTS %(database)s.pinyin_index_i;
492			CREATE INDEX IF NOT EXISTS %(database)s.pinyin_index_i ON pinyin (p0,p1,p2,p3,p4,p5,plen ASC, freq DESC);
493			VACUUM; 
494			''' % { 'database':database }
495
496		sqlstr_t = '''
497			DROP INDEX IF EXISTS %(database)s.phrases_index_p;
498			CREATE INDEX IF NOT EXISTS %(database)s.phrases_index_p ON phrases (%(xmstr)s mlen ASC, freq DESC);
499			DROP INDEX IF EXISTS %(database)s.phrases_index_i;
500			CREATE INDEX IF NOT EXISTS %(database)s.phrases_index_i ON phrases (phrase, mlen ASC);
501			''' 
502		xmstr = ''
503		for i in range(self._mlen):
504			xmstr +='m%d,' % i
505		if database == 'main':
506			sqlstr = sqlstr_t % {'database':database,'xmstr':xmstr } + sqlstr
507		else:
508			sqlstr = sqlstr_t % {'database':database,'xmstr':xmstr }
509		self.db.executescript (sqlstr)
510		self.db.commit()
511	
512	def compare (self,x,y):
513		return cmp (x[0],y[0]) or -(cmp (x[-1],y[-1])) or -(cmp (x[-2],y[-2]))
514
515	def select_words( self, xms, onechar=False ):
516		'''
517		Get phrases from database by XingMa_Key objects
518		( which should be equal or less than the max key length)
519		This method is called in XingMa by passing UserInput held data
520		Return result[:] 
521		'''
522		# firstly, we make sure the len we used is equal or less than the max key length
523		_len = min( len(xms),self._mlen )
524		_condition = ''
525		_condition += ''.join ( map (lambda x: 'AND m%d = ? ' %x, range(_len) ) )
526		if onechar:
527			# for some users really like to select only single characters
528			_condition += 'AND clen=1 '
529		# you can increase the x in _len + x to include more result, but in the most case, we only need one more key result, so we don't need the extra overhead :)
530		# we start search for 1 key more, if nothing, then 2 key more and so on
531		# this is the max len we need to add into the select cause.
532		w_len = self._mlen - _len +1
533		# we start from 2, because it is < in the sqlite select, which need 1 more.
534		x_len = 2
535		while x_len <= w_len + 1:
536			sqlstr = '''SELECT * FROM (SELECT * FROM main.phrases WHERE mlen < %(mk)d  %(condition)s 
537			UNION ALL
538			SELECT * FROM user_db.phrases WHERE mlen < %(mk)d %(condition)s 
539			UNION ALL
540			SELECT * FROM mudb.phrases WHERE mlen < %(mk)d %(condition)s )
541			ORDER BY mlen ASC, user_freq DESC, freq DESC;''' % { 'mk':_len+x_len, 'condition':_condition}
542			# we have redefine the __int__(self) in class XMDict.XingMa_key to return the key id, so we can use map to got key id :)
543			_xms = map(int,xms[:_len])
544			_xms += _xms + _xms
545			result = self.db.execute(sqlstr, _xms).fetchall()
546			#self.db.commit()
547			# if we find word, we stop this while, 
548			if len(result) >0:
549				break
550			x_len += 1
551		# here in order to get high speed, I use complicated map
552		# to subtitute for
553		sysdb={}
554		usrdb={}
555		mudb={}
556		_cand = []
557		#searchres = map ( lambda res: res[-2] and [ True, [(res[:-2],[res[:-1],res[-1:]])] ]\
558		#		or [ False, [(res[:-2] , [res[:-1],res[-1:]])] ] \
559		#		, result )
560		searchres = map ( lambda res: [ int(res[-2]), int(res[-1]), [(res[:-2],[res[:-1],res[-1:]])] ], result)
561		# for sysdb
562		reslist=filter( lambda x: not x[1], searchres )
563		map (lambda x: sysdb.update(x[2]), reslist)
564		# for usrdb
565		reslist=filter( lambda x: ( x[0] in [0,-1] ) and x[1], searchres )
566		map (lambda x: usrdb.update(x[2]), reslist)
567		# for mudb
568		reslist=filter( lambda x: ( x[0] not in [0,-1] ) and x[1], searchres )
569		map (lambda x: mudb.update(x[2]), reslist)
570
571		# first process mudb
572		searchres = map ( lambda key: mudb[key][0] + mudb[key][1], mudb )
573		#print searchres
574		map (_cand.append, searchres)
575
576		# now process usrdb and sysdb
577		searchres = map ( lambda key:  (not mudb.has_key(key))  and usrdb[key][0] + usrdb[key][1]\
578				or None , usrdb )
579		searchres = filter(lambda x: bool(x), searchres )
580		#print searchres
581		map (_cand.append, searchres)
582		searchres = map ( lambda key: ((not mudb.has_key(key)) and (not usrdb.has_key(key)) )and sysdb[key][0] + sysdb[key][1]\
583				or None, sysdb )
584		searchres = filter (lambda x: bool(x), searchres)
585		map (_cand.append, searchres)
586		#for key in usrdb:
587		#	if not sysdb.has_key (key):
588		#		_cand.append( usrdb[key][0] + usrdb[key][1] )
589		#	else:
590		#		_cand.append( sysdb[key][0] + usrdb[key][1] )
591		#for key in sysdb:
592		#	if not usrdb.has_key (key):
593		#		_cand.append( sysdb[key][0] + sysdb[key][1] )
594		_cand.sort(cmp=self.compare)
595		return _cand[:]
596
597	def select_zi( self, xms ):
598		'''
599		Get zi from database by XingMa_Key objects
600		( which should be equal or less than 6)
601		This method is called in XingMa by passing UserInput held data
602		Return  result[:] 
603		'''
604		# firstly, we make sure the len we used is equal or less than the max pinyin length 6
605		_len = min( len(xms), 6 )
606		_condition = ''
607		#for i in range(_len):
608		#	_condition += 'AND p%d = ? ' % i
609		_condition += ''.join ( map (lambda x: 'AND p%d = ? ' %x, range(_len)) )
610		# you can increase the x in _len + x to include more result, but in the most case, we only need one more key result, so we don't need the extra overhead :)
611		sqlstr = '''SELECT * FROM main.pinyin WHERE plen < %(mk)d  %(condition)s 
612		ORDER BY plen ASC, freq DESC;''' % { 'mk':_len+2, 'condition':_condition}
613		# we have redefine the __int__(self) in class XMDict.XingMa_key to return the key id, so we can use map to got key id :)
614		_xms = map(int,xms[:_len])
615		result = self.db.execute(sqlstr, _xms).fetchall()
616		#self.db.commit()
617		return result[:]
618
619	def get_ime_property( self, attr ):
620		'''get IME property from database, attr is the string of property,
621		which should be str.lower() :)
622		'''
623		sqlstr = 'SELECT val FROM main.ime WHERE attr = ?' 
624		_result = self.db.execute( sqlstr, (attr,)).fetchall()
625		#self.db.commit()
626		if _result:
627			return _result[0][0]
628		else:
629			return None
630
631	def get_phrase_table_index (self):
632		'''get a list of phrase table columns name'''
633		return self._pt_index[:]
634
635	def generate_userdb_desc (self):
636		try:
637			sqlstring = 'CREATE TABLE IF NOT EXISTS user_db.desc (name PRIMARY KEY, value);'
638			self.db.executescript (sqlstring)
639			sqlstring = 'INSERT OR IGNORE INTO user_db.desc  VALUES (?, ?);'
640			self.db.execute (sqlstring, ('version', '0.1'))
641			sqlstring = 'INSERT OR IGNORE INTO user_db.desc  VALUES (?, DATETIME("now", "localtime"));'
642			self.db.execute (sqlstring, ("create-time", ))
643			self.db.commit ()
644		except:
645			import traceback
646			traceback.print_exc ()
647
648	def init_user_db (self,db_file):
649		if not path.exists (db_file):
650			db = sqlite3.connect (db_file)
651			db.execute('PRAGMA page_size = 4096;')
652			db.execute( 'PRAGMA cache_size = 20000;' )
653			db.execute( 'PRAGMA temp_store = MEMORY; ' )
654			db.execute( 'PRAGMA synchronous = OFF; ' )
655			db.commit()
656	
657	def get_database_desc (self, db_file):
658		if not path.exists (db_file):
659			return None
660		try:
661			db = sqlite3.connect (db_file)
662			db.execute('PRAGMA page_size = 4096;')
663			db.execute( 'PRAGMA cache_size = 20000;' )
664			db.execute( 'PRAGMA temp_store = MEMORY; ' )
665			db.execute( 'PRAGMA synchronous = OFF; ' )
666			desc = {}
667			for row in db.execute ("SELECT * FROM desc;").fetchall():
668				desc [row[0]] = row[1]
669			self.db.commit()
670			return desc
671		except:
672			return None
673	
674	def cache_goucima (self):
675		self._goucima = {}
676		goucima = self.db.execute('SELECT * FROM main.goucima;').fetchall()
677		map(lambda x: self._goucima.update({x[0]:x[1:]}), goucima)
678	
679	def get_gcm_id (self, zi):
680		'''Get goucima of given character'''
681		if self._goucima:
682			# we already cache the goucima
683			if not isinstance(zi,unicode):
684				zi = zi.decode('utf-8')
685			try:
686				gcds = self._goucima[zi]
687				return gcds
688			except:
689				pass
690
691		sqlstr = 'SELECT %s FROM main.goucima WHERE zi =?;' % ','.join( map (lambda x: 'g%d' % x, range(self._mlen) ) )
692		return self.db.execute(sqlstr,(zi,)).fetchall()[0]
693
694	def parse_phrase (self, phrase):
695		'''Parse phrase to get its XingMa code'''
696		# first we make sure that we are parsing unicode string
697		try:
698			phrase = unicode(phrase)
699		except:
700			phrase = phrase.decode('utf8')
701		p_len = len(phrase) 
702		xmlist = []
703		if p_len < 2:
704			# phrase should not be shorter than 2
705			return []
706		try:
707			if p_len >= self.rules['above']:
708				rule = self.rules[ self.rules['above'] ]
709			elif p_len in self.rules:
710				rule = self.rules[p_len]
711			else:
712				raise Exception ('unsupport len of phrase')
713			if len(rule) > self._mlen:
714				raise Exception ('fault rule: %s' % rule)
715			#for (zi,ma) in rule:
716			#	if zi > 0:
717			#		zi -= 1
718			#	gcm = self.get_gcm_id (phrase[zi])
719			#	xmlist.append(gcm[ma-1])
720			xmlist = map (lambda x: self.get_gcm_id ( phrase[x[0]-1] )[ x[1]-1 ], rule )
721			return [len( xmlist)] + [p_len]  + xmlist[:] + [phrase]
722
723		except Exception:
724			import traceback
725			traceback.print_exc ()
726
727	def parse_phrase_to_xm (self,phrase):
728		'''Get the XingMa encoding of the phrase in string form'''
729		xmres = self.parse_phrase (phrase) [2:-1]
730		xms= u''.join ( map(self.deparse, xmres) )
731		return xms
732
733	def check_phrase (self,phrase,xmkey=None,database='main'):
734		# if IME didn't support user define phrase,
735		# we divide user input phrase into characters,
736		# and then check its frequence
737		if type(phrase) != type(u''):
738			phrase = phrase.decode('utf8')
739		if self.user_can_define_phrase:
740			self.check_phrase_internal (phrase, xmkey,database)
741		else:
742			map(self.check_phrase_internal, phrase)
743	
744	def check_phrase_internal (self,phrase,xmkey=None,database='main'):
745		'''Check word freq and user_freq
746		'''
747		if type(phrase) != type(u''):
748			phrase = phrase.decode('utf8')
749		try:
750			if phrase in self._no_check_chars:
751				# if the phrase is a single char, and in no_check_chars, we skip it.
752				return
753		except:
754			print 'you are using old format of database, please regenerate your database.'
755		if len(phrase) >=2:
756			wordattr = self.parse_phrase ( phrase )
757			_len = len (wordattr) -3
758		if xmkey == None:
759			sqlstr = '''SELECT * FROM (SELECT * FROM main.phrases WHERE phrase = ?
760			UNION ALL SELECT * FROM user_db.phrases WHERE phrase = ?
761			UNION ALL SELECT * FROM mudb.phrases WHERE phrase = ?)
762			ORDER BY user_freq DESC, freq DESC
763			''' 
764			result = self.db.execute(sqlstr, (phrase,phrase,phrase)).fetchall()
765		else:
766			# we are using this to check whether the tab-key and phrase is in db
767			xms = self.parse (xmkey)
768			xmkids = tuple( map(int,xms) )
769			condition = ' and '.join( map(lambda x: 'm%d = ?' % x, range( len(xms) )) )
770			sqlstr = '''SELECT * FROM %(database)s.phrases WHERE phrase = ? and %(cond)s;''' % {'database':database, 'cond':condition}
771			result = self.db.execute(sqlstr, (phrase,)+xmkids ).fetchall()
772			return bool(result)
773
774		sysdb = {}
775		usrdb = {}
776		mudb = {}
777		searchres = map ( lambda res: [ int(res[-2]), int(res[-1]), [(res[:-2],[res[:-1],res[-1]])] ], result)
778		# for sysdb
779		reslist=filter( lambda x: not x[1], searchres )
780		map (lambda x: sysdb.update(x[2]), reslist)
781		# for usrdb
782		reslist=filter( lambda x: ( x[0] in [0,-1] ) and x[1], searchres )
783		map (lambda x: usrdb.update(x[2]), reslist)
784		# for mudb
785		reslist=filter( lambda x: (x[0] not in [0,-1])  and x[1], searchres )
786		map (lambda x: mudb.update(x[2]), reslist)
787		
788		xmkey = ''
789		if len(phrase) >=2:
790			xmkey = u''.join ( map(self.deparse,wordattr[2:2+_len]) )
791			#for k in wordattr[2:2+_len]:
792			#	xmkey += self.deparse (k)
793		
794		sqlstr = 'UPDATE mudb.phrases SET user_freq = ? WHERE mlen = ? AND clen = ? %s AND phrase = ?;'
795		
796		try:
797			if len(phrase) == 1:
798				if not self.dynamic_adjust:
799					# we should change the frequency of words
800					return
801				# this is a character
802				# we remove the keys contained in mudb from usrdb
803				keyout = filter (lambda k: mudb.has_key(k), usrdb.keys() )
804				map (usrdb.pop, keyout)
805				# we remove the keys contained in mudb and usrdb from sysdb
806				keyout = filter (lambda k: mudb.has_key(k) or usrdb.has_key(k) , sysdb.keys() )
807				map (sysdb.pop, keyout)
808				# first mudb
809				map (lambda res: self.db.execute ( sqlstr % ''.join( map(lambda x: 'AND m%d = ? ' % x, range(res[0])) ) ,  [ mudb[res][1] + 1 ] + list( res[:2+res[0]]) + list (res[2+self._mlen:]) ) , mudb.keys())
810				self.db.commit()
811				# -----original for loop of above map: 
812				#for res in mudb.keys ():
813				#	_con = [ mudb[res][1] + 1 ] + list( res[:2+res[0]]) + list (res[2+self._mlen:])
814				#	_condition = ''.join( map(lambda x: 'AND m%d = ? ' % x, range(res[0])) )	
815				#	self.db.execute ( sqlstr % _condition, _con )
816				
817				# then usrdb
818				map ( lambda res: self.add_phrase ( (''.join ( map(self.deparse,res[2:2+int(res[0])] ) ),phrase, 1,usrdb[res][1]+1  ), database = 'mudb') , usrdb.keys() )				
819				# -----original for loop of above map: 
820				#for res in usrdb.keys ():
821				#	#if mudb.has_key (res):
822				#	#	continue
823				#	xmkey = ''.join ( map(self.deparse,res[2:2+int(res[0])] ) )
824				#	# here we use freq 1 to denote the phrase needed update in user_db
825				#	self.add_phrase ((xmkey,phrase,1,usrdb[res][1]+1 ), database = 'mudb')
826				# last sysdb
827				map ( lambda res: self.add_phrase ( ( ''.join ( map(self.deparse,res[2:2+int(res[0])]) ),phrase,2,1 ), database = 'mudb'), sysdb.keys() )
828				# -----original for loop of above map: 
829				#for res in sysdb.keys ():
830				#	xmkey = ''.join ( map(self.deparse,res[2:2+int(res[0])]) )
831				#	# here we use freq 2 to denote the word needed addition to user_db
832				#	self.add_phrase ((xmkey,phrase,2,1), database = 'mudb')
833			else:
834				# this is a phrase
835				if len (result) == 0 and self.user_can_define_phrase:
836					# this is a new phrase, we add it into user_db
837					self.add_phrase ( (xmkey,phrase,-2,1), database = 'mudb')
838				elif len (result) > 0:
839					if not self.dynamic_adjust:
840						# we should change the frequency of words
841						return
842					# we remove the keys contained in mudb from usrdb
843					keyout = filter (lambda k: mudb.has_key(k), usrdb.keys() )
844					map (usrdb.pop, keyout)
845					# we remove the keys contained in mudb and usrdb from sysdb
846					keyout = filter (lambda k: mudb.has_key(k) or usrdb.has_key(k) , sysdb.keys() )
847					map (sysdb.pop, keyout)
848					
849					# first we process mudb
850					# the original for loop can be found above in 'len==1'
851					map (lambda res: self.db.execute ( sqlstr % ''.join( map(lambda x: 'AND m%d = ? ' % x, range(res[0])) ) ,  [ mudb[res][1] + 1 ] + list( res[:2+res[0]]) + list (res[2+self._mlen:]) ) , mudb.keys())
852					self.db.commit()
853					# then usrdb
854					map ( lambda res: self.add_phrase ( (''.join ( map(self.deparse,res[2:2+int(res[0])] ) ),phrase,(-3 if usrdb[res][0][-1] == -1 else 1),usrdb[res][1]+1  ), database = 'mudb') , usrdb.keys() )				
855					#print self.db.execute('select * from mudb.phrases;').fetchall()
856					# last sysdb
857					map ( lambda res: self.add_phrase ( ( ''.join ( map(self.deparse,res[2:2+int(res[0])]) ),phrase,2,1 ), database = 'mudb'), sysdb.keys() )
858
859				else:
860					# we come to here when the ime dosen't support user phrase define
861					pass
862			
863			#self.db.commit()
864		except:
865			import traceback
866			traceback.print_exc ()
867
868	def find_zi_code (self,zi):
869		'''Check word freq and user_freq
870		'''
871		zi = zi.decode('utf8')
872		sqlstr = '''SELECT * FROM main.phrases WHERE phrase = ?
873		ORDER BY mlen ASC;
874''' 
875		result = self.db.execute(sqlstr, (zi,)).fetchall()
876		#self.db.commit()
877		codes = []
878		try:
879			if result:
880				for _res in result:
881					xmkey = u''
882					for i in range ( int ( _res[0] ) ):
883						xmkey += self.deparse ( _res[2+i] )
884					codes.append(xmkey)
885		except:
886			import traceback
887			traceback.print_exc ()
888		return codes[:]
889
890	def remove_phrase (self,phrase,database='user_db'):
891		'''Remove phrase from database, default is from user_db
892		phrase should be the a row of select * result from database
893		Like (mlen,clen,m0,m1,m2,m3,phrase,freq,user_freq)
894		'''
895		_ph = list(phrase[:-2])
896		_condition = ''	
897		for i in range(_ph[0]):
898			_condition += 'AND m%d = ? ' % i
899		nn =_ph.count(None)
900		if nn:
901			for i in range(nn):
902				_ph.remove(None)
903		msqlstr= 'SELECT * FROM %(database)s.phrases WHERE mlen = ? and clen = ? %(condition)s AND phrase = ? ;' % { 'database':database, 'condition':_condition }
904		if self.db.execute(msqlstr, _ph).fetchall():
905			sqlstr = 'DELETE FROM %(database)s.phrases WHERE mlen = ? AND clen =? %(condition)s AND phrase = ?  ;' % { 'database':database, 'condition':_condition }
906			self.db.execute(sqlstr,_ph)
907			self.db.commit()
908
909		msqlstr= 'SELECT * FROM mudb.phrases WHERE mlen = ? and clen = ? %(condition)s AND phrase = ? ;' % { 'condition':_condition }
910		if self.db.execute(msqlstr, _ph).fetchall():
911			sqlstr = 'DELETE FROM mudb.phrases WHERE mlen = ? AND clen =? %(condition)s AND phrase = ?  ;' % {  'condition':_condition }
912			self.db.execute(sqlstr,_ph)
913			self.db.commit()
914