PageRenderTime 69ms CodeModel.GetById 20ms app.highlight 43ms RepoModel.GetById 1ms app.codeStats 1ms

/python/engine/PinYin/PYDict.py

http://scim-python.googlecode.com/
Python | 368 lines | 335 code | 3 blank | 30 comment | 2 complexity | 3fbfc63d1c110d31132a5cd516c93ffa MD5 | raw file
  1# -*- coding: utf-8 -*-
  2# vim: set noet ts=4:
  3#
  4# scim-python
  5#
  6# Copyright (c) 2007-2008 Huang Peng <shawn.p.huang@gmail.com>
  7#
  8#
  9# This library is free software; you can redistribute it and/or
 10# modify it under the terms of the GNU Lesser General Public
 11# License as published by the Free Software Foundation; either
 12# version 2 of the License, or (at your option) any later version.
 13#
 14# This library is distributed in the hope that it will be useful,
 15# but WITHOUT ANY WARRANTY; without even the implied warranty of
 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17# GNU Lesser General Public License for more details.
 18#
 19# You should have received a copy of the GNU Lesser General Public
 20# License along with this program; if not, write to the
 21# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 22# Boston, MA  02111-1307  USA
 23#
 24# $Id: $
 25#
 26N_ = lambda x : x
# Maps each supported pinyin syllable (plain ASCII string) to a fixed
# integer id.  Ids run from 1 to 410; ids 63, 150 and 323 are unused
# because the syllables that would carry them ("den", "kei", "tei") are
# commented out below.  ID_PINYIN_DICT is the reverse mapping built from
# this table, so every id must stay unique.
PINYIN_DICT = {
	"a" : 1, "ai" : 2, "an" : 3, "ang" : 4, "ao" : 5,
	"ba" : 6, "bai" : 7, "ban" : 8, "bang" : 9, "bao" : 10,
	"bei" : 11, "ben" : 12, "beng" : 13, "bi" : 14, "bian" : 15,
	"biao" : 16, "bie" : 17, "bin" : 18, "bing" : 19, "bo" : 20,
	"bu" : 21, "ca" : 22, "cai" : 23, "can" : 24, "cang" : 25,
	"cao" : 26, "ce" : 27, "cen" : 28, "ceng" : 29, "ci" : 30,
	"cong" : 31, "cou" : 32, "cu" : 33, "cuan" : 34, "cui" : 35,
	"cun" : 36, "cuo" : 37, "cha" : 38, "chai" : 39, "chan" : 40,
	"chang" : 41, "chao" : 42, "che" : 43, "chen" : 44, "cheng" : 45,
	"chi" : 46, "chong" : 47, "chou" : 48, "chu" : 49, "chuai" : 50,
	"chuan" : 51, "chuang" : 52, "chui" : 53, "chun" : 54, "chuo" : 55,
	"da" : 56, "dai" : 57, "dan" : 58, "dang" : 59, "dao" : 60,
	"de" : 61, "dei" : 62,
	# Disabled entry; id 63 is left unused.
	# "den" : 63,
	"deng" : 64, "di" : 65,
	"dia" : 66, "dian" : 67, "diao" : 68, "die" : 69, "ding" : 70,
	"diu" : 71, "dong" : 72, "dou" : 73, "du" : 74, "duan" : 75,
	"dui" : 76, "dun" : 77, "duo" : 78, "e" : 79, "ei" : 80,
	"en" : 81, "er" : 82, "fa" : 83, "fan" : 84, "fang" : 85,
	"fei" : 86, "fen" : 87, "feng" : 88, "fo" : 89, "fou" : 90,
	"fu" : 91, "ga" : 92, "gai" : 93, "gan" : 94, "gang" : 95,
	"gao" : 96, "ge" : 97, "gei" : 98, "gen" : 99, "geng" : 100,
	"gong" : 101, "gou" : 102, "gu" : 103, "gua" : 104, "guai" : 105,
	"guan" : 106, "guang" : 107, "gui" : 108, "gun" : 109, "guo" : 110,
	"ha" : 111, "hai" : 112, "han" : 113, "hang" : 114, "hao" : 115,
	"he" : 116, "hei" : 117, "hen" : 118, "heng" : 119, "hong" : 120,
	"hou" : 121, "hu" : 122, "hua" : 123, "huai" : 124, "huan" : 125,
	"huang" : 126, "hui" : 127, "hun" : 128, "huo" : 129, "ji" : 130,
	"jia" : 131, "jian" : 132, "jiang" : 133, "jiao" : 134, "jie" : 135,
	"jin" : 136, "jing" : 137, "jiong" : 138, "jiu" : 139, "ju" : 140,
	"juan" : 141, "jue" : 142, "jun" : 143, "ka" : 144, "kai" : 145,
	"kan" : 146, "kang" : 147, "kao" : 148, "ke" : 149,
	# Disabled entry; id 150 is left unused.
	# "kei" : 150,
	"ken" : 151, "keng" : 152, "kong" : 153, "kou" : 154, "ku" : 155,
	"kua" : 156, "kuai" : 157, "kuan" : 158, "kuang" : 159, "kui" : 160,
	"kun" : 161, "kuo" : 162, "la" : 163, "lai" : 164, "lan" : 165,
	"lang" : 166, "lao" : 167, "le" : 168, "lei" : 169, "leng" : 170,
	"li" : 171, "lia" : 172, "lian" : 173, "liang" : 174, "liao" : 175,
	"lie" : 176, "lin" : 177, "ling" : 178, "liu" : 179, 
	"lo" : 180,
	"long" : 181, "lou" : 182, "lu" : 183, "luan" : 184, "lue" : 185,
	"lun" : 186, "luo" : 187, "lv" : 188, "lve" : 189, "ma" : 190,
	"mai" : 191, "man" : 192, "mang" : 193, "mao" : 194, "me" : 195,
	"mei" : 196, "men" : 197, "meng" : 198, "mi" : 199, "mian" : 200,
	"miao" : 201, "mie" : 202, "min" : 203, "ming" : 204, "miu" : 205,
	"mo" : 206, "mou" : 207, "mu" : 208, "na" : 209, "nai" : 210,
	"nan" : 211, "nang" : 212, "nao" : 213, "ne" : 214, "nei" : 215,
	"nen" : 216, "neng" : 217, "ni" : 218, "nian" : 219, "niang" : 220,
	"niao" : 221, "nie" : 222, "nin" : 223, "ning" : 224, "niu" : 225,
	"ng" : 226, "nong" : 227, "nou" : 228, "nu" : 229, "nuan" : 230,
	"nue" : 231, "nuo" : 232, "nv" : 233, "nve" : 234, 	"o" : 235,
	"ou" : 236, "pa" : 237, "pai" : 238, "pan" : 239, "pang" : 240,
	"pao" : 241, "pei" : 242, "pen" : 243, "peng" : 244, "pi" : 245,
	"pian" : 246, "piao" : 247, "pie" : 248, "pin" : 249, "ping" : 250,
	"po" : 251, "pou" : 252, "pu" : 253, "qi" : 254, "qia" : 255,
	"qian" : 256, "qiang" : 257, "qiao" : 258, "qie" : 259, "qin" : 260,
	"qing" : 261, "qiong" : 262, "qiu" : 263, "qu" : 264, "quan" : 265,
	"que" : 266, "qun" : 267, "ran" : 268, "rang" : 269, "rao" : 270,
	"re" : 271, "ren" : 272, "reng" : 273, "ri" : 274, "rong" : 275,
	"rou" : 276, "ru" : 277, "ruan" : 278, "rui" : 279, "run" : 280,
	"ruo" : 281, "sa" : 282, "sai" : 283, "san" : 284, "sang" : 285,
	"sao" : 286, "se" : 287, "sen" : 288, "seng" : 289, "si" : 290,
	"song" : 291, "sou" : 292, "su" : 293, "suan" : 294, "sui" : 295,
	"sun" : 296, "suo" : 297, "sha" : 298, "shai" : 299, "shan" : 300,
	"shang" : 301, "shao" : 302, "she" : 303, "shei" : 304, "shen" : 305,
	"sheng" : 306, "shi" : 307, "shou" : 308, "shu" : 309, "shua" : 310,
	"shuai" : 311, "shuan" : 312, "shuang" : 313, "shui" : 314, "shun" : 315,
	"shuo" : 316, "ta" : 317, "tai" : 318, "tan" : 319, "tang" : 320,
	"tao" : 321, "te" : 322,
	# Disabled entry; id 323 is left unused.
	# "tei" : 323,
	"teng" : 324, "ti" : 325,
	"tian" : 326, "tiao" : 327, "tie" : 328, "ting" : 329, "tong" : 330,
	"tou" : 331, "tu" : 332, "tuan" : 333, "tui" : 334, "tun" : 335,
	"tuo" : 336, "wa" : 337, "wai" : 338, "wan" : 339, "wang" : 340,
	"wei" : 341, "wen" : 342, "weng" : 343, "wo" : 344, "wu" : 345,
	"xi" : 346, "xia" : 347, "xian" : 348, "xiang" : 349, "xiao" : 350,
	"xie" : 351, "xin" : 352, "xing" : 353, "xiong" : 354, "xiu" : 355,
	"xu" : 356, "xuan" : 357, "xue" : 358, "xun" : 359, "ya" : 360,
	"yan" : 361, "yang" : 362, "yao" : 363, "ye" : 364, "yi" : 365,
	"yin" : 366, "ying" : 367, "yo" : 368, "yong" : 369, "you" : 370,
	"yu" : 371, "yuan" : 372, "yue" : 373, "yun" : 374, "za" : 375,
	"zai" : 376, "zan" : 377, "zang" : 378, "zao" : 379, "ze" : 380,
	"zei" : 381, "zen" : 382, "zeng" : 383, "zi" : 384, "zong" : 385,
	"zou" : 386, "zu" : 387, "zuan" : 388, "zui" : 389, "zun" : 390,
	"zuo" : 391, "zha" : 392, "zhai" : 393, "zhan" : 394, "zhang" : 395,
	"zhao" : 396, "zhe" : 397, "zhen" : 398, "zheng" : 399, "zhi" : 400,
	"zhong" : 401, "zhou" : 402, "zhu" : 403, "zhua" : 404, "zhuai" : 405,
	"zhuan" : 406, "zhuang" : 407, "zhui" : 408, "zhun" : 409, "zhuo" : 410,
	# Some unusual pinyins, currently disabled (ids 411-415 are not in use):
	#~ "eng" : 411, "chua" : 412, "fe" : 413, "fiao" : 414, "liong" : 415
	}
119
# All pinyin strings known to PINYIN_DICT, as a real list.
# Wrapped in list () because under Python 3 dict.keys () returns a view
# (not indexable, and it tracks later changes to the dict); under
# Python 2 the result is the same list of keys as before.
PINYIN_LIST = list (PINYIN_DICT.keys ())
121
# Reverse lookup of PINYIN_DICT: integer id -> pinyin string.
# Built from a generator expression rather than a module-level for loop
# so that no loop variables are left behind as module globals; the old
# loop left one named `id`, shadowing the builtin for this module and
# for anything doing `from PYDict import *`.
ID_PINYIN_DICT = dict (
	(code, pinyin) for pinyin, code in PINYIN_DICT.items ()
)
125
# Maps each shengmu string to its integer id.  The ids are simply the
# positions in the sequence below, starting at 0 for the empty string
# and ending at 23 for "w".
SHENGMU_DICT = dict (
	(shengmu, index) for index, shengmu in enumerate ((
		"", "b", "p", "m", "f", "d",
		"t", "n", "l", "g", "k", "h",
		"j", "q", "x", "zh", "ch", "sh",
		"r", "z", "c", "s", "y", "w",
	))
)
# All shengmu strings known to SHENGMU_DICT (including the empty
# string), as a real list.  list () is needed because under Python 3
# dict.keys () returns a view rather than a list; under Python 2 the
# result is unchanged.
SHENGMU_LIST = list (SHENGMU_DICT.keys ())
# Pre-computed list of partial pinyin strings.
# The disabled snippet below collects every leading substring (two or
# more characters, shorter than the full string) of a PINYIN_LIST entry
# that is neither a pinyin nor a shengmu.
# NOTE(review): the literal was presumably produced by that snippet
# (Python 2 syntax) while the "unusual" pinyins were enabled -- confirm
# before regenerating.
#~ PINYIN_PARTIAL_LIST = []
#~ for p in PINYIN_LIST:
	#~ for i in range (2, len (p)):
		#~ if not (p[:i] in PINYIN_LIST or p[:i] in SHENGMU_LIST or p[:i] in PINYIN_PARTIAL_LIST):
			#~ PINYIN_PARTIAL_LIST.append (p[:i])
#~ print PINYIN_PARTIAL_LIST
PINYIN_PARTIAL_LIST = (
	"ro zo zon so son tua zua no "
	"non be ho cua jio jion xua ko "
	"kon we go tia fi fia fe din ra "
	"yon do don ron to nia qua cho "
	"chon mia den sho nua ten co ton "
	"pe tin sua xio xion pia bia dua "
	"con hon jua yua zho zhon rua lua "
	"lio lion qio qion lon gon len"
).split ()
148
# Reverse lookup of SHENGMU_DICT: integer id -> shengmu string.
# Built from a generator expression rather than a module-level for loop
# so that no loop variables are left behind as module globals; the old
# loop left one named `id`, shadowing the builtin for this module and
# for anything doing `from PYDict import *`.
ID_SHENGMU_DICT = dict (
	(code, shengmu) for shengmu, code in SHENGMU_DICT.items ()
)
154
# Groups shengmu into interchangeable pairs: every member of a pair maps
# to the same two-element tuple (the pair itself, in the order written
# below).
# NOTE(review): "mohu" presumably means "fuzzy" pinyin matching --
# confirm against the code that consumes this table.
MOHU_SHENGMU = dict (
	(shengmu, pair)
	for pair in (("z", "zh"), ("c", "ch"), ("s", "sh"), ("l", "n"))
	for shengmu in pair
)
165
# Groups yunmu into interchangeable pairs: every member of a pair maps
# to the same two-element tuple (the pair itself, in the order written
# below).
# NOTE(review): "mohu" presumably means "fuzzy" pinyin matching --
# confirm against the code that consumes this table.
MOHU_YUNMU = dict (
	(yunmu, pair)
	for pair in (("an", "ang"), ("en", "eng"), ("in", "ing"))
	for yunmu in pair
)
174
# Shengmu half of the "MSPY" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a shengmu string.  Most letters map
# to themselves; "i", "u" and "v" map to "ch", "sh" and "zh", and "o"
# maps to "'".
# NOTE(review): "'" presumably marks a syllable with no shengmu --
# confirm against the shuangpin parser.
MSPY_SHUANGPIN_SHENGMU_DICT = {
	"b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
	"h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l",
	"m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
	"r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh",
	"w" : "w", "x" : "x", "y" : "y", "z" : "z"
}
182
# Yunmu half of the "MSPY" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character ("a".."z" plus ";") to a tuple of the
# one or two yunmu strings that character can stand for.
# NOTE(review): the order inside each tuple may matter to the consumer;
# keep it as is unless the parser is checked.
MSPY_SHUANGPIN_YUNMU_DICT = {
	"a" : ("a",),
	"b" : ("ou",),
	"c" : ("iao",),
	"d" : ("uang", "iang"),
	"e" : ("e",),
	"f" : ("en",),
	"g" : ("eng", "ng"),
	"h" : ("ang",),
	"i" : ("i",),
	"j" : ("an",),
	"k" : ("ao",),
	"l" : ("ai",),
	"m" : ("ian",),
	"n" : ("in",),
	"o" : ("uo", "o"),
	"p" : ("un",),
	"q" : ("iu",),
	"r" : ("uan", "er"),
	"s" : ("ong", "iong"),
	"t" : ("ue",),
	"u" : ("u",),
	"v" : ("ui","ue"),
	"w" : ("ia","ua"),
	"x" : ("ie",),
	"y" : ("uai", "v"),
	"z" : ("ei",),
	";" : ("ing",)
}
212
# Shengmu half of the "ZRM" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a shengmu string.  Most letters map
# to themselves; "i", "u" and "v" map to "ch", "sh" and "zh", and "o"
# maps to "'".  The contents are currently identical to
# MSPY_SHUANGPIN_SHENGMU_DICT.
# NOTE(review): "'" presumably marks a syllable with no shengmu --
# confirm against the shuangpin parser.
ZRM_SHUANGPIN_SHENGMU_DICT = {
	"b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
	"h" : "h", "i" : "ch","j" : "j", "k" : "k", "l" : "l",
	"m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
	"r" : "r", "s" : "s", "t" : "t", "u" : "sh","v" : "zh",
	"w" : "w", "x" : "x", "y" : "y", "z" : "z"
}
220
# Yunmu half of the "ZRM" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character ("a".."z") to a tuple of the one or two
# yunmu strings that character can stand for.  It differs from the MSPY
# table only in "v" (("ui", "v")), "y" (("uai", "ing")) and in having no
# ";" entry.
# NOTE(review): the order inside each tuple may matter to the consumer;
# keep it as is unless the parser is checked.
ZRM_SHUANGPIN_YUNMU_DICT = {
	"a" : ("a",),
	"b" : ("ou",),
	"c" : ("iao",),
	"d" : ("uang", "iang"),
	"e" : ("e",),
	"f" : ("en",),
	"g" : ("eng", "ng"),
	"h" : ("ang",),
	"i" : ("i",),
	"j" : ("an",),
	"k" : ("ao",),
	"l" : ("ai",),
	"m" : ("ian",),
	"n" : ("in",),
	"o" : ("uo", "o"),
	"p" : ("un",),
	"q" : ("iu",),
	"r" : ("uan", "er"),
	"s" : ("ong", "iong"),
	"t" : ("ue",),
	"u" : ("u",),
	"v" : ("ui","v"),
	"w" : ("ia","ua"),
	"x" : ("ie",),
	"y" : ("uai", "ing"),
	"z" : ("ei",),
}
249
# Shengmu half of the "ABC" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a shengmu string.  Most letters map
# to themselves; "a", "e" and "v" map to "zh", "ch" and "sh", and "o"
# maps to "'".  There are no entries for "i" or "u".
# NOTE(review): "'" presumably marks a syllable with no shengmu --
# confirm against the shuangpin parser.
ABC_SHUANGPIN_SHENGMU_DICT = {
	"a" : "zh", "b" : "b", "c" : "c", "d" : "d", "e":"ch", "f" : "f", "g" : "g",
	"h" : "h", "j" : "j", "k" : "k", "l" : "l",
	"m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
	"r" : "r", "s" : "s", "t" : "t", "v" : "sh",
	"w" : "w", "x" : "x", "y" : "y", "z" : "z"
}
257
# Yunmu half of the "ABC" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character ("a".."z") to a tuple of the one or two
# yunmu strings that character can stand for.
# NOTE(review): the order inside each tuple may matter to the consumer;
# keep it as is unless the parser is checked.
ABC_SHUANGPIN_YUNMU_DICT = {
	"a" : ("a",),
	"b" : ("ou",),
	"c" : ("in","uai"),
	"d" : ("ia", "ua"),
	"e" : ("e",),
	"f" : ("en",),
	"g" : ("eng", "ng"),
	"h" : ("ang",),
	"i" : ("i",),
	"j" : ("an",),
	"k" : ("ao",),
	"l" : ("ai",),
	"m" : ("ue","ui"),
	"n" : ("un",),
	"o" : ("uo", "o"),
	"p" : ("uan",),
	"q" : ("ei",),
	"r" : ("er", "iu"),
	"s" : ("ong", "iong"),
	"t" : ("iang","uang"),
	"u" : ("u",),
	"v" : ("v","ue"),
	"w" : ("ian",),
	"x" : ("ie",),
	"y" : ("ing",),
	"z" : ("iao",),
}
286
# Shengmu half of the "PYJJ" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a shengmu string.  Most letters map
# to themselves; "i", "u" and "v" map to "sh", "ch" and "zh", and both
# "a" and "o" map to "'".
# NOTE(review): "'" presumably marks a syllable with no shengmu --
# confirm against the shuangpin parser.
PYJJ_SHUANGPIN_SHENGMU_DICT = {
	"a" : "'", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
	"h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l",
	"m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
	"r" : "r", "s" : "s", "t" : "t", "u" : "ch","v" : "zh",
	"w" : "w", "x" : "x", "y" : "y", "z" : "z"
}
294
# Yunmu half of the "PYJJ" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character ("a".."z") to a tuple of the one or two
# yunmu strings that character can stand for.
# NOTE(review): the order inside each tuple may matter to the consumer;
# keep it as is unless the parser is checked.
PYJJ_SHUANGPIN_YUNMU_DICT = {
	"a" : ("a",),
	"b" : ("ia","ua"),
	"c" : ("uan",),
	"d" : ("ao", ),
	"e" : ("e",),
	"f" : ("an",),
	"g" : ("ang",),
	"h" : ("iang","uang"),
	"i" : ("i",),
	"j" : ("ian",),
	"k" : ("iao",),
	"l" : ("in",),
	"m" : ("ie",),
	"n" : ("iu",),
	"o" : ("uo", "o"),
	"p" : ("ou",),
	"q" : ("er","ing"),
	"r" : ("en", ),
	"s" : ("ai", ),
	"t" : ("eng", "ng"),
	"u" : ("u",),
	"v" : ("v","ui"),
	"w" : ("ei",),
	"x" : ("uai","ue"),
	"y" : ("ong","iong"),
	"z" : ("un",),
}
323
# Shengmu half of the "ZGPY" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a shengmu string.  Most letters map
# to themselves; "a", "i" and "u" map to "ch", "sh" and "zh", and "o"
# maps to "'".  There is no entry for "v".
# NOTE(review): "'" presumably marks a syllable with no shengmu --
# confirm against the shuangpin parser.
ZGPY_SHUANGPIN_SHENGMU_DICT = {
	"a": "ch", "b" : "b", "c" : "c", "d" : "d", "f" : "f", "g" : "g",
	"h" : "h", "i" : "sh","j" : "j", "k" : "k", "l" : "l",
	"m" : "m", "n" : "n", "o" : "'", "p" : "p", "q" : "q",
	"r" : "r", "s" : "s", "t" : "t", "u" : "zh",
	"w" : "w", "x" : "x", "y" : "y", "z" : "z"
}
331
# Yunmu half of the "ZGPY" shuangpin schema (see SHUANGPIN_SCHEMAS).
# Maps a single input character to a tuple of the one or two yunmu
# strings that character can stand for.  Keys are the letters "a".."z"
# except "c", plus ";".
# NOTE(review): the order inside each tuple may matter to the consumer;
# keep it as is unless the parser is checked.
ZGPY_SHUANGPIN_YUNMU_DICT = {
	"a" : ("a", ),
	"b" : ("iao", ),
	"d" : ("ie", ),
	"e" : ("e", ),
	"f" : ("ian", ),
	"g" : ("iang", "uang"),
	"h" : ("ong", "iong"),
	"i" : ("i", ),
	"j" : ("er", "iu"),
	"k" : ("ei", ),
	"l" : ("uan", ),
	"m" : ("un", ),
	"n" : ("ue", "ui"),
	"o" : ("uo", "o"),
	"p" : ("ai", ),
	"q" : ("ao", ),
	"r" : ("an", ),
	"s" : ("ang", ),
	"t" : ("eng", "ng"),
	"u" : ("u", ),
	"v" : ("v", ),
	"w" : ("en", ),
	"x" : ("ia", "ua"),
	"y" : ("in", "uai"),
	"z" : ("ou" ,),
	";" : ("ing", )
}
360
# Registry of the available shuangpin schemas.  Each schema name (passed
# through N_) maps to a (shengmu table, yunmu table) pair.
SHUANGPIN_SCHEMAS = dict ((
	(N_("MSPY"), (MSPY_SHUANGPIN_SHENGMU_DICT, MSPY_SHUANGPIN_YUNMU_DICT)),
	(N_("ZRM"), (ZRM_SHUANGPIN_SHENGMU_DICT, ZRM_SHUANGPIN_YUNMU_DICT)),
	(N_("ABC"), (ABC_SHUANGPIN_SHENGMU_DICT, ABC_SHUANGPIN_YUNMU_DICT)),
	(N_("ZGPY"), (ZGPY_SHUANGPIN_SHENGMU_DICT, ZGPY_SHUANGPIN_YUNMU_DICT)),
	(N_("PYJJ"), (PYJJ_SHUANGPIN_SHENGMU_DICT, PYJJ_SHUANGPIN_YUNMU_DICT)),
))
368