/python/engine/PinYin/tools/filter_duoyin.py
http://scim-python.googlecode.com/ · Python · 23 lines · 19 code · 4 blank · 0 comment · 6 complexity · 591d41675a244a76ae6b74346237407e MD5 · raw file
import collections
import io
# NOTE(review): `sets` is a Python 2-only module and is not referenced by the
# visible part of this script; kept in case other code relies on it.
import sets as Set
import sys
def find_repeated_keys(lines):
    """Group table rows by their first field and keep keys seen more than once.

    Each non-blank line is split on tabs.  The first field is the grouping
    key; the second and third fields are stored together as a pair.  Any
    further fields are ignored.
    (Presumably the key is a Chinese character and the pair is a pinyin
    reading plus one more column -- confirm against the table format.)

    Args:
        lines: iterable of text lines, with or without line terminators.

    Returns:
        An ordered mapping ``key -> [(field2, field3), ...]`` containing only
        the keys that occur on at least two lines, in order of first
        appearance.  Pairs are listed in input order.

    Raises:
        ValueError: if a non-blank line has fewer than three fields.
    """
    grouped = collections.OrderedDict()
    for lineno, raw in enumerate(lines, 1):
        # Strip only the line terminator.  Slicing off the last character
        # unconditionally would eat real data on a final line that has no
        # trailing newline.
        line = raw.rstrip(u"\r\n")
        if not line.strip():
            continue
        fields = line.split(u"\t")
        if len(fields) < 3:
            raise ValueError(
                "line %d: expected at least 3 tab-separated fields, got %r"
                % (lineno, line)
            )
        grouped.setdefault(fields[0], []).append((fields[1], fields[2]))
    return collections.OrderedDict(
        (key, pairs) for key, pairs in grouped.items() if len(pairs) > 1
    )


def format_entry(key, pairs):
    """Return one output line (no newline): the key, then every pair flattened.

    All items are joined with single spaces, e.g. ``key a1 b1 a2 b2``.
    """
    parts = [key]
    for first, second in pairs:
        parts.append(first)
        parts.append(second)
    return u" ".join(parts)


def main(path="pinyin_table.txt", out=None):
    """Read the UTF-8 table at *path* and write every repeated key to *out*.

    Args:
        path: table file to read; defaults to ``pinyin_table.txt`` in the
            current working directory.
        out: binary writable stream that receives UTF-8 encoded lines.
            Defaults to standard output.
    """
    # io.open decodes to text on both Python 2 and 3; the with-block makes
    # sure the file handle is closed even if parsing fails.
    with io.open(path, encoding="utf-8") as table:
        repeated = find_repeated_keys(table)

    if out is None:
        # Python 3 exposes the byte stream as sys.stdout.buffer; Python 2's
        # sys.stdout already accepts bytes.  Writing bytes keeps the output
        # UTF-8 regardless of the locale.
        out = getattr(sys.stdout, "buffer", sys.stdout)
    for key, pairs in repeated.items():
        out.write((format_entry(key, pairs) + u"\n").encode("utf-8"))


if __name__ == "__main__":
    main()