/python/engine/PinYin/tools/filter_duoyin.py

http://scim-python.googlecode.com/ · Python · 23 lines · 19 code · 4 blank · 0 comment · 6 complexity · 591d41675a244a76ae6b74346237407e MD5 · raw file

  1. import sets as Set
  2. i=0
  3. count = 0
  4. dic = {}
  5. dic_m = {}
  6. for l in file ("pinyin_table.txt"):
  7. ll = unicode(l,"utf8")[:-1]
  8. p = (ll).split(u"\t")
  9. if p[0] not in dic:
  10. dic[p[0]] = [(p[1],p[2])]
  11. else:
  12. if p[0] not in dic_m:
  13. dic_m[p[0]] = dic[p[0]]
  14. dic_m[p[0]].append((p[1],p[2]))
  15. for k, v in dic_m.items ():
  16. print k.encode ("utf-8"),
  17. for l in v:
  18. print l[0],l[1],
  19. print