PageRenderTime 108ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/python/engine/PinYin/tools/duoyin_filter.py

http://scim-python.googlecode.com/
Python | 34 lines | 29 code | 5 blank | 0 comment | 8 complexity | c1c3b037dff5c72a98e53bff45ad9f2c MD5 | raw file
  1. import sys
  2. import bz2
  3. sys.path.append ("..")
  4. import PYUtil
  5. def load_pinyin_table ():
  6. hanzi_dict = PYUtil.load_pinyin_table (open ("../../../../data/pinyin_table.txt"))
  7. tmp = {}
  8. for key, value in hanzi_dict.items ():
  9. pinyins = []
  10. for pinyin, freq in value.items ():
  11. pinyins.append (pinyin)
  12. tmp[key] = pinyins
  13. return tmp
  14. def main ():
  15. hanzi = load_pinyin_table ()
  16. duoyin = []
  17. for key , value in hanzi.items ():
  18. if len (value) > 1:
  19. duoyin.append (key)
  20. hanzi = set(duoyin)
  21. for l in sys.stdin:
  22. l = l.strip ()
  23. phrase = unicode (l, "utf8").split()[0]
  24. for c in phrase:
  25. if c in hanzi:
  26. print l
  27. break
  28. if __name__ == "__main__":
  29. main ()