PageRenderTime 145ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/python/engine/PinYin/tools/check_pinyin.py

http://scim-python.googlecode.com/
Python | 47 lines | 42 code | 5 blank | 0 comment | 12 complexity | 95c90e9ffd5e58b77ad688b5528cde67 MD5 | raw file
  1. import sys
  2. import bz2
  3. sys.path.append ("..")
  4. import PYUtil
  5. def load_pinyin_table ():
  6. hanzi_dict = PYUtil.load_pinyin_table (open ("../../../../data/pinyin_table.txt"))
  7. tmp = {}
  8. for key, value in hanzi_dict.items ():
  9. pinyins = []
  10. for pinyin, freq in value.items ():
  11. pinyins.append (pinyin)
  12. tmp[key] = pinyins
  13. return tmp
  14. def check_pinyin (phrase, pinyin, hanzi):
  15. pys = pinyin.split ("'")
  16. _len = len (phrase)
  17. if _len != len (pys):
  18. return False
  19. for i in range (0, _len):
  20. c = phrase[i]
  21. py = pys[i]
  22. if c not in hanzi:
  23. return False
  24. if py not in hanzi[c]:
  25. return False
  26. return True
  27. def main ():
  28. inv = False
  29. if len(sys.argv) >= 2 and sys.argv[1] == "-v":
  30. inv = True
  31. hanzi = load_pinyin_table ()
  32. for line in sys.stdin:
  33. line = line.strip ()
  34. tmp = unicode (line, "utf8").split()
  35. phrase = tmp[0]
  36. pinyin = tmp[1]
  37. if check_pinyin (phrase, pinyin, hanzi) != inv:
  38. print line
  39. else:
  40. continue
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main ()