/python/engine/PinYin/tools/check_pinyin.py
Python | 47 lines | 42 code | 5 blank | 0 comment | 12 complexity | 95c90e9ffd5e58b77ad688b5528cde67 MD5 | raw file
- import sys
- import bz2
- sys.path.append ("..")
- import PYUtil
def load_pinyin_table():
    """Load the pinyin table and reduce it to a key -> list-of-pinyins map.

    The table is read from ``../../../../data/pinyin_table.txt`` (resolved
    against the current working directory) by ``PYUtil.load_pinyin_table``.
    Each entry of the returned table maps pinyins to a second value (named
    ``freq`` here); that second value is dropped and only the pinyins are
    kept.

    Returns:
        dict: every key of the PYUtil table mapped to a list of its pinyins.
    """
    # Use a context manager so the table file is closed deterministically
    # instead of leaking the handle until garbage collection.
    # NOTE(review): assumes PYUtil.load_pinyin_table consumes the file
    # completely before returning -- confirm against PYUtil.
    with open("../../../../data/pinyin_table.txt") as table_file:
        hanzi_dict = PYUtil.load_pinyin_table(table_file)

    pinyins_by_key = {}
    for key, value in hanzi_dict.items():
        pinyins_by_key[key] = [pinyin for pinyin, _freq in value.items()]
    return pinyins_by_key
def check_pinyin(phrase, pinyin, hanzi):
    """Tell whether *pinyin* is an accepted reading of *phrase*.

    *pinyin* is split on apostrophes into syllables, one per element of
    *phrase*. *hanzi* maps a character to the collection of pinyins
    accepted for it.

    Returns:
        bool: True only when the number of syllables equals ``len(phrase)``
        and, position by position, the character is a key of *hanzi* and
        its syllable is contained in ``hanzi[character]``; False otherwise.
    """
    syllables = pinyin.split("'")
    # A reading must supply exactly one syllable per character.
    if len(syllables) != len(phrase):
        return False
    return all(
        char in hanzi and syllable in hanzi[char]
        for char, syllable in zip(phrase, syllables)
    )
def main():
    """Filter "phrase pinyin" lines from stdin by pinyin validity.

    Every input line is expected to carry a phrase and its
    apostrophe-separated pinyin as the first two whitespace-separated
    fields, encoded as UTF-8. Lines for which ``check_pinyin`` succeeds are
    echoed (stripped) to stdout. When the first command-line argument is
    ``-v`` the selection is inverted and only the failing lines are echoed.

    Blank lines are skipped. A non-blank line with fewer than two fields is
    treated as failing the check rather than raising ``IndexError``.

    Raises:
        UnicodeDecodeError: if a byte-string input line is not valid UTF-8.
    """
    invert = len(sys.argv) >= 2 and sys.argv[1] == "-v"
    hanzi = load_pinyin_table()
    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        # Python 2 reads byte strings from stdin; decode them so the phrase
        # is walked character by character instead of byte by byte.
        # Python 3 already yields text, which is used as is.
        text = line.decode("utf8") if isinstance(line, bytes) else line
        fields = text.split()
        valid = len(fields) >= 2 and check_pinyin(fields[0], fields[1], hanzi)
        if valid != invert:
            # Same output as the Python 2 ``print line`` statement, but
            # also valid syntax on Python 3.
            sys.stdout.write(line + "\n")
# Run the filter only when this file is executed as a script, so that
# importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()