PageRenderTime 897ms CodeModel.GetById 624ms app.highlight 5ms RepoModel.GetById 267ms app.codeStats 0ms

/python/engine/PinYin/tools/check_pinyin.py

http://scim-python.googlecode.com/
Python | 47 lines | 42 code | 5 blank | 0 comment | 19 complexity | 95c90e9ffd5e58b77ad688b5528cde67 MD5 | raw file
 1import sys
 2import bz2
 3sys.path.append ("..")
 4import PYUtil
 5
 6def load_pinyin_table ():
 7	hanzi_dict = PYUtil.load_pinyin_table (open ("../../../../data/pinyin_table.txt"))
 8	tmp = {}
 9	for key, value in hanzi_dict.items ():
10		pinyins = []
11		for pinyin, freq in value.items ():
12			pinyins.append (pinyin)
13		tmp[key] = pinyins
14
15	return tmp
16
def check_pinyin(phrase, pinyin, hanzi):
	"""Tell whether an apostrophe-separated pinyin string fits a phrase.

	``pinyin`` is split on "'" and its pieces are paired, position by
	position, with the items of ``phrase``.

	Returns:
		True only when both sequences have the same length and, for every
		pair, the phrase item is a key of ``hanzi`` and the pinyin piece is
		contained in ``hanzi[item]``. False otherwise.
	"""
	syllables = pinyin.split("'")
	if len(syllables) != len(phrase):
		return False
	# Lengths are equal here, so zip() pairs every item without truncation.
	return all(
		char in hanzi and syllable in hanzi[char]
		for char, syllable in zip(phrase, syllables)
	)
30
def main():
	"""Filter "phrase pinyin" lines from stdin by whether the pinyin matches.

	Every input line is split on whitespace; the first field is taken as the
	phrase and the second as its apostrophe-separated pinyin (any further
	fields are ignored). Lines for which ``check_pinyin`` returns True are
	echoed to stdout. When the first command-line argument is ``-v`` the
	selection is inverted and only the failing lines are echoed.

	Blank lines are skipped. A line with fewer than two fields cannot be
	checked and is counted as failing, so it shows up under ``-v`` instead of
	aborting the run with an IndexError.
	"""
	inv = len(sys.argv) >= 2 and sys.argv[1] == "-v"
	hanzi = load_pinyin_table()
	for line in sys.stdin:
		line = line.strip()
		if not line:
			continue
		# Byte strings (what Python 2 reads from stdin) are decoded as UTF-8
		# so that the phrase is indexed per character rather than per byte;
		# text strings have no decode() and are used unchanged.
		if hasattr(line, "decode"):
			text = line.decode("utf8")
		else:
			text = line
		fields = text.split()
		if len(fields) < 2:
			valid = False
		else:
			valid = check_pinyin(fields[0], fields[1], hanzi)
		if valid != inv:
			# Same output as the print statement: the stripped line plus a
			# newline, without relying on Python-2-only syntax.
			sys.stdout.write(line + "\n")
45
# Run the stdin filter only when this file is executed as a script, so that
# importing the module has no side effect beyond its definitions.
if __name__ == "__main__":
	main ()