PageRenderTime 13ms CodeModel.GetById 2ms app.highlight 7ms RepoModel.GetById 2ms app.codeStats 0ms

/python/engine/PinYin/tools/duoyin_filter.py

http://scim-python.googlecode.com/
Python | 34 lines | 29 code | 5 blank | 0 comment | 14 complexity | c1c3b037dff5c72a98e53bff45ad9f2c MD5 | raw file
 1import sys
 2import bz2
 3sys.path.append ("..")
 4import PYUtil
 5
 6def load_pinyin_table ():
 7	hanzi_dict = PYUtil.load_pinyin_table (open ("../../../../data/pinyin_table.txt"))
 8	tmp = {}
 9	for key, value in hanzi_dict.items ():
10		pinyins = []
11		for pinyin, freq in value.items ():
12			pinyins.append (pinyin)
13		tmp[key] = pinyins
14
15	return tmp
16
def main ():
	"""Echo the stdin lines whose phrase contains a multi-reading character.

	Each input line is stripped and decoded as UTF-8, and its first
	whitespace-separated field is taken as the phrase.  The stripped line is
	written to stdout when at least one character of the phrase has more than
	one pinyin in the table returned by load_pinyin_table ().  Lines with no
	fields (blank or whitespace-only) are skipped.
	"""
	hanzi = load_pinyin_table ()
	# Keys with more than one pinyin; a set keeps the per-character test cheap.
	duoyin = set ([key for key, value in hanzi.items () if len (value) > 1])

	for l in sys.stdin:
		l = l.strip ()
		fields = unicode (l, "utf8").split ()
		if not fields:
			# No phrase on this line; indexing [0] here used to raise
			# IndexError and abort the whole run.
			continue
		for c in fields[0]:
			if c in duoyin:
				# Same output as the print statement (line plus newline), but
				# spelled so the syntax is not tied to Python 2's print.
				sys.stdout.write (l + "\n")
				break
32
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
	main ()