/python/engine/PinYin/tools/duoyin_filter.py
Python | 34 lines | 29 code | 5 blank | 0 comment | 8 complexity | c1c3b037dff5c72a98e53bff45ad9f2c MD5 | raw file
- import sys
- import bz2
- sys.path.append ("..")
- import PYUtil
def load_pinyin_table():
    """Return a dict mapping each table key to a list of its pinyin strings.

    The table is read from ``../../../../data/pinyin_table.txt`` (relative to
    the current working directory) via ``PYUtil.load_pinyin_table``.  That
    helper is expected to return a mapping of ``key -> {pinyin: freq}``
    (per the names used here -- verify against PYUtil); the second element
    of each pair is discarded and only the pinyin strings are kept.
    """
    # Open the table under a context manager so the handle is closed even if
    # parsing fails; the original left the file object open.
    with open("../../../../data/pinyin_table.txt") as table_file:
        hanzi_dict = PYUtil.load_pinyin_table(table_file)
        # Converted while the file is still open so nothing here depends on
        # whether the loader reads the file eagerly.
        readings = {}
        for key, value in hanzi_dict.items():
            readings[key] = [pinyin for pinyin, _freq in value.items()]
    return readings
def main():
    """Echo stdin lines whose first field contains a multi-pinyin character.

    Each line read from ``sys.stdin`` is stripped, and its first
    whitespace-separated field is taken as the phrase.  If any character of
    that phrase has more than one pinyin in the table returned by
    ``load_pinyin_table()``, the stripped line is written to stdout.
    Lines with no fields (blank or whitespace-only) are skipped.

    Raises:
        UnicodeDecodeError: if a byte-string input line is not valid UTF-8.
    """
    hanzi = load_pinyin_table()
    # Keys that map to more than one pinyin; a set gives O(1) membership tests.
    duoyin = set(key for key, value in hanzi.items() if len(value) > 1)
    for raw_line in sys.stdin:
        line = raw_line.strip()
        # On Python 2 stdin yields byte strings, which must be decoded so the
        # scan below walks characters rather than UTF-8 bytes.  On Python 3
        # the line is already text and is used as-is.
        text = line.decode("utf8") if isinstance(line, bytes) else line
        fields = text.split()
        if not fields:
            # Nothing to inspect; indexing fields[0] here used to raise
            # IndexError and abort the whole run.
            continue
        phrase = fields[0]
        if any(char in duoyin for char in phrase):
            # Emit the stripped input line unchanged (same bytes as `print`).
            sys.stdout.write(line + "\n")
# Run the filter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()