/python/engine/PinYin/tools/phrase_filter.py
Python | 28 lines | 24 code | 4 blank | 0 comment | 7 complexity | d6668bb1bd225e986ba01842887fa295 MD5 | raw file
- import sys
- sys.path.append ("..")
- import bz2
def load_phrase(_file):
    """Collect the first whitespace-separated token of each line in *_file*.

    Args:
        _file: iterable yielding UTF-8 encoded byte lines (``main`` passes an
            open ``bz2.BZ2File``).

    Returns:
        A list with one decoded phrase per non-blank line, in input order.
        Duplicates are kept.

    Raises:
        UnicodeDecodeError: if a line is not valid UTF-8.
    """
    phrases = []
    for raw in _file:
        # ``.decode`` is available on both Python 2 ``str`` and Python 3
        # ``bytes``; the ``unicode`` builtin exists only on Python 2.
        # ``split()`` with no argument already ignores leading/trailing
        # whitespace, so no separate ``strip()`` is needed.
        fields = raw.decode("utf8").split()
        # A blank or whitespace-only line has no first token; indexing [0]
        # would raise IndexError, so such lines are skipped.
        if fields:
            phrases.append(fields[0])
    return phrases
def main(filenames, in_file):
    """Filter stdin lines by whether their first token is a known phrase.

    The known phrases are the union of ``load_phrase`` results over every
    bz2-compressed file named in *filenames*.  Each stdin line is decoded as
    UTF-8 and stripped; it is written to stdout (UTF-8 encoded, terminated by
    a newline) when "its first token is a known phrase" equals *in_file*.
    Blank input lines carry no phrase and are skipped.

    Args:
        filenames: paths of bz2-compressed phrase files.
        in_file: True to keep lines whose phrase is known, False to keep
            lines whose phrase is unknown.
    """
    phrase_dict = set()
    for filename in filenames:
        phrase_file = bz2.BZ2File(filename, "r")
        try:
            phrase_dict.update(load_phrase(phrase_file))
        finally:
            # Release the handle even if a phrase file fails to parse.
            phrase_file.close()

    # Work on bytes explicitly: Python 3 text streams expose the underlying
    # byte stream as ``.buffer``; Python 2 file objects have no such
    # attribute and already read/write byte strings.
    stdin = getattr(sys.stdin, "buffer", sys.stdin)
    stdout = getattr(sys.stdout, "buffer", sys.stdout)
    for raw in stdin:
        line = raw.decode("utf8").strip()
        fields = line.split()
        if not fields:
            # Blank line: there is no first token to look up (indexing [0]
            # would raise IndexError).
            continue
        if (fields[0] in phrase_dict) == in_file:
            stdout.write(line.encode("utf8") + b"\n")
if __name__ == "__main__":
    # Command line: [-v] PHRASE_FILE.bz2 [PHRASE_FILE.bz2 ...]
    # Lines to filter are read from stdin.  A leading "-v" inverts the
    # filter so that only lines whose phrase is NOT in the files are kept.
    args = sys.argv[1:]
    if not args:
        # Without this guard the "-v" check would raise IndexError.
        # sys.exit with a string prints it to stderr and exits non-zero.
        sys.exit("usage: %s [-v] phrase_file.bz2 [...] < lines" % sys.argv[0])
    if args[0] == "-v":
        main(args[1:], False)
    else:
        main(args, True)