PageRenderTime 5ms CodeModel.GetById 1ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/python/engine/PinYin/tools/phrase_filter.py

http://scim-python.googlecode.com/
Python | 28 lines | 24 code | 4 blank | 0 comment | 10 complexity | d6668bb1bd225e986ba01842887fa295 MD5 | raw file
 1import sys
 2sys.path.append ("..")
 3import bz2
 4
 5def load_phrase (_file):
 6	tmp = []
 7	for l in _file:
 8		phrase = unicode (l, "utf8").strip().split()[0]
 9		tmp.append (phrase)
10	return tmp
11
def main(filenames, in_file):
    """Filter stdin lines by whether their first field is a known phrase.

    Phrases are loaded with ``load_phrase`` from each bz2-compressed file in
    *filenames*. Every stdin line is decoded as UTF-8 and stripped; its first
    whitespace-separated field is the phrase. The stripped line is written to
    stdout (UTF-8 encoded, newline-terminated) when
    ``(phrase in loaded_phrases) == in_file``.

    Args:
        filenames: Paths of bz2-compressed phrase files.
        in_file: True to keep lines whose phrase appears in the files,
            False to keep lines whose phrase does not.
    """
    phrase_dict = set()
    for filename in filenames:
        phrase_file = bz2.BZ2File(filename, "r")
        try:
            phrase_dict.update(load_phrase(phrase_file))
        finally:
            # Release the handle even if a phrase file is malformed.
            phrase_file.close()

    # Work on raw bytes at both ends: Python 3 exposes the byte streams via
    # ``.buffer``; on Python 2 sys.stdin/sys.stdout are byte streams already.
    source = getattr(sys.stdin, "buffer", sys.stdin)
    sink = getattr(sys.stdout, "buffer", sys.stdout)

    for raw in source:
        text = raw.decode("utf8") if isinstance(raw, bytes) else raw
        line = text.strip()
        fields = line.split()
        if not fields:
            # Blank lines carry no phrase; skip them rather than crash on [0].
            continue
        if (fields[0] in phrase_dict) == in_file:
            sink.write(line.encode("utf8") + b"\n")
23
if __name__ == "__main__":
    # Command line: [-v] PHRASE_FILE.bz2 ...   (input lines come from stdin)
    args = sys.argv[1:]
    if not args:
        # With no arguments there is nothing to inspect; report usage
        # instead of failing with an IndexError traceback.
        sys.stderr.write(
            "usage: %s [-v] PHRASE_FILE.bz2 ... < input\n" % sys.argv[0]
        )
        sys.exit(2)
    if args[0] == "-v":
        # -v inverts the filter: keep lines whose phrase is NOT in the files.
        main(args[1:], False)
    else:
        main(args, True)