PageRenderTime 27ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/python/engine/PinYin/tools/phrase_filter.py

http://scim-python.googlecode.com/
Python | 28 lines | 24 code | 4 blank | 0 comment | 7 complexity | d6668bb1bd225e986ba01842887fa295 MD5 | raw file
  1. import sys
  2. sys.path.append ("..")
  3. import bz2
  4. def load_phrase (_file):
  5. tmp = []
  6. for l in _file:
  7. phrase = unicode (l, "utf8").strip().split()[0]
  8. tmp.append (phrase)
  9. return tmp
  10. def main (filenames, in_file):
  11. phrase_dict = set([])
  12. for filename in filenames:
  13. tmp = load_phrase (bz2.BZ2File (filename, "r"))
  14. phrase_dict |= set(tmp)
  15. for line in sys.stdin:
  16. line = unicode (line, "utf8").strip ()
  17. phrase = line.split ()[0]
  18. if (phrase in phrase_dict) == in_file:
  19. print line.encode ("utf8")
  20. if __name__ == "__main__":
  21. if sys.argv[1] == "-v":
  22. main (sys.argv[2:], False)
  23. else:
  24. main (sys.argv[1:], True)