/python/engine/PinYin/tools/qq_conv.py

http://scim-python.googlecode.com/ · Python · 45 lines · 37 code · 8 blank · 0 comment · 10 complexity · 879063f8b1d7aa348a44db56f229a177 MD5 · raw file

  1. import sys
  2. sys.path.append ("..")
  3. import PYUtil
  4. def main ():
  5. hanzi_dict = PYUtil.load_pinyin_table (open ("../pinyin_table.txt"))
  6. line_no = 0
  7. for line in sys.stdin:
  8. line = unicode (line, "utf8").strip ()
  9. strings = []
  10. pinyins = []
  11. pinyin = None
  12. for c in line:
  13. if c not in u"abcdefghijklmnopqrstuvwxyz":
  14. pinyins.append (pinyin)
  15. pinyin = None
  16. strings.append (c)
  17. else:
  18. if pinyin == None:
  19. pinyin = c
  20. else:
  21. pinyin = pinyin + c
  22. pinyins.append (pinyin)
  23. pinyins = pinyins[1:]
  24. try:
  25. for i in range (0, len (strings)):
  26. if pinyins[i] == None:
  27. c = strings[i]
  28. assert len (hanzi_dict[c]) == 1
  29. pinyins[i] = hanzi_dict[c].keys()[0]
  30. except:
  31. print >> sys.stderr, "%d : error!" % line_no
  32. continue
  33. output = u"%s\t%s" % (u"".join (strings), u"'".join (pinyins))
  34. print output.encode ("utf8")
  35. line_no += 1
  36. if __name__ == "__main__":
  37. main ()