/python/engine/PinYin/tools/qq_conv.py
http://scim-python.googlecode.com/ · Python · 45 lines · 37 code · 8 blank · 0 comment · 10 complexity · 879063f8b1d7aa348a44db56f229a177 MD5 · raw file
- import sys
- sys.path.append ("..")
- import PYUtil
# Characters that make up an inline pinyin spelling in the input.
_PINYIN_LETTERS = u"abcdefghijklmnopqrstuvwxyz"


def _split_entry(line):
    """Split one input line into characters and their inline pinyin.

    Every character that is not a lowercase ASCII letter becomes one output
    character.  The run of lowercase letters directly following it, if any,
    becomes that character's pinyin.  Letters that appear before the first
    such character belong to nothing and are discarded.

    Returns a pair ``(chars, pinyins)`` of equal-length lists;
    ``pinyins[i]`` is ``None`` when ``chars[i]`` carried no inline pinyin.
    """
    chars = []
    pinyins = []
    for c in line:
        if c not in _PINYIN_LETTERS:
            chars.append(c)
            pinyins.append(None)
        elif chars:
            # Extend the pinyin of the most recent character.
            if pinyins[-1] is None:
                pinyins[-1] = c
            else:
                pinyins[-1] += c
    return chars, pinyins


def _fill_missing_pinyins(chars, pinyins, hanzi_dict):
    """Fill every ``None`` slot of *pinyins* from *hanzi_dict*, in place.

    ``hanzi_dict[c]`` is expected to be a mapping whose keys are the
    possible pinyin readings of character ``c``.  A reading is only filled
    in when it is unambiguous (exactly one key).

    Raises:
        KeyError: the character is not present in *hanzi_dict*.
        ValueError: the character does not have exactly one reading.
    """
    for i, c in enumerate(chars):
        if pinyins[i] is not None:
            continue
        readings = hanzi_dict[c]
        # Explicit check instead of ``assert`` so it still runs under -O.
        if len(readings) != 1:
            raise ValueError("no unique pinyin for %r" % c)
        pinyins[i] = list(readings.keys())[0]


def main():
    """Convert phrase lines on stdin to ``phrase<TAB>pin'yin`` lines on stdout.

    Each UTF-8 input line is a phrase in which a character may be followed
    by its pinyin in lowercase ASCII letters.  Characters without inline
    pinyin are looked up in the table loaded from ``../pinyin_table.txt``.
    Lines containing a character that is missing from the table, or that
    has more than one reading, are skipped and reported on stderr as
    ``"<line_no> : error!"``.

    ``line_no`` is the 0-based index of the input line.  It advances for
    every line read, including skipped ones, so the reported number always
    identifies the offending input line.
    """
    hanzi_dict = PYUtil.load_pinyin_table(open("../pinyin_table.txt"))

    for line_no, raw_line in enumerate(sys.stdin):
        line = unicode(raw_line, "utf8").strip()
        chars, pinyins = _split_entry(line)

        try:
            _fill_missing_pinyins(chars, pinyins, hanzi_dict)
        except (KeyError, ValueError):
            # Only data problems are treated as a bad line; anything else
            # (including Ctrl-C) propagates instead of being swallowed.
            sys.stderr.write("%d : error!\n" % line_no)
            continue

        output = u"%s\t%s" % (u"".join(chars), u"'".join(pinyins))
        sys.stdout.write(output.encode("utf8") + "\n")
-
-
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not when imported.
    main()