/python/engine/PinYin/tools/qq_conv.py
Python | 45 lines | 41 code | 4 blank | 0 comment | 11 complexity | 879063f8b1d7aa348a44db56f229a177 MD5 | raw file
1import sys 2sys.path.append ("..") 3import PYUtil 4 5 6def main (): 7 hanzi_dict = PYUtil.load_pinyin_table (open ("../pinyin_table.txt")) 8 line_no = 0 9 for line in sys.stdin: 10 line = unicode (line, "utf8").strip () 11 strings = [] 12 pinyins = [] 13 pinyin = None 14 for c in line: 15 if c not in u"abcdefghijklmnopqrstuvwxyz": 16 pinyins.append (pinyin) 17 pinyin = None 18 strings.append (c) 19 else: 20 if pinyin == None: 21 pinyin = c 22 else: 23 pinyin = pinyin + c 24 pinyins.append (pinyin) 25 pinyins = pinyins[1:] 26 27 try: 28 for i in range (0, len (strings)): 29 if pinyins[i] == None: 30 c = strings[i] 31 assert len (hanzi_dict[c]) == 1 32 pinyins[i] = hanzi_dict[c].keys()[0] 33 except: 34 print >> sys.stderr, "%d : error!" % line_no 35 continue 36 37 output = u"%s\t%s" % (u"".join (strings), u"'".join (pinyins)) 38 print output.encode ("utf8") 39 line_no += 1 40 41 42 43 44if __name__ == "__main__": 45 main ()