PageRenderTime 10ms CodeModel.GetById 1ms app.highlight 6ms RepoModel.GetById 1ms app.codeStats 0ms

/python/engine/PinYin/tools/qq_conv.py

http://scim-python.googlecode.com/
Python | 45 lines | 41 code | 4 blank | 0 comment | 11 complexity | 879063f8b1d7aa348a44db56f229a177 MD5 | raw file
 1import sys
 2sys.path.append ("..")
 3import PYUtil
 4
 5
 6def main ():
 7	hanzi_dict = PYUtil.load_pinyin_table (open ("../pinyin_table.txt"))
 8	line_no = 0
 9	for line in sys.stdin:
10		line = unicode (line, "utf8").strip ()
11		strings = []
12		pinyins = []
13		pinyin = None
14		for c in line:
15			if c not in u"abcdefghijklmnopqrstuvwxyz":
16				pinyins.append (pinyin)
17				pinyin = None
18				strings.append (c)
19			else:
20				if pinyin == None:
21					pinyin = c
22				else:
23					pinyin = pinyin + c
24		pinyins.append (pinyin)
25		pinyins = pinyins[1:]
26		
27		try:
28			for i in range (0, len (strings)):
29				if pinyins[i] == None:
30					c = strings[i]
31					assert len (hanzi_dict[c]) == 1
32					pinyins[i] = hanzi_dict[c].keys()[0]
33		except:
34			print >> sys.stderr, "%d : error!" % line_no
35			continue
36
37		output = u"%s\t%s" % (u"".join (strings), u"'".join (pinyins))
38		print output.encode ("utf8")
39		line_no += 1
40
41				
42				
43
# Script entry point: start the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
	main ()