PageRenderTime 21ms CodeModel.GetById 9ms app.highlight 7ms RepoModel.GetById 1ms app.codeStats 0ms

/python/engine/PinYin/annotate.py

http://scim-python.googlecode.com/
Python | 46 lines | 21 code | 3 blank | 22 comment | 8 complexity | 16d0aaa780d2a5246d3e52aadf58c0b3 MD5 | raw file
 1import ZhengJu
 2editor = ZhengJu.Editor()
 3i=0
 4count = 0
 5for l in file ("google_annotated_uniq.txt"):
 6	ll = unicode(l,"utf8")
 7	p = (ll).split(u"\t")
 8	#~ print p[0]
 9	if len(p[0])==1:
10		count += 1
11		i += int(p[2])
12	#~ try:
13		#~ editor.reverse(p[0])
14		#~ pinyin = "'".join (map (str, editor.wordlist))
15		#~ print (p[0] + "\t" + pinyin + "\t" + p[1]).encode ("utf-8"),
16	#~ except:
17		#~ pass
18print i,count
19
20#~ for l in file ("google.txt"):
21	#~ ll = unicode(l,"utf8")
22	#~ p = (ll).split(u"\t")
23	#~ print p[0]
24	#~ if len(p[0])==1:
25		#~ count += 1
26		#~ i += int(p[1])
27		#~ print p[0].encode ("utf-8")
28	#~ try:
29		#~ editor.reverse(p[0])
30		#~ pinyin = "'".join (map (str, editor.wordlist))
31		#~ print (p[0] + "\t" + pinyin + "\t" + p[1]).encode ("utf-8"),
32	#~ except:
33		#~ pass
34#~ print i,count
35
36import sets as Set
37i=0
38count = 0
39s = Set.Set()
40for l in file ("google_annotated.txt"):
41	ll = unicode(l,"utf8")
42	p = (ll).split(u"\t")
43	if p[0] not in s:
44		s.add(p[0])
45		print ll.encode ("utf-8"),
46