PageRenderTime 21ms CodeModel.GetById 15ms app.highlight 2ms RepoModel.GetById 1ms app.codeStats 0ms

/doc/ICTCLAS_Diary/2006-03-13.rtf

http://ictclas4j.googlecode.com/
Unknown | 25 lines | 24 code | 1 blank | 0 comment | 0 complexity | 818b8528d154d1225b5e8b1b6decf92d MD5 | raw file
 1{\rtf1\ansi\ansicpg0\uc1\deff0\deflang0\deflangfe0{\fonttbl{\f0\fnil\fcharset1 Arial;}{\f1\fnil\fcharset2 Symbol;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;\red3\green126\blue226;\red43\green94\blue196;}{\*\listtable{\list\listtemplateid1273317153\listsimple1
 2{\listlevel\levelnfc23\leveljc0\li240\fi-240\jclisttab\tx390{\leveltext\'01\'b7;}{\levelnumbers;}\f1\fs20\lang1024}
 3\listid20721068}
 4}
 5{\*\listoverridetable
 6{\listoverride\listid20721068\listoverridecount0\ls1}
 7}
 8
 9\uc1
10\pard\fi0\li0\ql\ri0\sb0\sa0\itap0 \plain \f0\fs20 CSpan::GetBestPOS()
11\par \pard\li240\fi-240\jclisttab\tx390\ql\ri0\sb0\sa0\itap0 {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 Trace back the result of Disamb() to get the best sequence of POS tags
12\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 Output parameter: m_nBestTag[]
13\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 The virtual ending in m_nBestTag[] is indicated by "-1"
14\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 Note: m_nsWords[i][0]==0 means "virtual ending" at i
15\plain\par \pard\fi0\li0\ql\ri0\sb0\sa0\itap0 \plain \f0\fs20 
16\par \pard\fi0\li0\ql\ri0\sb0\sa0\itap0 CSpan::Disamb()
17\par \pard\li240\fi-240\jclisttab\tx390\ql\ri0\sb0\sa0\itap0 {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 The first step in CSpan::GetBestPOS()
18\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 Very probably this is where the Viterbi algorithm is implemented
19\plain\par \pard\fi0\li0\ql\ri0\sb0\sa0\itap0 \plain \f0\fs20 
20\par \pard\fi0\li0\ql\ri0\sb0\sa0\itap0 
21\par \pard\fi0\li0\ql\ri0\sb0\sa0\itap0 class CSpan:
22\par \pard\li240\fi-240\jclisttab\tx390\ql\ri0\sb0\sa0\itap0 {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 m_dFrequency[i][j] = (cumulative?) path cost
23\line [From CSpan::GetFrom()] m_dFrequency ~= -log(pWordItems[i].dValue)+log(m_context.GetFrequency(aTag)) + ... [to be updated by the procedure after GetFrom()]
24\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 m_nStartPos: in POSTagging(), only updated in Reset(false). Probably, this variable records the position of the word (an index into a char array) at which the next scan should start
25\plain\par {\listtext\pard\plain\f1\fs20\lang1024 \'b7\tab}\ls1\ilvl0 \plain \f0\fs20 m_nTags[i][j]: }