PageRenderTime 43ms CodeModel.GetById 34ms app.highlight 3ms RepoModel.GetById 1ms app.codeStats 1ms

/doc/ICTCLAS_Diary/2006-02-22.rtf

http://ictclas4j.googlecode.com/
Unknown | 58 lines | 58 code | 0 blank | 0 comment | 0 complexity | 253b160868d9fd4bca0f2ef21a0bc1e4 MD5 | raw file
 1{\rtf1\ansi\ansicpg1252\uc2\deff0\stshfdbch13\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe2052{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
 2{\f13\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}SimSun{\*\falt \'cb\'ce\'cc\'e5};}{\f37\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}@SimSun;}{\f38\froman\fcharset238\fprq2 Times New Roman CE;}
 3{\f39\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f41\froman\fcharset161\fprq2 Times New Roman Greek;}{\f42\froman\fcharset162\fprq2 Times New Roman Tur;}{\f43\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
 4{\f44\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f45\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f46\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f48\fswiss\fcharset238\fprq2 Arial CE;}
 5{\f49\fswiss\fcharset204\fprq2 Arial Cyr;}{\f51\fswiss\fcharset161\fprq2 Arial Greek;}{\f52\fswiss\fcharset162\fprq2 Arial Tur;}{\f53\fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f54\fswiss\fcharset178\fprq2 Arial (Arabic);}
 6{\f55\fswiss\fcharset186\fprq2 Arial Baltic;}{\f56\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f170\fnil\fcharset0\fprq2 SimSun Western{\*\falt \'cb\'ce\'cc\'e5};}{\f410\fnil\fcharset0\fprq2 @SimSun Western;}}{\colortbl;\red0\green0\blue0;
 7\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;
 8\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
 9\fs24\lang1033\langfe3076\loch\f0\hich\af0\dbch\af13\cgrid\langnp1033\langfenp3076 \snext0 Normal;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\*
10\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv 
11\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\listtable{\list\listtemplateid128256817
12\listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }{\listname ;}\listid629849591}
13{\list\listtemplateid176119771\listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }{\listname 
14;}\listid1019612489}{\list\listtemplateid151546482\listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }
15{\listname ;}\listid1805721148}}{\*\listoverridetable{\listoverride\listid1019612489\listoverridecount0\ls1}{\listoverride\listid1805721148\listoverridecount0\ls2}{\listoverride\listid629849591\listoverridecount0\ls3}}{\*\rsidtbl \rsid1508032}
16{\*\generator Microsoft Word 11.0.6359;}{\info{\author SEEM}{\operator SEEM}{\creatim\yr2006\mo4\dy13\hr22\min53}{\revtim\yr2006\mo4\dy13\hr22\min55}{\version2}{\edmins2}{\nofpages1}{\nofwords208}{\nofchars1189}{\*\company CUHK}{\nofcharsws1395}
17{\vern24703}}\paperw12240\paperh15840\margl1800\margr1800\margt1440\margb1440\gutter0 \widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\horzdoc\dghspace120\dgvspace120\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3
18\jcompress\viewkind1\viewscale100\nolnhtadjtbl\rsidroot1508032 \fet0\sectd \linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl3
19\pndec\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta \hich )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang 
20{\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang 
21{\pntxtb \hich (}{\pntxta \hich )}}\pard\plain \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 \fs24\lang1033\langfe3076\loch\af0\hich\af0\dbch\af13\cgrid\langnp1033\langfenp3076 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
22\hich\af1\dbch\af13\loch\f1 Meeting Summary:
23\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
24\faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Try to find ways to obtain the 6-month PKU corpus. (will be used for training)}{
25\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
26\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 2.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
27\faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Decided to add the component to the free ICTCLAS in order to recognize nested names (organization names and some location names).
28\line \hich\af1\dbch\af13\loch\f1 The component would be HMM-based and need training. Training only this component should be enough. }{\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
29\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 3.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
30\faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 In the future, we may need to train the whole software with the 6-month PKU corpus, so we may need to prepare for it.}{
31\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
32\par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
33\par 
34\par \hich\af1\dbch\af13\loch\f1 About tag-conversion:
35\par \hich\af1\dbch\af13\loch\f1 No matter which PKU corpus we will use\hich\af1\dbch\af13\loch\f1 
36, we have to convert the PKU tags. The conversion is quite troublesome and cannot be done automatically. Training the Role Model requires a corpus that meets the following conditions (according to the paper "Chinese Named Entity Recognition Using Role Model", page 8):
37\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
38\faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The surname and\hich\af1\dbch\af13\loch\f1 
39 given name of a Chinese name should be tagged distinctly (not using the same tag). -- Done in 2003 PKU standard.}{\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
40\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 2.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
41\faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The transliterated PER and transliterated LOC are not mixed with Chinese PER and LOC. -- Not done in 2003 PKU standard.}{
42\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
43\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 3.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
44\faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The ambiguous l\hich\af1\dbch\af13\loch\f1 abel "/j" is replaced with its corresponding NE label. -- Not done in 2003 PKU standard.}{
45\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
46\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 4.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
47\faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Different tags are used to distinguish different punctuation marks. -- Done in 2003 PKU standard.}{
48\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
49\par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
50\par }\pard \qj \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Apart from these tags, the other tags do not seem to be important. 
51\par 
52\par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 
53\par 
54\par \hich\af1\dbch\af13\loch\f1 Other findings:
55\par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls3\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
56\faauto\ls3\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 PKU regards Japanese names as foreign names. Therefore, it tags a Japanese name as a whole and does not separate the surname and the given name.}{
57\insrsid1508032 
58\par }}