/doc/ICTCLAS_Diary/2006-02-22.rtf

http://ictclas4j.googlecode.com/ · Unknown · 58 lines · 58 code · 0 blank · 0 comment · 0 complexity · 253b160868d9fd4bca0f2ef21a0bc1e4 MD5 · raw file

  1. {\rtf1\ansi\ansicpg1252\uc2\deff0\stshfdbch13\stshfloch0\stshfhich0\stshfbi0\deflang1033\deflangfe2052{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}
  2. {\f13\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}SimSun{\*\falt \'cb\'ce\'cc\'e5};}{\f37\fnil\fcharset134\fprq2{\*\panose 02010600030101010101}@SimSun;}{\f38\froman\fcharset238\fprq2 Times New Roman CE;}
  3. {\f39\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f41\froman\fcharset161\fprq2 Times New Roman Greek;}{\f42\froman\fcharset162\fprq2 Times New Roman Tur;}{\f43\froman\fcharset177\fprq2 Times New Roman (Hebrew);}
  4. {\f44\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f45\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f46\froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f48\fswiss\fcharset238\fprq2 Arial CE;}
  5. {\f49\fswiss\fcharset204\fprq2 Arial Cyr;}{\f51\fswiss\fcharset161\fprq2 Arial Greek;}{\f52\fswiss\fcharset162\fprq2 Arial Tur;}{\f53\fswiss\fcharset177\fprq2 Arial (Hebrew);}{\f54\fswiss\fcharset178\fprq2 Arial (Arabic);}
  6. {\f55\fswiss\fcharset186\fprq2 Arial Baltic;}{\f56\fswiss\fcharset163\fprq2 Arial (Vietnamese);}{\f170\fnil\fcharset0\fprq2 SimSun Western{\*\falt \'cb\'ce\'cc\'e5};}{\f410\fnil\fcharset0\fprq2 @SimSun Western;}}{\colortbl;\red0\green0\blue0;
  7. \red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;
  8. \red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0
  9. \fs24\lang1033\langfe3076\loch\f0\hich\af0\dbch\af13\cgrid\langnp1033\langfenp3076 \snext0 Normal;}{\*\cs10 \additive \ssemihidden Default Paragraph Font;}{\*
  10. \ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tscellwidthfts0\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv
  11. \ql \li0\ri0\widctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \fs20\lang1024\langfe1024\cgrid\langnp1024\langfenp1024 \snext11 \ssemihidden Normal Table;}}{\*\latentstyles\lsdstimax156\lsdlockeddef0}{\*\listtable{\list\listtemplateid128256817
  12. \listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }{\listname ;}\listid629849591}
  13. {\list\listtemplateid176119771\listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }{\listname
  14. ;}\listid1019612489}{\list\listtemplateid151546482\listsimple{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace0\levelindent0{\leveltext\'02\'00.;}{\levelnumbers\'01;}\f1\fs20 \fi-240\li240\jclisttab\tx390\lin240 }
  15. {\listname ;}\listid1805721148}}{\*\listoverridetable{\listoverride\listid1019612489\listoverridecount0\ls1}{\listoverride\listid1805721148\listoverridecount0\ls2}{\listoverride\listid629849591\listoverridecount0\ls3}}{\*\rsidtbl \rsid1508032}
  16. {\*\generator Microsoft Word 11.0.6359;}{\info{\author SEEM}{\operator SEEM}{\creatim\yr2006\mo4\dy13\hr22\min53}{\revtim\yr2006\mo4\dy13\hr22\min55}{\version2}{\edmins2}{\nofpages1}{\nofwords208}{\nofchars1189}{\*\company CUHK}{\nofcharsws1395}
  17. {\vern24703}}\paperw12240\paperh15840\margl1800\margr1800\margt1440\margb1440\gutter0 \widowctrl\ftnbj\aenddoc\noxlattoyen\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\hyphcaps0\horzdoc\dghspace120\dgvspace120\dghorigin1701\dgvorigin1984\dghshow0\dgvshow3
  18. \jcompress\viewkind1\viewscale100\nolnhtadjtbl\rsidroot1508032 \fet0\sectd \linex0\sectdefaultcl\sftnbj {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl3
  19. \pndec\pnstart1\pnindent720\pnhang {\pntxta \hich .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta \hich )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang
  20. {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb \hich (}{\pntxta \hich )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang
  21. {\pntxtb \hich (}{\pntxta \hich )}}\pard\plain \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 \fs24\lang1033\langfe3076\loch\af0\hich\af0\dbch\af13\cgrid\langnp1033\langfenp3076 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  22. \hich\af1\dbch\af13\loch\f1 Meeting Summary:
  23. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  24. \faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Try to find ways to obtain the 6-month PKU corpus. (will be used for training)}{
  25. \f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  26. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 2.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  27. \faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Decided to add a component to the free ICTCLAS in order to recognize nested names (organization names and some location names)
  28. \line \hich\af1\dbch\af13\loch\f1 The component would be HMM-based and would need training. Training only this component should be enough. }{\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  29. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 3.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls1\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  30. \faauto\ls1\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 In the future, we may need to train the whole software with the 6-month PKU corpus. We may need to prepare for it.}{
  31. \f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  32. \par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  33. \par
  34. \par \hich\af1\dbch\af13\loch\f1 About tag-conversion:
  35. \par \hich\af1\dbch\af13\loch\f1 No matter which PKU corpus we will use\hich\af1\dbch\af13\loch\f1
  36. , we have to convert the PKU tags. The conversion is quite troublesome and cannot be done automatically. Training the Role Model needs a corpus that meets the following requirements (according to the paper "Chinese Named Entity Recognition Using Role Model", page 8):
  37. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  38. \faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The surname and\hich\af1\dbch\af13\loch\f1
  39. given name of a Chinese name should be tagged distinguishably (not using the same tag). -- Done in 2003 PKU standard.}{\f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  40. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 2.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  41. \faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The transliterated PER and transliterated LOC are not mixed with Chinese PER and LOC. -- Not done in 2003 PKU standard.}{
  42. \f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  43. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 3.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  44. \faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 The ambiguous l\hich\af1\dbch\af13\loch\f1 abel "/j" is replaced with its corresponding NE label. -- Not done in 2003 PKU standard.}{
  45. \f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  46. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 4.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls2\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  47. \faauto\ls2\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Different tags are used to distinguish different punctuation marks. -- Done in 2003 PKU standard.}{
  48. \f1\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  49. \par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  50. \par }\pard \qj \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 Apart from these tags, it seems that the other tags are not important.
  51. \par
  52. \par }\pard \ql \li0\ri0\nowidctlpar\faauto\rin0\lin0\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032
  53. \par
  54. \par \hich\af1\dbch\af13\loch\f1 Other findings:
  55. \par {\pntext\pard\plain\f1\fs20\lang0\langfe2052\langnp0 \hich\af1\dbch\af13\loch\f1 1.\tab}}\pard \ql \fi-240\li240\ri0\nowidctlpar\jclisttab\tx390{\*\pn \pnlvlbody\ilvl0\ls3\pnrnot0\pndec\pnf1\pnfs20\pnstart1\pnindent360\pnsp120\pnhang {\pntxta \hich .}}
  56. \faauto\ls3\rin0\lin240\itap0 {\f1\fs20\lang0\langfe2052\langnp0\langfenp2052\insrsid1508032 \hich\af1\dbch\af13\loch\f1 PKU regards Japanese names as foreign names. Therefore, it tags a Japanese name as a whole and doesn't separate the surname from the given name.}{
  57. \insrsid1508032
  58. \par }}