PageRenderTime 81ms CodeModel.GetById 10ms app.highlight 64ms RepoModel.GetById 2ms app.codeStats 0ms

/src/org/ictclas4j/bean/Span.java

http://ictclas4j.googlecode.com/
Java | 659 lines | 520 code | 56 blank | 83 comment | 140 complexity | 99cdb84be0bb0ff2f8ff3942bc454719 MD5 | raw file
  1package org.ictclas4j.bean;
  2
  3import java.util.ArrayList;
  4
  5import org.ictclas4j.utility.Utility;
  6import org.ictclas4j.utility.Utility.TAG_TYPE;
  7
  8
  9public class Span {
 10
 11	public ContextStat context; // tag statistics used for costs; created by loadContext()
 12
 13	TAG_TYPE tagType; // tagging mode; selects the branch taken in posTagging(), getFrom() and guessPOS()
 14
 15	private int[][] m_nTags; // [word slot][candidate] candidate tags; each row ends with -1, row 0 holds the begin tag
 16
 17	int[][] m_nBestPrev; // [word slot][candidate] index of the cheapest candidate of the previous slot (set by disamb())
 18
 19	int m_nStartPos; // word position carried over as the start of the next segment (see reset())
 20
 21	int[] m_nBestTag; // chosen tag per word slot; terminated with -1 by getBestPOS()
 22
 23	int m_nCurLength; // number of slots used by the current segment, slot 0 included
 24
 25	String[] m_sWords; // words of the current segment; null marks the virtual begin/end slot
 26
 27	double[][] m_dFrequency; // [word slot][candidate] cost; disamb() adds the cheapest path cost to it
 28
 29	public int[][] m_nUnknownWords; // per recognised unknown word: [0] start position, [1] end position
 30
 31	public int m_nUnknownIndex; // number of entries currently stored in m_nUnknownWords
 32
 33	public int[] m_nWordPosition; // cumulative byte position at which each word slot starts
 34
 35	public double[] m_dWordsPossibility; // cost of each recognised unknown word, parallel to m_nUnknownWords
 36
	/**
	 * Creates a span in normal tagging mode with all work buffers allocated.
	 *
	 * The mode is assigned before the begin tag is written. Previously the mode
	 * was still null when it was compared against TT_NORMAL, so the begin tag
	 * was always set to 100 even though the span ended up in normal mode.
	 */
	public Span() {
		tagType = Utility.TAG_TYPE.TT_NORMAL;

		m_nTags = new int[Utility.MAX_WORDS_PER_SENTENCE][Utility.MAX_POS_PER_WORD];
		m_nTags[0][0] = 0; // begin tag used for normal tagging, same value reset(false) writes
		m_nTags[0][1] = -1; // end of the candidate list of slot 0

		m_nBestPrev = new int[Utility.MAX_WORDS_PER_SENTENCE][Utility.MAX_POS_PER_WORD];
		m_nBestTag = new int[Utility.MAX_WORDS_PER_SENTENCE];
		m_sWords = new String[Utility.MAX_WORDS_PER_SENTENCE];
		m_nUnknownWords = new int[Utility.MAX_UNKNOWN_PER_SENTENCE][2];
		m_nWordPosition = new int[Utility.MAX_WORDS_PER_SENTENCE];
		m_dWordsPossibility = new double[Utility.MAX_UNKNOWN_PER_SENTENCE];
		m_dFrequency = new double[Utility.MAX_WORDS_PER_SENTENCE][Utility.MAX_POS_PER_WORD];
	}
 54
 55	public boolean loadContext(String fileName) {
 56		if (fileName != null) {
 57			context = new ContextStat();
 58			return context.load(fileName);
 59		}
 60		return false;
 61	}
 62
 63	public void setType(TAG_TYPE type) {
 64		tagType = type;
 65	}
 66
 67	public boolean posTagging(ArrayList<WordResult> wrList, Dictionary coreDict, Dictionary unknownDict) {
 68		int i = 0;
 69		int j, nStartPos;
 70		reset(false);
 71		while (i > -1 && i < wrList.size()) {
 72			nStartPos = i;// Start Position
 73			i = getFrom(wrList, nStartPos, coreDict, unknownDict);
 74			getBestPOS();
 75			switch (tagType) {
 76			case TT_NORMAL:// normal POS tagging
 77				j = 1;
 78				// Store the best POS tagging
 79				while (m_nBestTag[j] != -1 && j < m_nCurLength) {
 80					WordResult wr = wrList.get(j + nStartPos - 1);
 81					wr.setHandle(m_nBestTag[j]);
 82					// Let ?be 0
 83					// Exist and update its frequncy as a POS value
 84					if (wr.getValue() > 0 && coreDict.isExist(wr.getWord(), -1))
 85						wr.setValue(coreDict.getFreq(wr.getWord(), m_nBestTag[j]));
 86					j += 1;
 87				}
 88				break;
 89			case TT_PERSON:// Person recognition
 90				PersonRecognize(unknownDict);
 91				break;
 92			case TT_PLACE:// Place name recognition
 93			case TT_TRANS_PERSON:// Transliteration Person
 94				PlaceRecognize(coreDict, unknownDict);
 95				break;
 96			default:
 97				break;
 98			}
 99			reset();
100		}
101		return true;
102	}
103
104	public boolean reset(boolean isContinue) {
105		if (!isContinue) {
106			if (tagType != Utility.TAG_TYPE.TT_NORMAL)
107				m_nTags[0][0] = 100;// Begin tag
108			else
109				m_nTags[0][0] = 0;// Begin tag
110			m_nUnknownIndex = 0;
111			m_dFrequency[0][0] = 0;
112			m_nStartPos = 0;
113		} else {
114			// Get the last POS in the last sentence
115			m_nTags[0][0] = m_nTags[m_nCurLength - 1][0];
116			m_dFrequency[0][0] = m_dFrequency[m_nCurLength - 1][0];
117		}
118
119		// Get the last POS in the last sentence,set the -1 as end flag
120		m_nTags[0][1] = -1;
121		m_nCurLength = 1;
122		m_nWordPosition[1] = m_nStartPos;
123		m_sWords[0] = null;
124		return true;
125	}
126
127	public boolean reset() {
128		return reset(true);
129	}
130
131	private boolean disamb() {
132		int i, j, k, nMinCandidate;
133		double dMinFee = 0;
134		double dTmp = 0;
135
136		for (i = 1; i < m_nCurLength; i++)// For every word
137		{
138			for (j = 0; m_nTags[i][j] >= 0; j++)// For every word
139			{
140				nMinCandidate = Utility.MAX_POS_PER_WORD + 1;
141				for (k = 0; m_nTags[i - 1][k] >= 0; k++) {
142					// ConvertPOS(m_nTags[i-1][k],&nKey,&nPrevPOS);
143					// ConvertPOS(m_nTags[i][j],&nKey,&nCurPOS);
144					// dTmp=m_context.GetContextPossibility(nKey,nPrevPOS,nCurPOS);
145					dTmp = -Math.log(context.getPossibility(0, m_nTags[i - 1][k], m_nTags[i][j]));
146					dTmp += m_dFrequency[i - 1][k];// Add the fees
147					if (nMinCandidate > 10 || dTmp < dMinFee)// Get the
148					// minimum fee
149					{
150						nMinCandidate = k;
151						dMinFee = dTmp;
152					}
153				}
154				m_nBestPrev[i][j] = nMinCandidate;// The best previous for j
155				m_dFrequency[i][j] = m_dFrequency[i][j] + dMinFee;
156			}
157		}
158
159		return true;
160	}
161
162	public boolean getBestPOS() {
163		disamb();
164		for (int i = m_nCurLength - 1, j = 0; i > 0; i--)// ,j>=0
165		{
166			if (m_sWords[i] != null) {// Not virtual ending
167				m_nBestTag[i] = m_nTags[i][j];// Record the best POS and its
168				// possibility
169			}
170			j = m_nBestPrev[i][j];
171		}
172		int nEnd = m_nCurLength;// Set the end of POS tagging
173		if (m_sWords[m_nCurLength - 1] == null)
174			nEnd = m_nCurLength - 1;
175		m_nBestTag[nEnd] = -1;
176		return true;
177	}
178
179	/**
180	 * ?????dictUnknown????????????
181	 * @param wrList
182	 * @param index
183	 * @param coreDict
184	 * @param unknownDict
185	 * @return
186	 */
187	public int getFrom(ArrayList<WordResult> wrList, int index, Dictionary coreDict, Dictionary unknownDict) {
188
189		int[] aPOS = new int[Utility.MAX_POS_PER_WORD];
190		int[] aFreq = new int[Utility.MAX_POS_PER_WORD];
191		int nFreq = 0, j, nRetPos = 0, nWordsIndex = 0;
192		boolean bSplit = false;// Need to split in Transliteration recognition
193		int i = 1, nPOSCount;
194		String sCurWord;// Current word
195		nWordsIndex = index ;
196
197		for (; i < Utility.MAX_WORDS_PER_SENTENCE && nWordsIndex < wrList.size(); i++) {
198			WordResult wr = wrList.get(nWordsIndex);
199			String word = wr.getWord();
200			if (tagType == Utility.TAG_TYPE.TT_NORMAL || !unknownDict.isExist(word, 44)) {
201				// current word
202				m_sWords[i] = word;// store
203				m_nWordPosition[i + 1] = m_nWordPosition[i] + m_sWords[i].getBytes().length;
204			}  
205			
206			// Record the position of current word
207			m_nStartPos = m_nWordPosition[i + 1];
208			// Move the Start POS to the ending
209			if (tagType != Utility.TAG_TYPE.TT_NORMAL) {
210				// Get the POSs from the unknown recognition dictionary
211				sCurWord = m_sWords[i];
212				if (tagType == Utility.TAG_TYPE.TT_TRANS_PERSON && i > 0
213						&& Utility.charType(m_sWords[i - 1]) == Utility.CT_CHINESE) {
214					if (".".equals(m_sWords[i]))
215						sCurWord = "?";
216					else if ("-".equals(m_sWords))
217						sCurWord = "?";
218				}
219				ArrayList<WordItem> wis = unknownDict.getHandle(sCurWord);
220				nPOSCount = wis.size() + 1;
221				for (j = 0; j < wis.size(); j++) {
222					aPOS[j] = wis.get(j).getHandle();
223					aFreq[j] = wis.get(j).getFreq();
224					m_nTags[i][j] = aPOS[j];
225					m_dFrequency[i][j] = -Math.log((1 + aFreq[j]));
226					m_dFrequency[i][j] += Math.log((context.getFreq(0, aPOS[j]) + nPOSCount));
227				}
228
229				if ("?##?".equals(m_sWords[i])) {
230					m_nTags[i][j] = 100;
231					m_dFrequency[i][j] = 0;
232					j++;
233				} else if ("?##?".equals(m_sWords[i])) {
234					m_nTags[i][j] = 101;
235					m_dFrequency[i][j] = 0;
236					j++;
237				} else {
238					wis = coreDict.getHandle(m_sWords[i]);
239					nFreq = 0;
240					for (int k = 0; k < wis.size(); k++) {
241						aFreq[k] = wis.get(k).getFreq();
242						nFreq += aFreq[k];
243					}
244					if (wis.size() > 0) {
245						m_nTags[i][j] = 0;
246						m_dFrequency[i][j] = -Math.log((double) (1 + nFreq));
247						m_dFrequency[i][j] += Math.log((double) (context.getFreq(0, 0) + nPOSCount));
248						j++;
249					}
250				}
251			} else// For normal POS tagging
252			{
253				j = 0;
254				// Get the POSs from the unknown recognition dictionary
255				if (wr.getHandle() > 0) {// The word has is only one POS
256					// value
257					// We have record its POS and nFrequncy in the items.
258					m_nTags[i][j] = wr.getHandle();
259					m_dFrequency[i][j] = -Math.log(wr.getValue())
260							+ Math.log((double) (context.getFreq(0, m_nTags[i][j]) + 1));
261
262					// Not permit the value less than 0
263					if (m_dFrequency[i][j] < 0)
264						m_dFrequency[i][j] = 0;
265					j++;
266				}
267
268				// The word has multiple POSs, we should retrieve the
269				// information from Core Dictionary
270				else {
271					if (wr.getHandle() < 0) {// The word has is only one POS
272						m_nTags[i][j] = -wr.getHandle();
273						m_dFrequency[i][j++] = wr.getValue();
274
275					}
276					ArrayList<WordItem> wis = coreDict.getHandle(m_sWords[i]);
277					nPOSCount = wis.size();
278					for (; j < wis.size(); j++) {
279						// in the unknown dictionary
280						aPOS[j] = wis.get(j).getHandle();
281						aFreq[j] = wis.get(j).getFreq();
282						m_nTags[i][j] = aPOS[j];
283						m_dFrequency[i][j] = -Math.log(1 + aFreq[j])
284								+ Math.log(context.getFreq(0, m_nTags[i][j]) + nPOSCount);
285					}
286				}
287			}
288
289			// We donot know the POS, so we have to guess them according lexical
290			// knowledge
291			if (j == 0) {
292				j = guessPOS(i);// Guess the POS of current word
293			}
294			m_nTags[i][j] = -1;// Set the ending POS
295
296			// No ambuguity, so we can break from the loop
297			if (j == 1 && m_nTags[i][j] != Utility.CT_SENTENCE_BEGIN) {
298				i++;
299				m_sWords[i] = null;
300				break;
301			}
302			if (!bSplit)
303				nWordsIndex++;
304		}
305		if (nWordsIndex == wrList.size())
306			nRetPos = -1;// Reaching ending
307
308		if (m_nTags[i - 1][1] != -1)// ||m_sWords[i][0]==0
309		{// Set end for words like "?/?/?"
310			if (tagType != Utility.TAG_TYPE.TT_NORMAL)
311				m_nTags[i][0] = 101;
312			else
313				m_nTags[i][0] = 1;
314
315			m_dFrequency[i][0] = 0;
316			m_sWords[i] = null;// Set virtual ending
317			m_nTags[i++][1] = -1;
318		}
319		m_nCurLength = i;// The current word count
320		if (nRetPos != -1)
321			return nWordsIndex + 1;// Next start position
322		return -1;// Reaching ending
323
324	}
325
326	/**
327	 * <pre>
328	 *          
329	 *          BBCD 343 0.003606 
330	 *          BBC 2 0.000021 
331	 *          BBE 125 0.001314 
332	 *          BBZ 30 0.000315 
333	 *          BCD 62460 0.656624 
334	 *          BEE 0 0.000000 
335	 *          BE 13899 0.146116 
336	 *          BG 869 0.009136 
337	 *          BXD 4 0.000042 
338	 *          BZ 3707 0.038971 
339	 *          CD 8596 0.090367 
340	 *          EE 26 0.000273 
341	 *          FB 871 0.009157 
342	 *          Y 3265 0.034324
343	 *          XD 926 0.009735
344	 *          
345	 *          The person recognition patterns set
346	 *          BBCD:?+?+?1+?2;
347	 *          BBE: ?+?+??;
348	 *          BBZ: ?+?+????;
349	 *          BCD: ?+?1+?2;
350	 *          BE: ?+??;
351	 *          BEE: ?+??+??;???
352	 *          BG: ?+??
353	 *          BXD: ?+???????+????
354	 *          BZ: ?+????;
355	 *          B: ?
356	 *          CD: ?1+?2;
357	 *          EE: ??+??;
358	 *          FB: ??+?
359	 *          XD: ???????+????
360	 *          Y: ?????
361	 * </pre>
362	 */
363	public boolean PersonRecognize(Dictionary personDict) {
364		String sPOS = "z";
365		String sPersonName;
366		// 0 1 2 3 4 5
367		final String[] patterns = { "BBCD", "BBC", "BBE", "BBZ", "BCD", "BEE", "BE", "BG", "BXD", "BZ", "CDCD", "CD",
368				"EE", "FB", "Y", "XD", "" };
369		// BBCD BBC BBE BBZ BCD BEE BE BG
370		final double[] factor = { 0.003606, 0.000021, 0.001314, 0.000315, 0.656624, 0.000021, 0.146116, 0.009136,
371		// BXD BZ CDCD CD EE FB Y XD
372				0.000042, 0.038971, 0, 0.090367, 0.000273, 0.009157, 0.034324, 0.009735, 0 };
373		// About parameter:
374
375		final int patternLen[] = { 4, 3, 3, 3, 3, 3, 2, 2, 3, 2, 4, 2, 2, 2, 1, 2, 0 };
376		int i = 0;
377		for (i = 1; m_nBestTag[i] > -1; i++)
378			// Convert to string from POS
379			sPOS += (char) (m_nBestTag[i] + 'A');
380		int j = 1, k, nPos;// Find the proper pattern from the first POS
381		int nLittleFreqCount;// Counter for the person name role with little
382		// frequecy
383		boolean bMatched = false;
384
385		while (j < i) {
386			bMatched = false;
387			for (k = 0; !bMatched && patternLen[k] > 0; k++) {
388				if (sPOS.substring(j).indexOf(patterns[k]) == 0 && !"?".equals(m_sWords[j - 1])
389						&& !"?".equals(m_sWords[j + patternLen[k]])) {// Find
390
391					String temp = sPOS.substring(j + 2);
392					if (temp.length() > 1)
393						temp = temp.substring(0, 1);
394
395					// Rule 1 for exclusion:??+?+?1(?2): ??(??+?)???
396					if ("FB".equals(patterns[k]) && ("E".equals(temp) || "C".equals(temp) || "G".equals(temp))) {
397						continue;
398					}
399
400					nPos = j;// Record the person position in the tag
401					// sequence
402					sPersonName = "";
403					nLittleFreqCount = 0;// Record the number of role with
404					// little frequency
405					while (nPos < j + patternLen[k]) {// Get the possible
406						// person name
407
408						if (m_nBestTag[nPos] < 4
409								&& personDict.getFreq(m_sWords[nPos], m_nBestTag[nPos]) < Utility.LITTLE_FREQUENCY)
410							nLittleFreqCount++;// The counter increase
411						sPersonName += m_sWords[nPos];
412						nPos += 1;
413					}
414					if ("CDCD".equals(patterns[k])) {
415						if (GetForeignCharCount(sPersonName) > 0)
416							j += patternLen[k] - 1;
417						continue;
418					}
419					m_nUnknownWords[m_nUnknownIndex][0] = m_nWordPosition[j];
420					m_nUnknownWords[m_nUnknownIndex][1] = m_nWordPosition[j + patternLen[k]];
421					m_dWordsPossibility[m_nUnknownIndex] = -Math.log(factor[k])
422							+ ComputePossibility(j, patternLen[k], personDict);
423					// Mutiply the factor
424					m_nUnknownIndex += 1;
425					j += patternLen[k];
426					bMatched = true;
427				}
428			}
429			if (!bMatched)// Not matched, add j by 1
430				j += 1;
431		}
432		return true;
433	}
434
435	private int guessPOS(int index) {
436		int j = 0, i = index, charType;
437		int nLen;
438		switch (tagType) {
439		case TT_NORMAL:
440			break;
441		case TT_PERSON:
442			j = 0;
443			if (m_sWords[index].indexOf("××") != -1) {
444				m_nTags[i][j] = 6;
445				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 6) + 1);
446			} else {
447				m_nTags[i][j] = 0;
448				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
449				nLen = m_sWords[index].getBytes().length;
450				if (nLen >= 4) {
451					m_nTags[i][j] = 0;
452					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
453					m_nTags[i][j] = 11;
454					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) * 8);
455					m_nTags[i][j] = 12;
456					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 8);
457					m_nTags[i][j] = 13;
458					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 8);
459				} else if (nLen == 2) {
460					m_nTags[i][j] = 0;
461					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
462					charType = Utility.charType(m_sWords[index]);
463					if (charType == Utility.CT_OTHER || charType == Utility.CT_CHINESE) {
464						m_nTags[i][j] = 1;
465						m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 1) + 1);
466						m_nTags[i][j] = 2;
467						m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 2) + 1);
468						m_nTags[i][j] = 3;
469						m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 3) + 1);
470						m_nTags[i][j] = 4;
471						m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 4) + 1);
472					}
473					m_nTags[i][j] = 11;
474					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) * 8);
475					m_nTags[i][j] = 12;
476					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 8);
477					m_nTags[i][j] = 13;
478					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 8);
479				}
480			}
481			break;
482		case TT_PLACE:
483			j = 0;
484			m_nTags[i][j] = 0;
485			m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
486			nLen = m_sWords[index].length();
487			if (nLen >= 4) {
488				m_nTags[i][j] = 11;
489				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) * 8);
490				m_nTags[i][j] = 12;
491				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 8);
492				m_nTags[i][j] = 13;
493				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 8);
494			} else if (nLen == 2) {
495				m_nTags[i][j] = 0;
496				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
497				charType = Utility.charType(m_sWords[index]);
498				if (charType == Utility.CT_OTHER || charType == Utility.CT_CHINESE) {
499					m_nTags[i][j] = 1;
500					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 1) + 1);
501					m_nTags[i][j] = 2;
502					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 2) + 1);
503					m_nTags[i][j] = 3;
504					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 3) + 1);
505					m_nTags[i][j] = 4;
506					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 4) + 1);
507				}
508				m_nTags[i][j] = 11;
509				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) * 8);
510				m_nTags[i][j] = 12;
511				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 8);
512				m_nTags[i][j] = 13;
513				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 8);
514			}
515			break;
516		case TT_TRANS_PERSON:
517			j = 0;
518			nLen = m_sWords[index].length();
519
520			m_nTags[i][j] = 0;
521			m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 0) + 1);
522
523			if (!Utility.isAllChinese(m_sWords[index])) {
524				if (Utility.isAllLetter(m_sWords[index])) {
525					m_nTags[i][j] = 1;
526					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 1) + 1);
527					m_nTags[i][j] = 11;
528					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) + 1);
529					m_nTags[i][j] = 2;
530					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 2) * 2 + 1);
531					m_nTags[i][j] = 3;
532					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 3) * 2 + 1);
533					m_nTags[i][j] = 12;
534					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 2 + 1);
535					m_nTags[i][j] = 13;
536					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 2 + 1);
537				}
538				m_nTags[i][j] = 41;
539				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 41) * 8);
540				m_nTags[i][j] = 42;
541				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 42) * 8);
542				m_nTags[i][j] = 43;
543				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 43) * 8);
544			} else if (nLen >= 4) {
545				m_nTags[i][j] = 41;
546				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 41) * 8);
547				m_nTags[i][j] = 42;
548				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 42) * 8);
549				m_nTags[i][j] = 43;
550				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 43) * 8);
551			} else if (nLen == 2) {
552				charType = Utility.charType(m_sWords[index]);
553				if (charType == Utility.CT_OTHER || charType == Utility.CT_CHINESE) {
554					m_nTags[i][j] = 1;
555					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 1) * 2 + 1);
556					m_nTags[i][j] = 2;
557					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 2) * 2 + 1);
558					m_nTags[i][j] = 3;
559					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 3) * 2 + 1);
560					m_nTags[i][j] = 30;
561					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 30) * 8 + 1);
562					m_nTags[i][j] = 11;
563					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 11) * 4 + 1);
564					m_nTags[i][j] = 12;
565					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 12) * 4 + 1);
566					m_nTags[i][j] = 13;
567					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 13) * 4 + 1);
568					m_nTags[i][j] = 21;
569					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 21) * 2 + 1);
570					m_nTags[i][j] = 22;
571					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 22) * 2 + 1);
572					m_nTags[i][j] = 23;
573					m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 23) * 2 + 1);
574				}
575				m_nTags[i][j] = 41;
576				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 41) * 8);
577				m_nTags[i][j] = 42;
578				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 42) * 8);
579				m_nTags[i][j] = 43;
580				m_dFrequency[i][j++] = (double) 1 / (double) (context.getFreq(0, 43) * 8);
581			}
582			break;
583		default:
584			break;
585		}
586
587		return j;
588	}
589
590	int GetForeignCharCount(String personName) {
591		return 0;
592	}
593
594	public boolean PlaceRecognize(Dictionary coreDict, Dictionary placeDict) {
595		int nStart = 1, nEnd = 1, i = 1, nTemp;
596		double dPanelty = 1.0;// Panelty value
597		while (m_nBestTag[i] > -1) {
598			if (m_nBestTag[i] == 1)// 1 Trigger the recognition procession
599			{
600				nStart = i;
601				nEnd = nStart + 1;
602				while (m_nBestTag[nEnd] == 1)//
603				{
604					if (nEnd > nStart + 1)
605						dPanelty += 1.0;
606					nEnd++;
607				}
608				while (m_nBestTag[nEnd] == 2)
609					// 2,12,22
610					nEnd++;
611				nTemp = nEnd;
612				while (m_nBestTag[nEnd] == 3) {
613					if (nEnd > nTemp)
614						dPanelty += 1.0;
615					nEnd++;
616				}
617			} else if (m_nBestTag[i] == 2)// 1,11,21 Trigger the recognition
618			{
619				dPanelty += 1.0;
620				nStart = i;
621				nEnd = nStart + 1;
622				while (m_nBestTag[nEnd] == 2)
623					// 2
624					nEnd++;
625				nTemp = nEnd;
626				while (m_nBestTag[nEnd] == 3)// 2
627				{
628					if (nEnd > nTemp)
629						dPanelty += 1.0;
630					nEnd++;
631				}
632			}
633			if (nEnd > nStart) {
634				m_nUnknownWords[m_nUnknownIndex][0] = m_nWordPosition[nStart];
635				m_nUnknownWords[m_nUnknownIndex][1] = m_nWordPosition[nEnd];
636				m_dWordsPossibility[m_nUnknownIndex++] = ComputePossibility(nStart, nEnd - nStart + 1, placeDict)
637						+ Math.log(dPanelty);
638				nStart = nEnd;
639			}
640			if (i < nEnd)
641				i = nEnd;
642			else
643				i = i + 1;
644		}
645		return true;
646	}
647
648	private double ComputePossibility(int startPos, int length, Dictionary dict) {
649		double retValue = 0, posPoss;
650		int nFreq;
651		for (int i = startPos; i < startPos + length; i++) {
652			nFreq = dict.getFreq(m_sWords[i], m_nBestTag[i]);
653			// nFreq is word being the POS
654			posPoss = Math.log((double) (context.getFreq(0, m_nBestTag[i]) + 1)) - Math.log((double) (nFreq + 1));
655			retValue += posPoss;
656		}
657		return retValue;
658	}
659}