PageRenderTime 42ms CodeModel.GetById 1ms app.highlight 37ms RepoModel.GetById 1ms app.codeStats 0ms

/src/org/ictclas4j/bean/ContextStat.java

http://ictclas4j.googlecode.com/
Java | 170 lines | 127 code | 34 blank | 9 comment | 32 complexity | 20c5d64d43a5bcfe5d81ed36ade21468 MD5 | raw file
  1package org.ictclas4j.bean;
  2
  3import java.io.DataInputStream;
  4import java.io.File;
  5import java.io.FileInputStream;
  6import java.io.FileNotFoundException;
  7import java.io.IOException;
  8import java.util.ArrayList;
  9
 10import org.apache.log4j.Logger;
 11import org.ictclas4j.utility.GFCommon;
 12import org.ictclas4j.utility.Utility;
 13
 14
 15public class ContextStat {
 16	private int tableLen;
 17
 18	private int[] symbolTable;
 19
 20	private ArrayList<TagContext> tcList;
 21
 22	static Logger logger = Logger.getLogger(ContextStat.class);
 23
 24	public ContextStat() { 
 25		tcList = new ArrayList<TagContext>();
 26	}
 27
 28	public boolean load(String fileName) {
 29		return load(fileName, false);
 30	}
 31
 32	public boolean load(String fileName, boolean isReset) {
 33		File file = new File(fileName);
 34		if (!file.canRead())
 35			return false;// fail while opening the file
 36
 37		try {
 38
 39			byte[] b = null;
 40			DataInputStream in = new DataInputStream(new FileInputStream(file));
 41			// ????
 42			tableLen = GFCommon.bytes2int(Utility.readBytes(in, 4), false);
 43			logger.debug("tableLen:" + tableLen);
 44
 45			// ??????
 46			symbolTable = new int[tableLen];
 47			for (int i = 0; i < tableLen; i++) {
 48				b = Utility.readBytes(in, 4);
 49				symbolTable[i] = GFCommon.bytes2int(b, false);
 50				logger.debug("symbolTable[" + i + "]:" + symbolTable[i]);
 51			}
 52
 53			long fileLen = file.length();
 54			long curLen = 4 + tableLen * 4;
 55			while (curLen < fileLen) {
 56				logger.debug("tagContext:");
 57				TagContext tc = new TagContext();
 58
 59				// ?????
 60				b = Utility.readBytes(in, 4);
 61				int key = GFCommon.bytes2int(b);
 62				curLen += 4;
 63				logger.debug("\tkey:" + key);
 64
 65				// ?????
 66				b = Utility.readBytes(in, 4);
 67				curLen += 4;
 68				int totalFreq = GFCommon.bytes2int(b, false);
 69				logger.debug("\ttotalFreq:" + totalFreq);
 70
 71				// ????
 72				int[] tagFreq = new int[tableLen];
 73				for (int i = 0; i < tableLen; i++) {
 74					b = Utility.readBytes(in, 4);
 75					curLen += 4;
 76					tagFreq[i] = GFCommon.bytes2int(b, false);
 77					logger.debug("\ttagFreq[" + i + "]:" + tagFreq[i]);
 78				}
 79
 80				// ???????
 81				int[][] contextArray = new int[tableLen][tableLen];
 82				for (int i = 0; i < tableLen; i++) {
 83					String pr = "";
 84					logger.debug("\tcontextArray[" + i + "]");
 85					for (int j = 0; j < tableLen; j++) {
 86						b = Utility.readBytes(in, 4);
 87						curLen += 4;
 88						contextArray[i][j] = GFCommon.bytes2int(b, false);
 89						pr += " " + contextArray[i][j];
 90					}
 91					logger.debug("\t\t" + pr);
 92				}
 93
 94				tc.setTotalFreq(totalFreq);
 95				tc.setKey(key);
 96				tc.setTagFreq(tagFreq);
 97				tc.setContextArray(contextArray);
 98				tcList.add(tc);
 99			}
100			in.close();
101		} catch (FileNotFoundException e) {
102			logger.debug(e);
103		} catch (IOException e) {
104			logger.debug(e);
105		}
106		return true;
107	}
108
109	public int getFreq(int key, int symbol) {
110		TagContext tc = getItem(key);
111		if (tc == null)
112			return 0;
113
114		int index = Utility.binarySearch(symbol, symbolTable);
115		if (index == -1)// error finding the symbol
116			return 0;
117
118		// Add the frequency
119		int frequency = 0;
120		if (tc.getTagFreq() != null)
121			frequency = tc.getTagFreq()[index];
122		return frequency;
123
124	}
125
126	public double getPossibility(int key, int prev, int cur) {
127		double result = 0;
128
129		int curIndex = Utility.binarySearch(cur, symbolTable);
130		int prevIndex = Utility.binarySearch(prev, symbolTable);
131
132		TagContext tc = getItem(key);
133
134		// return a lower value, not 0 to prevent data sparse
135		if (tc == null || curIndex == -1 || prevIndex == -1
136				|| tc.getContextArray()[prevIndex][curIndex] == 0
137				|| tc.getTagFreq()[prevIndex] == 0)
138			return 0.000001;
139		
140		int prevCurConFreq = tc.getContextArray()[prevIndex][curIndex];
141		int prevFreq = tc.getTagFreq()[prevIndex];
142
143		// 0.9 and 0.1 is a value based experience
144		result = 0.9 * (double) prevCurConFreq;
145		result /= (double) prevFreq;
146		result += 0.1 * (double) prevFreq / (double) tc.getTotalFreq();
147
148		return result;
149	}
150
151	public TagContext getItem(int key) {
152		TagContext result = null; 
153		
154		if(tcList==null||tcList.size()==0)
155			return null;
156		if (key == 0  )
157			result = tcList.get(0);
158		else   {
159			int i=0;
160			for ( ; i < tcList.size() && tcList.get(i).getKey()<key; i++);
161			if(i<tcList.size() && tcList.get(i).getKey()==key)
162				result=tcList.get(i);
163			else if(i-1<tcList.size())
164				result=tcList.get(i-1);
165		}
166		
167		return result;
168	}
169
170}