PageRenderTime 18ms CodeModel.GetById 2ms app.highlight 12ms RepoModel.GetById 2ms app.codeStats 0ms

/ictclas4j/src/org/ictclas4j/segment/Segment.java

http://ictclas4j.googlecode.com/
Java | 218 lines | 165 code | 38 blank | 15 comment | 31 complexity | fa4c60fa2ff0858ad945ebba4801cd91 MD5 | raw file
  1package org.ictclas4j.segment;
  2
  3import java.util.ArrayList;
  4
  5import org.apache.log4j.Logger;
  6import org.ictclas4j.bean.Atom;
  7import org.ictclas4j.bean.DebugResult;
  8import org.ictclas4j.bean.DictLib;
  9import org.ictclas4j.bean.MidResult;
 10import org.ictclas4j.bean.POSTag;
 11import org.ictclas4j.bean.SegAtom;
 12import org.ictclas4j.bean.SegNode;
 13import org.ictclas4j.bean.SegResult;
 14import org.ictclas4j.bean.Sentence;
 15import org.ictclas4j.util.DebugUtil;
 16import org.ictclas4j.util.Utility;
 17
 18
 19public class Segment {
 20	private DictLib dictLib;
 21
 22	private int segPathCount = 1;// ???????
 23
 24	private boolean isRecogniseUnknown;// ????????
 25
 26	private boolean isOutputMidResult;// ????????
 27
 28	static Logger logger = Logger.getLogger(Segment.class);
 29
 30	public Segment(DictLib dictLib, int segPathCount) {
 31		this.dictLib = dictLib;
 32		this.segPathCount = segPathCount;
 33		this.isRecogniseUnknown = true;
 34	}
 35
 36	public SegResult split(String src) {
 37		SegResult finalResult = new SegResult();// ????
 38		DebugResult debugResult = new DebugResult(src);
 39
 40		if (src != null) {
 41			int index = 0;
 42			SegResult midResult = null;
 43			finalResult.setRawContent(src);
 44			SentenceSeg ss = new SentenceSeg(src);
 45			ArrayList<Sentence> sens = ss.getSens();
 46
 47			for (Sentence sen : sens) {
 48				logger.debug(sen);
 49				MidResult mr = new MidResult();
 50				mr.setIndex(index++);
 51				mr.setSource(sen.getContent());
 52				if (sen.isSeg()) {
 53
 54					// ????
 55					AtomSeg as = new AtomSeg(sen.getContent());
 56					ArrayList<Atom> atoms = as.getAtoms();
 57					mr.setAtoms(atoms);
 58
 59					// ??????,???????????????????????
 60					SegGraph segGraph = GraphGenerate.generate(atoms, dictLib);
 61					mr.setSegGraph(segGraph.getSnList());
 62					// ????????
 63					SegGraph biSegGraph = GraphGenerate.biGenerate(segGraph, dictLib);
 64					mr.setBiSegGraph(biSegGraph.getSnList());
 65
 66					// ?N????
 67					NShortPath nsp = new NShortPath(biSegGraph, segPathCount);
 68					ArrayList<ArrayList<Integer>> bipath = nsp.getPaths();
 69					mr.setBipath(bipath);
 70
 71					for (ArrayList<Integer> onePath : bipath) {
 72						// ????????
 73						ArrayList<SegNode> segPath = getSegPath(segGraph, onePath);
 74						ArrayList<SegNode> firstPath = AdjustSeg.firstAdjust(segPath);
 75						SegResult firstResult = outputResult(firstPath);
 76						mr.addFirstResult(firstResult.toString());
 77
 78						if (isRecogniseUnknown)
 79							midResult = optinium(mr, firstPath);
 80						else {
 81							PosTagger lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, dictLib);
 82							lexTagger.recognise(firstPath);
 83							SegResult optResult = outputResult(firstPath);
 84							mr.addOptResult(optResult.toString());
 85							ArrayList<SegNode> adjResult = AdjustSeg.finalAdjust(firstPath, dictLib);
 86
 87							midResult = outputResult(adjResult);
 88						}
 89						break;
 90					}
 91				} else {
 92					SegAtom atom = new SegAtom(sen.getContent());
 93					SegAtom[] atoms = new SegAtom[1];
 94					atoms[0] = atom;
 95					midResult = new SegResult();
 96					midResult.setRawContent(sen.getContent());
 97					midResult.setAtoms(atoms);
 98				}
 99				finalResult.merge(midResult);
100				debugResult.addMidResult(mr);
101			}
102			logger.debug(finalResult.toString());
103			if (this.isOutputMidResult) {
104				DebugUtil.output2html(debugResult);
105			}
106		}
107
108		return finalResult;
109	}
110
111	// ???????????
112	private SegResult optinium(MidResult mr, ArrayList<SegNode> firstPath) {
113		SegResult result = null;
114		if (mr != null && firstPath != null) {
115			// ???????????????????
116			SegGraph optSegGraph = new SegGraph(firstPath);
117			ArrayList<SegNode> sns = clone(firstPath);
118			PosTagger personTagger = new PosTagger(Utility.TAG_TYPE.TT_PERSON, dictLib);
119			personTagger.recognise(optSegGraph, sns);
120			PosTagger transPersonTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, dictLib);
121			transPersonTagger.recognise(optSegGraph, sns);
122			// PosTagger placeTagger=new
123			// PosTagger(Utility.TAG_TYPE.TT_PLACE,dictLib);
124			// placeTagger.recognise(optSegGraph, sns);
125			mr.setOptSegGraph(optSegGraph.getSnList());
126
127			// ?????????????????????
128			SegGraph optBiSegGraph = GraphGenerate.biGenerate(optSegGraph, dictLib);
129			mr.setOptBiSegGraph(optBiSegGraph.getSnList());
130
131			// ????N?????
132			NShortPath optNsp = new NShortPath(optBiSegGraph, segPathCount);
133			ArrayList<ArrayList<Integer>> optBipath = optNsp.getPaths();
134			mr.setOptBipath(optBipath);
135
136			// ???????????????????????????????
137			ArrayList<SegNode> adjResult = null;
138			PosTagger lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, dictLib);
139			for (ArrayList<Integer> optOnePath : optBipath) {
140				ArrayList<SegNode> optSegPath = getSegPath(optSegGraph, optOnePath);
141				lexTagger.recognise(optSegPath);
142				SegResult optResult = outputResult(optSegPath);
143				mr.addOptResult(optResult.toString());
144				adjResult = AdjustSeg.finalAdjust(optSegPath, dictLib);
145				result = outputResult(adjResult);
146				break;
147			}
148		}
149		return result;
150	}
151
152	private ArrayList<SegNode> clone(ArrayList<SegNode> sns) {
153		ArrayList<SegNode> result = null;
154		if (sns != null && sns.size() > 0) {
155			result = new ArrayList<SegNode>();
156			for (SegNode sn : sns)
157				try {
158					result.add(sn.clone());
159				} catch (CloneNotSupportedException e) {
160					logger.error(e.getMessage(),e);
161				}
162		}
163
164		return result;
165	}
166
167	// ??????????????
168	private ArrayList<SegNode> getSegPath(SegGraph sg, ArrayList<Integer> bipath) {
169
170		ArrayList<SegNode> path = null;
171
172		if (sg != null && bipath != null) {
173			ArrayList<SegNode> sns = sg.getSnList();
174			path = new ArrayList<SegNode>();
175
176			for (int index : bipath)
177				path.add(sns.get(index));
178
179		}
180		return path;
181	}
182
183	// ????????????
184	private SegResult outputResult(ArrayList<SegNode> wrList) {
185		SegResult result = null;
186		if (wrList != null && wrList.size() > 0) {
187			result = new SegResult();
188			ArrayList<SegAtom> saList = new ArrayList<SegAtom>();
189			for (int i = 0; i < wrList.size(); i++) {
190
191				SegNode sn = wrList.get(i);
192				if (sn.getPos() != POSTag.SEN_BEGIN && sn.getPos() != POSTag.SEN_END) { 
193					SegAtom sa =sn.toSegAtom();
194					saList.add(sa);
195				} 
196			}
197
198			SegAtom[] atoms = new SegAtom[saList.size() - 1];
199			atoms = saList.toArray(atoms);
200			result.setAtoms(atoms);
201		}
202
203		return result;
204	}
205
206	public void setSegPathCount(int segPathCount) {
207		this.segPathCount = segPathCount;
208	}
209
210	public void setRecogniseUnknown(boolean isRecogniseUnknown) {
211		this.isRecogniseUnknown = isRecogniseUnknown;
212	}
213
214	public void setOutputMidResult(boolean isOutputMidResult) {
215		this.isOutputMidResult = isOutputMidResult;
216	}
217
218}