/ictclas4j/src/org/ictclas4j/segment/Segment.java

http://ictclas4j.googlecode.com/ · Java · 218 lines · 165 code · 38 blank · 15 comment · 31 complexity · fa4c60fa2ff0858ad945ebba4801cd91 MD5 · raw file

  1. package org.ictclas4j.segment;
  2. import java.util.ArrayList;
  3. import org.apache.log4j.Logger;
  4. import org.ictclas4j.bean.Atom;
  5. import org.ictclas4j.bean.DebugResult;
  6. import org.ictclas4j.bean.DictLib;
  7. import org.ictclas4j.bean.MidResult;
  8. import org.ictclas4j.bean.POSTag;
  9. import org.ictclas4j.bean.SegAtom;
  10. import org.ictclas4j.bean.SegNode;
  11. import org.ictclas4j.bean.SegResult;
  12. import org.ictclas4j.bean.Sentence;
  13. import org.ictclas4j.util.DebugUtil;
  14. import org.ictclas4j.util.Utility;
  15. public class Segment {
  16. private DictLib dictLib;
  17. private int segPathCount = 1;// ???????
  18. private boolean isRecogniseUnknown;// ????????
  19. private boolean isOutputMidResult;// ????????
  20. static Logger logger = Logger.getLogger(Segment.class);
  21. public Segment(DictLib dictLib, int segPathCount) {
  22. this.dictLib = dictLib;
  23. this.segPathCount = segPathCount;
  24. this.isRecogniseUnknown = true;
  25. }
  26. public SegResult split(String src) {
  27. SegResult finalResult = new SegResult();// ????
  28. DebugResult debugResult = new DebugResult(src);
  29. if (src != null) {
  30. int index = 0;
  31. SegResult midResult = null;
  32. finalResult.setRawContent(src);
  33. SentenceSeg ss = new SentenceSeg(src);
  34. ArrayList<Sentence> sens = ss.getSens();
  35. for (Sentence sen : sens) {
  36. logger.debug(sen);
  37. MidResult mr = new MidResult();
  38. mr.setIndex(index++);
  39. mr.setSource(sen.getContent());
  40. if (sen.isSeg()) {
  41. // ????
  42. AtomSeg as = new AtomSeg(sen.getContent());
  43. ArrayList<Atom> atoms = as.getAtoms();
  44. mr.setAtoms(atoms);
  45. // ??????,???????????????????????
  46. SegGraph segGraph = GraphGenerate.generate(atoms, dictLib);
  47. mr.setSegGraph(segGraph.getSnList());
  48. // ????????
  49. SegGraph biSegGraph = GraphGenerate.biGenerate(segGraph, dictLib);
  50. mr.setBiSegGraph(biSegGraph.getSnList());
  51. // ?N????
  52. NShortPath nsp = new NShortPath(biSegGraph, segPathCount);
  53. ArrayList<ArrayList<Integer>> bipath = nsp.getPaths();
  54. mr.setBipath(bipath);
  55. for (ArrayList<Integer> onePath : bipath) {
  56. // ????????
  57. ArrayList<SegNode> segPath = getSegPath(segGraph, onePath);
  58. ArrayList<SegNode> firstPath = AdjustSeg.firstAdjust(segPath);
  59. SegResult firstResult = outputResult(firstPath);
  60. mr.addFirstResult(firstResult.toString());
  61. if (isRecogniseUnknown)
  62. midResult = optinium(mr, firstPath);
  63. else {
  64. PosTagger lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, dictLib);
  65. lexTagger.recognise(firstPath);
  66. SegResult optResult = outputResult(firstPath);
  67. mr.addOptResult(optResult.toString());
  68. ArrayList<SegNode> adjResult = AdjustSeg.finalAdjust(firstPath, dictLib);
  69. midResult = outputResult(adjResult);
  70. }
  71. break;
  72. }
  73. } else {
  74. SegAtom atom = new SegAtom(sen.getContent());
  75. SegAtom[] atoms = new SegAtom[1];
  76. atoms[0] = atom;
  77. midResult = new SegResult();
  78. midResult.setRawContent(sen.getContent());
  79. midResult.setAtoms(atoms);
  80. }
  81. finalResult.merge(midResult);
  82. debugResult.addMidResult(mr);
  83. }
  84. logger.debug(finalResult.toString());
  85. if (this.isOutputMidResult) {
  86. DebugUtil.output2html(debugResult);
  87. }
  88. }
  89. return finalResult;
  90. }
  91. // ???????????
  92. private SegResult optinium(MidResult mr, ArrayList<SegNode> firstPath) {
  93. SegResult result = null;
  94. if (mr != null && firstPath != null) {
  95. // ???????????????????
  96. SegGraph optSegGraph = new SegGraph(firstPath);
  97. ArrayList<SegNode> sns = clone(firstPath);
  98. PosTagger personTagger = new PosTagger(Utility.TAG_TYPE.TT_PERSON, dictLib);
  99. personTagger.recognise(optSegGraph, sns);
  100. PosTagger transPersonTagger = new PosTagger(Utility.TAG_TYPE.TT_TRANS_PERSON, dictLib);
  101. transPersonTagger.recognise(optSegGraph, sns);
  102. // PosTagger placeTagger=new
  103. // PosTagger(Utility.TAG_TYPE.TT_PLACE,dictLib);
  104. // placeTagger.recognise(optSegGraph, sns);
  105. mr.setOptSegGraph(optSegGraph.getSnList());
  106. // ?????????????????????
  107. SegGraph optBiSegGraph = GraphGenerate.biGenerate(optSegGraph, dictLib);
  108. mr.setOptBiSegGraph(optBiSegGraph.getSnList());
  109. // ????N?????
  110. NShortPath optNsp = new NShortPath(optBiSegGraph, segPathCount);
  111. ArrayList<ArrayList<Integer>> optBipath = optNsp.getPaths();
  112. mr.setOptBipath(optBipath);
  113. // ???????????????????????????????
  114. ArrayList<SegNode> adjResult = null;
  115. PosTagger lexTagger = new PosTagger(Utility.TAG_TYPE.TT_NORMAL, dictLib);
  116. for (ArrayList<Integer> optOnePath : optBipath) {
  117. ArrayList<SegNode> optSegPath = getSegPath(optSegGraph, optOnePath);
  118. lexTagger.recognise(optSegPath);
  119. SegResult optResult = outputResult(optSegPath);
  120. mr.addOptResult(optResult.toString());
  121. adjResult = AdjustSeg.finalAdjust(optSegPath, dictLib);
  122. result = outputResult(adjResult);
  123. break;
  124. }
  125. }
  126. return result;
  127. }
  128. private ArrayList<SegNode> clone(ArrayList<SegNode> sns) {
  129. ArrayList<SegNode> result = null;
  130. if (sns != null && sns.size() > 0) {
  131. result = new ArrayList<SegNode>();
  132. for (SegNode sn : sns)
  133. try {
  134. result.add(sn.clone());
  135. } catch (CloneNotSupportedException e) {
  136. logger.error(e.getMessage(),e);
  137. }
  138. }
  139. return result;
  140. }
  141. // ??????????????
  142. private ArrayList<SegNode> getSegPath(SegGraph sg, ArrayList<Integer> bipath) {
  143. ArrayList<SegNode> path = null;
  144. if (sg != null && bipath != null) {
  145. ArrayList<SegNode> sns = sg.getSnList();
  146. path = new ArrayList<SegNode>();
  147. for (int index : bipath)
  148. path.add(sns.get(index));
  149. }
  150. return path;
  151. }
  152. // ????????????
  153. private SegResult outputResult(ArrayList<SegNode> wrList) {
  154. SegResult result = null;
  155. if (wrList != null && wrList.size() > 0) {
  156. result = new SegResult();
  157. ArrayList<SegAtom> saList = new ArrayList<SegAtom>();
  158. for (int i = 0; i < wrList.size(); i++) {
  159. SegNode sn = wrList.get(i);
  160. if (sn.getPos() != POSTag.SEN_BEGIN && sn.getPos() != POSTag.SEN_END) {
  161. SegAtom sa =sn.toSegAtom();
  162. saList.add(sa);
  163. }
  164. }
  165. SegAtom[] atoms = new SegAtom[saList.size() - 1];
  166. atoms = saList.toArray(atoms);
  167. result.setAtoms(atoms);
  168. }
  169. return result;
  170. }
  171. public void setSegPathCount(int segPathCount) {
  172. this.segPathCount = segPathCount;
  173. }
  174. public void setRecogniseUnknown(boolean isRecogniseUnknown) {
  175. this.isRecogniseUnknown = isRecogniseUnknown;
  176. }
  177. public void setOutputMidResult(boolean isOutputMidResult) {
  178. this.isOutputMidResult = isOutputMidResult;
  179. }
  180. }