PageRenderTime 32ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/stanford-corenlp-2011-06-19/src/edu/stanford/nlp/fsm/FastExactAutomatonMinimizer.java

https://github.com/ryantanner/thesis
Java | 319 lines | 226 code | 39 blank | 54 comment | 30 complexity | 2b1f3d0abf1871931974ad4d43a7fe45 MD5 | raw file
  1. package edu.stanford.nlp.fsm;
  2. import edu.stanford.nlp.trees.PennTreebankLanguagePack;
  3. import edu.stanford.nlp.util.Maps;
  4. import java.util.*;
  5. /**
  6. * Minimizes a finite automaton in O(n log n) time, following Hopcroft's algorithm.
  7. *
  8. * @author Dan Klein (klein@cs.stanford.edu)
  9. */
  10. public class FastExactAutomatonMinimizer implements AutomatonMinimizer {
  11. TransducerGraph unminimizedFA = null;
  12. Map memberToBlock = null;
  13. LinkedList splits = null;
  14. boolean sparseMode = true;
  15. static final Object SINK_NODE = "SINK_NODE";
  16. static class Split {
  17. Collection members;
  18. Object symbol;
  19. Block block;
  20. public Collection getMembers() {
  21. return members;
  22. }
  23. public Object getSymbol() {
  24. return symbol;
  25. }
  26. public Block getBlock() {
  27. return block;
  28. }
  29. public Split(Collection members, Object symbol, Block block) {
  30. this.members = members;
  31. this.symbol = symbol;
  32. this.block = block;
  33. }
  34. }
  35. static class Block {
  36. Set members;
  37. public Set getMembers() {
  38. return members;
  39. }
  40. public Block(Set members) {
  41. this.members = members;
  42. }
  43. }
  44. protected TransducerGraph getUnminimizedFA() {
  45. return unminimizedFA;
  46. }
  47. protected Collection getSymbols() {
  48. return getUnminimizedFA().getInputs();
  49. }
  50. public TransducerGraph minimizeFA(TransducerGraph unminimizedFA) {
  51. // System.out.println(unminimizedFA);
  52. this.unminimizedFA = unminimizedFA;
  53. this.splits = new LinkedList();
  54. this.memberToBlock = new HashMap(); //new IdentityHashMap(); // TEG: I had to change this b/c some weren't matching
  55. minimize();
  56. return buildMinimizedFA();
  57. }
  58. protected TransducerGraph buildMinimizedFA() {
  59. TransducerGraph minimizedFA = new TransducerGraph();
  60. TransducerGraph unminimizedFA = getUnminimizedFA();
  61. for (Iterator arcI = unminimizedFA.getArcs().iterator(); arcI.hasNext();) {
  62. TransducerGraph.Arc arc = (TransducerGraph.Arc) arcI.next();
  63. Object source = projectNode(arc.getSourceNode());
  64. Object target = projectNode(arc.getTargetNode());
  65. try {
  66. if (minimizedFA.canAddArc(source, target, arc.getInput(), arc.getOutput())) {
  67. minimizedFA.addArc(source, target, arc.getInput(), arc.getOutput());
  68. }
  69. } catch (Exception e) {
  70. //throw new IllegalArgumentException();
  71. }
  72. }
  73. minimizedFA.setStartNode(projectNode(unminimizedFA.getStartNode()));
  74. for (Iterator endIter = unminimizedFA.getEndNodes().iterator(); endIter.hasNext();) {
  75. Object o = endIter.next();
  76. minimizedFA.setEndNode(projectNode(o));
  77. }
  78. return minimizedFA;
  79. }
  80. protected Object projectNode(Object node) {
  81. Set members = getBlock(node).getMembers();
  82. return members;
  83. }
  84. protected boolean hasSplit() {
  85. return splits.size() > 0;
  86. }
  87. protected Split getSplit() {
  88. return (Split) splits.removeFirst();
  89. }
  90. protected void addSplit(Split split) {
  91. splits.addLast(split);
  92. }
  93. // protected Collection inverseImages(Collection block, Object symbol) {
  94. // List inverseImages = new ArrayList();
  95. // for (Iterator nodeI = block.iterator(); nodeI.hasNext();) {
  96. // Object node = nodeI.next();
  97. // inverseImages.addAll(getUnminimizedFA().getInboundArcs(node, symbol));
  98. // }
  99. // return inverseImages;
  100. // }
  101. protected Map sortIntoBlocks(Collection nodes) {
  102. Map blockToMembers = new IdentityHashMap();
  103. for (Iterator nodeI = nodes.iterator(); nodeI.hasNext();) {
  104. Object o = nodeI.next();
  105. Block block = getBlock(o);
  106. Maps.putIntoValueHashSet(blockToMembers, block, o);
  107. }
  108. return blockToMembers;
  109. }
  110. protected void makeBlock(Collection members) {
  111. Block block = new Block(new HashSet(members));
  112. for (Iterator memberI = block.getMembers().iterator(); memberI.hasNext();) {
  113. Object member = memberI.next();
  114. if (member != SINK_NODE) {
  115. // System.out.println("putting in memberToBlock: " + member + " " + block);
  116. memberToBlock.put(member, block);
  117. }
  118. }
  119. addSplits(block);
  120. }
  121. protected void addSplits(Block block) {
  122. Map symbolToTarget = new HashMap();
  123. for (Iterator memberI = block.getMembers().iterator(); memberI.hasNext();) {
  124. Object member = memberI.next();
  125. for (Iterator symbolI = getInverseArcs(member).iterator(); symbolI.hasNext();) {
  126. TransducerGraph.Arc arc = (TransducerGraph.Arc) symbolI.next();
  127. Object symbol = arc.getInput();
  128. Object target = arc.getTargetNode();
  129. Maps.putIntoValueArrayList(symbolToTarget, symbol, target);
  130. }
  131. }
  132. for (Iterator symbolI = symbolToTarget.keySet().iterator(); symbolI.hasNext();) {
  133. Object symbol = symbolI.next();
  134. addSplit(new Split((List) symbolToTarget.get(symbol), symbol, block));
  135. }
  136. }
  137. protected void removeAll(Collection block, Collection members) {
  138. // this is because AbstractCollection/Set.removeAll() isn't always linear in members.size()
  139. for (Iterator memberI = members.iterator(); memberI.hasNext();) {
  140. Object member = memberI.next();
  141. block.remove(member);
  142. }
  143. }
  144. protected Collection difference(Collection block, Collection members) {
  145. Set difference = new HashSet();
  146. for (Iterator memberI = block.iterator(); memberI.hasNext();) {
  147. Object member = memberI.next();
  148. if (!members.contains(member)) {
  149. difference.add(member);
  150. }
  151. }
  152. return difference;
  153. }
  154. protected Block getBlock(Object o) {
  155. Block result = (Block) memberToBlock.get(o);
  156. if (result == null) {
  157. System.out.println("No block found for: " + o); // debug
  158. System.out.println("But I do have blocks for: ");
  159. for (Iterator i = memberToBlock.keySet().iterator(); i.hasNext();) {
  160. System.out.println(i.next());
  161. }
  162. throw new RuntimeException("FastExactAutomatonMinimizer: no block found");
  163. }
  164. return result;
  165. }
  166. protected Collection getInverseImages(Split split) {
  167. List inverseImages = new ArrayList();
  168. Object symbol = split.getSymbol();
  169. Block block = split.getBlock();
  170. for (Iterator memberI = split.getMembers().iterator(); memberI.hasNext();) {
  171. Object member = memberI.next();
  172. if (!block.getMembers().contains(member)) {
  173. continue;
  174. }
  175. Collection arcs = getInverseArcs(member, symbol);
  176. for (Iterator arcI = arcs.iterator(); arcI.hasNext();) {
  177. TransducerGraph.Arc arc = (TransducerGraph.Arc) arcI.next();
  178. Object source = arc.getSourceNode();
  179. inverseImages.add(source);
  180. }
  181. }
  182. return inverseImages;
  183. }
  184. protected Collection getInverseArcs(Object member, Object symbol) {
  185. if (member != SINK_NODE) {
  186. return getUnminimizedFA().getArcsByTargetAndInput(member, symbol);
  187. }
  188. return getUnminimizedFA().getArcsByInput(symbol);
  189. }
  190. protected Collection getInverseArcs(Object member) {
  191. if (member != SINK_NODE) {
  192. return getUnminimizedFA().getArcsByTarget(member);
  193. }
  194. return getUnminimizedFA().getArcs();
  195. }
  196. protected void makeInitialBlocks() {
  197. // sink block (for if the automaton isn't complete
  198. makeBlock(Collections.singleton(SINK_NODE));
  199. // accepting block
  200. Set endNodes = getUnminimizedFA().getEndNodes();
  201. makeBlock(endNodes);
  202. // main block
  203. Collection nonFinalNodes = new HashSet(getUnminimizedFA().getNodes());
  204. nonFinalNodes.removeAll(endNodes);
  205. makeBlock(nonFinalNodes);
  206. }
  207. protected void minimize() {
  208. makeInitialBlocks();
  209. while (hasSplit()) {
  210. Split split = getSplit();
  211. Collection inverseImages = getInverseImages(split);
  212. Map inverseImagesByBlock = sortIntoBlocks(inverseImages);
  213. for (Iterator blockI = inverseImagesByBlock.keySet().iterator(); blockI.hasNext();) {
  214. Block block = (Block) blockI.next();
  215. Collection members = (Collection) inverseImagesByBlock.get(block);
  216. if (members.size() == 0 || members.size() == block.getMembers().size()) {
  217. continue;
  218. }
  219. if (members.size() > block.getMembers().size() - members.size()) {
  220. members = difference(block.getMembers(), members);
  221. }
  222. removeAll(block.getMembers(), members);
  223. makeBlock(members);
  224. }
  225. }
  226. }
  227. public static void main(String[] args) {
  228. /*
  229. TransducerGraph fa = new TransducerGraph();
  230. fa.addArc(fa.getStartNode(),"1","a","");
  231. fa.addArc(fa.getStartNode(),"2","b","");
  232. fa.addArc(fa.getStartNode(),"3","c","");
  233. fa.addArc("1","4","a","");
  234. fa.addArc("2","4","a","");
  235. fa.addArc("3","5","c","");
  236. fa.addArc("4",fa.getEndNode(),"c","");
  237. fa.addArc("5",fa.getEndNode(),"c","");
  238. System.out.println(fa);
  239. ExactAutomatonMinimizer minimizer = new ExactAutomatonMinimizer();
  240. System.out.println(minimizer.minimizeFA(fa));
  241. */
  242. System.out.println("Starting minimizer test...");
  243. List pathList = new ArrayList();
  244. TransducerGraph randomFA = TransducerGraph.createRandomGraph(5000, 5, 1.0, 5, pathList);
  245. List outputs = randomFA.getPathOutputs(pathList);
  246. TransducerGraph.GraphProcessor quasiDeterminizer = new QuasiDeterminizer();
  247. AutomatonMinimizer minimizer = new FastExactAutomatonMinimizer();
  248. TransducerGraph.NodeProcessor ntsp = new TransducerGraph.SetToStringNodeProcessor(new PennTreebankLanguagePack());
  249. TransducerGraph.ArcProcessor isp = new TransducerGraph.InputSplittingProcessor();
  250. TransducerGraph.ArcProcessor ocp = new TransducerGraph.OutputCombiningProcessor();
  251. TransducerGraph detGraph = quasiDeterminizer.processGraph(randomFA);
  252. TransducerGraph combGraph = new TransducerGraph(detGraph, ocp); // combine outputs into inputs
  253. TransducerGraph result = minimizer.minimizeFA(combGraph); // minimize the thing
  254. System.out.println("Minimized from " + randomFA.getNodes().size() + " to " + result.getNodes().size());
  255. result = new TransducerGraph(result, ntsp); // pull out strings from sets returned by minimizer
  256. result = new TransducerGraph(result, isp); // split outputs from inputs
  257. List minOutputs = result.getPathOutputs(pathList);
  258. System.out.println("Equal? " + outputs.equals(minOutputs));
  259. /*
  260. randomFA = new TransducerGraph(randomFA, new TransducerGraph.OutputCombiningProcessor());
  261. System.out.print("Starting fast minimization...");
  262. FastExactAutomatonMinimizer minimizer2 = new FastExactAutomatonMinimizer();
  263. Timing.startTime();
  264. TransducerGraph minimizedRandomFA = minimizer2.minimizeFA(randomFA);
  265. Timing.tick("done. ( "+randomFA.getArcs().size()+" arcs to "+minimizedRandomFA.getArcs().size()+" arcs)");
  266. minimizedRandomFA = new TransducerGraph(minimizedRandomFA, new TransducerGraph.InputSplittingProcessor());
  267. List minOutputs = minimizedRandomFA.getPathOutputs(pathList);
  268. System.out.println("Equal? "+outputs.equals(minOutputs));
  269. System.out.print("Starting slow minimization...");
  270. ExactAutomatonMinimizer minimizer = new ExactAutomatonMinimizer();
  271. Timing.startTime();
  272. minimizedRandomFA = minimizer.minimizeFA(randomFA);
  273. Timing.tick("done. ( "+randomFA.getArcs().size()+" arcs to "+minimizedRandomFA.getArcs().size()+" arcs)");
  274. minimizedRandomFA = new TransducerGraph(minimizedRandomFA, new TransducerGraph.InputSplittingProcessor());
  275. minOutputs = minimizedRandomFA.getPathOutputs(pathList);
  276. System.out.println("Equal? "+outputs.equals(minOutputs));
  277. */
  278. }
  279. }