PageRenderTime 45ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/netbeans-7.3/spellchecker/src/org/netbeans/modules/spellchecker/TrieDictionary.java

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Java | 595 lines | 418 code | 126 blank | 51 comment | 65 complexity | 6785a49b947199fc91a6dc96adbbe28c MD5 | raw file
  1. /*
  2. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  3. *
  4. * Copyright 1997-2012 Oracle and/or its affiliates. All rights reserved.
  5. *
  6. * Oracle and Java are registered trademarks of Oracle and/or its affiliates.
  7. * Other names may be trademarks of their respective owners.
  8. *
  9. * The contents of this file are subject to the terms of either the GNU
  10. * General Public License Version 2 only ("GPL") or the Common
  11. * Development and Distribution License("CDDL") (collectively, the
  12. * "License"). You may not use this file except in compliance with the
  13. * License. You can obtain a copy of the License at
  14. * http://www.netbeans.org/cddl-gplv2.html
  15. * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
  16. * specific language governing permissions and limitations under the
  17. * License. When distributing the software, include this License Header
  18. * Notice in each file and include the License file at
  19. * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this
  20. * particular file as subject to the "Classpath" exception as provided
  21. * by Oracle in the GPL Version 2 section of the License file that
  22. * accompanied this code. If applicable, add the following below the
  23. * License Header, with the fields enclosed by brackets [] replaced by
  24. * your own identifying information:
  25. * "Portions Copyrighted [year] [name of copyright owner]"
  26. *
  27. * Contributor(s):
  28. *
  29. * The Original Software is NetBeans. The Initial Developer of the Original
  30. * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
  31. * Microsystems, Inc. All Rights Reserved.
  32. *
  33. * If you wish your version of this file to be governed by only the CDDL
  34. * or only the GPL Version 2, indicate your decision by adding
  35. * "[Contributor] elects to include this software in this distribution
  36. * under the [CDDL or GPL Version 2] license." If you do not indicate a
  37. * single choice of license, a recipient has the option to distribute
  38. * your version of this file under either the CDDL, the GPL Version 2 or
  39. * to extend the choice of license to its licensees as provided above.
  40. * However, if you add GPL Version 2 code and therefore, elected the GPL
  41. * Version 2 license, then the option applies only if the new code is
  42. * made subject to such option by the copyright holder.
  43. */
  44. package org.netbeans.modules.spellchecker;
  45. import java.io.BufferedReader;
  46. import java.io.File;
  47. import java.io.FileInputStream;
  48. import java.io.FileNotFoundException;
  49. import java.io.IOException;
  50. import java.io.InputStreamReader;
  51. import java.io.RandomAccessFile;
  52. import java.net.URL;
  53. import java.nio.ByteBuffer;
  54. import java.nio.channels.FileChannel;
  55. import java.nio.channels.FileChannel.MapMode;
  56. import java.util.ArrayList;
  57. import java.util.Collections;
  58. import java.util.Iterator;
  59. import java.util.List;
  60. import java.util.Map;
  61. import java.util.Map.Entry;
  62. import java.util.SortedSet;
  63. import java.util.TreeMap;
  64. import java.util.TreeSet;
  65. import java.util.concurrent.TimeUnit;
  66. import java.util.concurrent.atomic.AtomicBoolean;
  67. import java.util.concurrent.atomic.AtomicReference;
  68. import java.util.logging.Level;
  69. import java.util.logging.Logger;
  70. import org.netbeans.modules.spellchecker.spi.dictionary.Dictionary;
  71. import org.netbeans.modules.spellchecker.spi.dictionary.ValidityType;
  72. import org.openide.modules.OnStop;
  73. import org.openide.modules.Places;
  74. import org.openide.util.CharSequences;
  75. import org.openide.util.Exceptions;
  76. import org.openide.util.RequestProcessor;
  77. import org.openide.util.RequestProcessor.Task;
  78. /**
  79. *
  80. * @author Jan Lahoda
  81. */
  82. public class TrieDictionary implements Dictionary {
  /** Logger used by this class and by its nested helper classes. */
  private static final Logger LOG = Logger.getLogger(TrieDictionary.class.getName());

  /** Serialized trie kept on the heap; {@code null} when the trie is read from {@link #buffer}. */
  private final byte[] array;

  /** Read-only mapping of the trie file; {@code null} when the trie is read from {@link #array}. */
  private final ByteBuffer buffer;

  /**
   * Creates a dictionary that reads the serialized trie from the given byte array.
   *
   * @param array serialized trie data
   */
  TrieDictionary(byte[] array) {
    this.buffer = null;
    this.array = array;
  }

  /**
   * Creates a dictionary that reads the serialized trie from a read-only memory
   * mapping of the given file. The stream and channel used to create the mapping
   * are closed before the constructor returns.
   *
   * @param data file holding the serialized trie
   * @throws IOException if the file cannot be opened or mapped
   */
  private TrieDictionary(File data) throws IOException {
    FileInputStream stream = new FileInputStream(data);
    FileChannel source = stream.getChannel();
    ByteBuffer mapped;

    try {
      mapped = source.map(MapMode.READ_ONLY, 0, source.size());
    } finally {
      source.close();
      stream.close();
    }

    this.array = null;
    this.buffer = mapped;
  }
  101. public ValidityType validateWord(CharSequence word) {
  102. String wordString = word.toString();
  103. ValidityType type = validateWordImpl(wordString.toLowerCase());
  104. if (type != ValidityType.VALID) {
  105. ValidityType curr = validateWordImpl(wordString);
  106. if (type == ValidityType.PREFIX_OF_VALID) {
  107. if (curr == ValidityType.VALID) {
  108. type = curr;
  109. }
  110. } else {
  111. type = curr;
  112. }
  113. }
  114. return type;
  115. }
  116. private ValidityType validateWordImpl(CharSequence word) {
  117. int node = findNode(word, 0, 4);
  118. if (node == (-1))
  119. return ValidityType.INVALID;
  120. if (readByte(node) == 0x01) {
  121. return ValidityType.VALID;
  122. }
  123. return ValidityType.PREFIX_OF_VALID;
  124. }
  125. public List<String> findValidWordsForPrefix(CharSequence word) {
  126. List<String> result = new ArrayList<String>();
  127. int node = findNode(word, 0, 4);
  128. if (node == (-1))
  129. return Collections.emptyList();
  130. return findValidWordsForPrefix(new StringBuffer(word), node, result);
  131. }
  132. public List<String> findProposals(CharSequence pattern) {
  133. ListProposalAcceptor result = new ListProposalAcceptor();
  134. findProposals(pattern, 2, 4, new StringBuffer(), result);
  135. return result;
  136. }
  137. private void findProposals(CharSequence pattern, int maxDistance, int node, StringBuffer word, ProposalAcceptor result) {
  138. int entries = readInt(node + 1);
  139. for (int currentEntry = 0; currentEntry < entries; currentEntry++) {
  140. char ac = readChar(node + 5 + currentEntry * 6);
  141. word.append(ac);
  142. int distance = distance(pattern, word);
  143. int targetNode = node + readInt(node + 5 + currentEntry * 6 + 2);
  144. if (distance < maxDistance) {
  145. if (readByte(targetNode) == 0x01) {
  146. result.add(word.toString());
  147. }
  148. }
  149. if ((distance - (pattern.length() - word.length())) < maxDistance) {
  150. findProposals(pattern, maxDistance, targetNode, word, result);
  151. }
  152. word.deleteCharAt(word.length() - 1);
  153. }
  154. }
  155. private void verifyDictionary() {
  156. findProposals("", Integer.MAX_VALUE, 4, new StringBuffer(), NULL_ACCEPTOR);
  157. }
  158. private List<String> findValidWordsForPrefix(StringBuffer foundSoFar, int node, List<String> result) {
  159. int entries = readInt(node + 1);
  160. for (int currentEntry = 0; currentEntry < entries; currentEntry++) {
  161. char ac = readChar(node + 5 + currentEntry * 6);
  162. foundSoFar.append(ac);
  163. int targetNode = node + readInt(node + 5 + currentEntry * 6 + 2);
  164. if (readByte(targetNode) == 0x01) {
  165. result.add(foundSoFar.toString());
  166. }
  167. findValidWordsForPrefix(foundSoFar, targetNode, result);
  168. foundSoFar.deleteCharAt(foundSoFar.length() - 1);
  169. }
  170. return result;
  171. }
  172. private int findNode(CharSequence word, int currentCharOffset, int currentNode) {
  173. if (word.length() <= currentCharOffset)
  174. return currentNode;
  175. char c = word.charAt(currentCharOffset);
  176. int entries = readInt(currentNode + 1);
  177. for (int currentEntry = 0; currentEntry < entries; currentEntry++) {
  178. char ac = readChar(currentNode + 5 + currentEntry * 6);
  179. if (ac == c) {
  180. int newNodeOffset = readInt(currentNode + 5 + currentEntry * 6 + 2);
  181. int newNode = currentNode + newNodeOffset;
  182. return findNode(word, currentCharOffset + 1, newNode);
  183. }
  184. }
  185. return -1;
  186. }
  187. private static final int CURRENT_TRIE_DICTIONARY_VERSION = 2;
  188. public static Dictionary getDictionary(String suffix, List<URL> sources) throws IOException {
  189. File trie = Places.getCacheSubfile("dict/dictionary" + suffix + ".trie" + CURRENT_TRIE_DICTIONARY_VERSION);
  190. return getDictionary(trie, sources);
  191. }
  192. static Dictionary getDictionary(File trie, List<URL> sources) throws IOException {
  193. return new FutureDictionary(trie, sources);
  194. }
  195. private static int toUnsigned(byte b) {
  196. if (b < 0) {
  197. return 256 + b;
  198. }
  199. return b;
  200. }
  201. private int readInt(int pos) {
  202. return (toUnsigned(readByte(pos + 0)) << 24) + (toUnsigned(readByte(pos + 1)) << 16) + (toUnsigned(readByte(pos + 2)) << 8) + toUnsigned(readByte(pos + 3));
  203. }
  204. private char readChar(int pos) {
  205. return (char) ((toUnsigned(readByte(pos + 0)) << 8) + toUnsigned(readByte(pos + 1)));
  206. }
  207. private byte readByte(int pos) {
  208. if (buffer != null) {
  209. return buffer.get(pos);
  210. } else {
  211. return array[pos];
  212. }
  213. }
  214. private static boolean compareChars(char c1, char c2) {
  215. return c1 == c2 || Character.toLowerCase(c1) == Character.toLowerCase(c2);
  216. }
  217. private static int distance(CharSequence pattern, CharSequence word) {
  218. int[] old = new int[pattern.length() + 1];
  219. int[] current = new int[pattern.length() + 1];
  220. int[] oldLength = new int[pattern.length() + 1];
  221. int[] length = new int[pattern.length() + 1];
  222. for (int cntr = 0; cntr < old.length; cntr++) {
  223. old[cntr] = pattern.length() + 1;//cntr;
  224. oldLength[cntr] = (-1);
  225. }
  226. current[0] = old[0] = oldLength[0] = length[0] = 0;
  227. int currentIndex = 0;
  228. while (currentIndex < word.length()) {
  229. for (int cntr = 0; cntr < pattern.length(); cntr++) {
  230. int insert = old[cntr + 1] + 1;
  231. int delete = current[cntr] + 1;
  232. int replace = old[cntr] + (compareChars(pattern.charAt(cntr), word.charAt(currentIndex)) ? 0 : 1);
  233. if (insert < delete) {
  234. if (insert < replace) {
  235. current[cntr + 1] = insert;
  236. length[cntr + 1] = oldLength[cntr + 1] + 1;
  237. } else {
  238. current[cntr + 1] = replace;
  239. length[cntr + 1] = oldLength[cntr] + 1;
  240. }
  241. } else {
  242. if (delete < replace) {
  243. current[cntr + 1] = delete;
  244. length[cntr + 1] = length[cntr];
  245. } else {
  246. current[cntr + 1] = replace;
  247. length[cntr + 1] = oldLength[cntr] + 1;
  248. }
  249. }
  250. }
  251. currentIndex++;
  252. int[] temp = old;
  253. old = current;
  254. current = temp;
  255. temp = oldLength;
  256. oldLength = length;
  257. length = temp;
  258. }
  259. return old[pattern.length()];
  260. }
  261. private static void constructTrie(ByteArray array, List<URL> sources) throws IOException {
  262. SortedSet<CharSequence> data = new TreeSet<CharSequence>();
  263. for (URL u : sources) {
  264. BufferedReader in = new BufferedReader(new InputStreamReader(u.openStream(), "UTF-8"));
  265. try {
  266. String line;
  267. while ((line = in.readLine()) != null) {
  268. data.add(CharSequences.create(line));
  269. }
  270. } finally {
  271. //TODO: wrap in try - catch:
  272. in.close();
  273. }
  274. }
  275. constructTrieData(array, data);
  276. }
  277. private static void constructTrieData(ByteArray array, SortedSet<? extends CharSequence> data) throws IOException {
  278. array.put(0, CURRENT_TRIE_DICTIONARY_VERSION);
  279. encodeOneLayer(array, 4, 0, data);
  280. }
  281. private static int encodeOneLayer(ByteArray array, int currentPointer, int currentChar, SortedSet<? extends CharSequence> data) throws IOException {
  282. Map<Character, SortedSet<CharSequence>> char2Words = new TreeMap<Character, SortedSet<CharSequence>>();
  283. boolean representsFullWord = !data.isEmpty() && data.first().length() <= currentChar;
  284. Iterator<? extends CharSequence> dataIt = data.iterator();
  285. if (representsFullWord) {
  286. dataIt.next();
  287. }
  288. while (dataIt.hasNext()) {
  289. CharSequence word = dataIt.next();
  290. char c = word.charAt(currentChar);
  291. SortedSet<CharSequence> words = char2Words.get(c);
  292. if (words == null) {
  293. char2Words.put(c, words = new TreeSet<CharSequence>());
  294. }
  295. words.add(word);
  296. }
  297. int entries = char2Words.size();
  298. //write flags:
  299. byte flags = 0x00;
  300. if (representsFullWord) {
  301. flags = 0x01;
  302. }
  303. array.put(currentPointer, flags);
  304. array.put(currentPointer + 1, entries);
  305. int currentEntry = 0;
  306. int childPointer = currentPointer + 5 + entries * 6;
  307. for (Entry<Character, SortedSet<CharSequence>> e : char2Words.entrySet()) {
  308. array.put(currentPointer + 5 + currentEntry * 6, e.getKey());
  309. array.put(currentPointer + 5 + currentEntry * 6 + 2, childPointer - currentPointer);
  310. childPointer = encodeOneLayer(array, childPointer, currentChar + 1, e.getValue());
  311. currentEntry++;
  312. }
  313. return childPointer;
  314. }
  315. private static final RequestProcessor WORKER = new RequestProcessor(TrieDictionary.class.getName(), 1, false, false);
  316. private static final class FutureDictionary implements Dictionary, Runnable {
  317. private final File trie;
  318. private final List<URL> sources;
  319. private final AtomicReference<Dictionary> delegate = new AtomicReference<Dictionary>();
  320. private final AtomicReference<Task> workingTask = new AtomicReference<Task>();
  321. private final AtomicBoolean wasBroken = new AtomicBoolean();
  322. public FutureDictionary(File trie, List<URL> sources) throws IOException {
  323. this.trie = trie;
  324. this.sources = sources;
  325. workingTask.set(WORKER.post(this));
  326. }
  327. public ValidityType validateWord(CharSequence word) {
  328. waitDictionaryConstructed();
  329. Dictionary dict = delegate.get();
  330. if (dict != null) {
  331. try {
  332. return dict.validateWord(word);
  333. } catch (IndexOutOfBoundsException ex) {
  334. rebuild(ex);
  335. }
  336. }
  337. return ValidityType.VALID;
  338. }
  339. public List<String> findValidWordsForPrefix(CharSequence word) {
  340. waitDictionaryConstructed();
  341. Dictionary dict = delegate.get();
  342. if (dict != null) {
  343. try {
  344. return dict.findValidWordsForPrefix(word);
  345. } catch (IndexOutOfBoundsException ex) {
  346. rebuild(ex);
  347. }
  348. }
  349. return Collections.emptyList();
  350. }
  351. public List<String> findProposals(CharSequence word) {
  352. waitDictionaryConstructed();
  353. Dictionary dict = delegate.get();
  354. if (dict != null) {
  355. try {
  356. return dict.findProposals(word);
  357. } catch (IndexOutOfBoundsException ex) {
  358. rebuild(ex);
  359. }
  360. }
  361. return Collections.emptyList();
  362. }
  363. private void waitDictionaryConstructed() {
  364. Task t = workingTask.get();
  365. if (t != null) {
  366. t.waitFinished();
  367. workingTask.set(null);
  368. }
  369. }
  370. private void rebuild(Throwable t) {
  371. //the on disk cache is likely broken, attempt to fix:
  372. if (!wasBroken.getAndSet(true)) {
  373. LOG.log(Level.INFO, "An exception thrown while read dictionary cache, attempting to rebuild.", t);
  374. workingTask.set(WORKER.post(this));
  375. } else {
  376. LOG.log(Level.INFO, "An exception thrown while read dictionary cache for second time, giving up.", t);
  377. delegate.set(null);
  378. }
  379. }
  380. public void run() {
  381. trie.getParentFile().mkdirs();
  382. if (trie.canRead()) {
  383. //validate the dictionary:
  384. try {
  385. TrieDictionary d = new TrieDictionary(trie);
  386. d.verifyDictionary();
  387. delegate.set(d);
  388. return ;//valid
  389. } catch (IOException ex) {
  390. LOG.log(Level.INFO, "Dictionary file failed validation, attempting to rebuild", ex);
  391. } catch (IndexOutOfBoundsException ex) {
  392. LOG.log(Level.INFO, "Dictionary file failed validation, attempting to rebuild", ex);
  393. }
  394. }
  395. trie.delete();
  396. File temp = new File(trie.getParentFile(), "dict.temp");
  397. temp.delete();
  398. try {
  399. ByteArray array = new ByteArray(temp);
  400. constructTrie(array, sources);
  401. array.close();
  402. LOG.log(Level.FINE, "trie file length: {0}", temp.length());
  403. temp.renameTo(trie);
  404. if (trie.canRead()) {
  405. TrieDictionary d = new TrieDictionary(trie);
  406. delegate.set(d);
  407. try {
  408. d.verifyDictionary();
  409. } catch (IndexOutOfBoundsException ex) {
  410. LOG.log(Level.INFO, "Cannot read the dictionary file", ex);
  411. wasBroken.set(true);
  412. }
  413. }
  414. } catch (IOException ex) {
  415. Exceptions.printStackTrace(ex);
  416. } finally {
  417. workingTask.set(null);
  418. if (temp.exists()) {
  419. LOG.log(Level.INFO, "Something went wrong during dictionary construction, the temporary file still exists - deleting.");
  420. temp.delete();
  421. }
  422. }
  423. }
  424. }
  425. private static interface ProposalAcceptor {
  426. public boolean add(String proposal);
  427. }
  428. private static class ListProposalAcceptor extends ArrayList<String> implements ProposalAcceptor {}
  429. private static class NullProposalAcceptor implements ProposalAcceptor {
  430. @Override public boolean add(String proposal) {
  431. return true;
  432. }
  433. }
  434. private static final NullProposalAcceptor NULL_ACCEPTOR = new NullProposalAcceptor();
  435. private static class ByteArray {
  436. private final RandomAccessFile out;
  437. public ByteArray(File out) throws FileNotFoundException {
  438. this.out = new RandomAccessFile(out, "rw");
  439. }
  440. public void put(int pos, char what) throws IOException {
  441. out.seek(pos);
  442. out.writeChar(what);
  443. }
  444. public void put(int pos, byte what) throws IOException {
  445. out.seek(pos);
  446. out.writeByte(what);
  447. }
  448. public void put(int pos, int what) throws IOException {
  449. out.seek(pos);
  450. out.writeInt(what);
  451. }
  452. public void close() throws IOException {
  453. out.close();
  454. }
  455. }
  456. @OnStop
  457. public static final class RunOnStop implements Runnable {
  458. @Override public void run() {
  459. WORKER.shutdown();
  460. while (!WORKER.isTerminated()) {
  461. try {
  462. WORKER.awaitTermination(10, TimeUnit.SECONDS);
  463. } catch (InterruptedException ex) {
  464. Exceptions.printStackTrace(ex);
  465. }
  466. }
  467. }
  468. }
  469. }