/ime/latinime/src/com/googlecode/eyesfree/inputmethod/latin/BinaryDictionary.java

http://eyes-free.googlecode.com/ · Java · 261 lines · 190 code · 29 blank · 42 comment · 38 complexity · 1a010a5a90d730c40dfa44fce0614b6f MD5 · raw file

  1. /*
  2. * Copyright (C) 2008 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. package com.googlecode.eyesfree.inputmethod.latin;
  17. import android.content.Context;
  18. import android.util.Log;
  19. import java.io.IOException;
  20. import java.io.InputStream;
  21. import java.nio.ByteBuffer;
  22. import java.nio.ByteOrder;
  23. import java.nio.channels.Channels;
  24. import java.util.Arrays;
  25. /**
  26. * Implements a static, compacted, binary dictionary of standard words.
  27. */
  28. public class BinaryDictionary extends Dictionary {
  29. /**
  30. * There is difference between what java and native code can handle.
  31. * This value should only be used in BinaryDictionary.java
  32. * It is necessary to keep it at this value because some languages e.g. German have
  33. * really long words.
  34. */
  35. protected static final int MAX_WORD_LENGTH = 48;
  36. private static final String TAG = "BinaryDictionary";
  37. private static final int MAX_ALTERNATIVES = 16;
  38. private static final int MAX_WORDS = 18;
  39. private static final int MAX_BIGRAMS = 60;
  40. private static final int TYPED_LETTER_MULTIPLIER = 2;
  41. private static final boolean ENABLE_MISSED_CHARACTERS = true;
  42. private int mDicTypeId;
  43. private int mNativeDict;
  44. private int mDictLength;
  45. private int[] mInputCodes = new int[MAX_WORD_LENGTH * MAX_ALTERNATIVES];
  46. private char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS];
  47. private char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS];
  48. private int[] mFrequencies = new int[MAX_WORDS];
  49. private int[] mFrequencies_bigrams = new int[MAX_BIGRAMS];
  50. // Keep a reference to the native dict direct buffer in Java to avoid
  51. // unexpected deallocation of the direct buffer.
  52. private ByteBuffer mNativeDictDirectBuffer;
  53. static {
  54. try {
  55. System.loadLibrary("jni_touchlatinime");
  56. } catch (UnsatisfiedLinkError ule) {
  57. Log.e("BinaryDictionary", "Could not load native library jni_latinime");
  58. }
  59. }
  60. /**
  61. * Create a dictionary from a raw resource file
  62. * @param context application context for reading resources
  63. * @param resId the resource containing the raw binary dictionary
  64. */
  65. public BinaryDictionary(Context context, int[] resId, int dicTypeId) {
  66. if (resId != null && resId.length > 0 && resId[0] != 0) {
  67. loadDictionary(context, resId);
  68. }
  69. mDicTypeId = dicTypeId;
  70. }
  71. /**
  72. * Create a dictionary from a byte buffer. This is used for testing.
  73. * @param context application context for reading resources
  74. * @param byteBuffer a ByteBuffer containing the binary dictionary
  75. */
  76. public BinaryDictionary(Context context, ByteBuffer byteBuffer, int dicTypeId) {
  77. if (byteBuffer != null) {
  78. if (byteBuffer.isDirect()) {
  79. mNativeDictDirectBuffer = byteBuffer;
  80. } else {
  81. mNativeDictDirectBuffer = ByteBuffer.allocateDirect(byteBuffer.capacity());
  82. byteBuffer.rewind();
  83. mNativeDictDirectBuffer.put(byteBuffer);
  84. }
  85. mDictLength = byteBuffer.capacity();
  86. mNativeDict = openNative(mNativeDictDirectBuffer,
  87. TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
  88. }
  89. mDicTypeId = dicTypeId;
  90. }
  91. private native int openNative(ByteBuffer bb, int typedLetterMultiplier,
  92. int fullWordMultiplier);
  93. private native void closeNative(int dict);
  94. private native boolean isValidWordNative(int nativeData, char[] word, int wordLength);
  95. private native int getSuggestionsNative(int dict, int[] inputCodes, int codesSize,
  96. char[] outputChars, int[] frequencies, int maxWordLength, int maxWords,
  97. int maxAlternatives, int skipPos, int[] nextLettersFrequencies, int nextLettersSize);
  98. private native int getBigramsNative(int dict, char[] prevWord, int prevWordLength,
  99. int[] inputCodes, int inputCodesLength, char[] outputChars, int[] frequencies,
  100. int maxWordLength, int maxBigrams, int maxAlternatives);
  101. private final void loadDictionary(Context context, int[] resId) {
  102. InputStream[] is = null;
  103. try {
  104. // merging separated dictionary into one if dictionary is separated
  105. int total = 0;
  106. is = new InputStream[resId.length];
  107. for (int i = 0; i < resId.length; i++) {
  108. is[i] = context.getResources().openRawResource(resId[i]);
  109. total += is[i].available();
  110. }
  111. mNativeDictDirectBuffer =
  112. ByteBuffer.allocateDirect(total).order(ByteOrder.nativeOrder());
  113. int got = 0;
  114. for (int i = 0; i < resId.length; i++) {
  115. got += Channels.newChannel(is[i]).read(mNativeDictDirectBuffer);
  116. }
  117. if (got != total) {
  118. Log.e(TAG, "Read " + got + " bytes, expected " + total);
  119. } else {
  120. mNativeDict = openNative(mNativeDictDirectBuffer,
  121. TYPED_LETTER_MULTIPLIER, FULL_WORD_FREQ_MULTIPLIER);
  122. mDictLength = total;
  123. }
  124. } catch (IOException e) {
  125. Log.w(TAG, "No available memory for binary dictionary");
  126. } finally {
  127. try {
  128. if (is != null) {
  129. for (int i = 0; i < is.length; i++) {
  130. is[i].close();
  131. }
  132. }
  133. } catch (IOException e) {
  134. Log.w(TAG, "Failed to close input stream");
  135. }
  136. }
  137. }
  138. @Override
  139. public void getBigrams(final WordComposer codes, final CharSequence previousWord,
  140. final WordCallback callback, int[] nextLettersFrequencies) {
  141. char[] chars = previousWord.toString().toCharArray();
  142. Arrays.fill(mOutputChars_bigrams, (char) 0);
  143. Arrays.fill(mFrequencies_bigrams, 0);
  144. int codesSize = codes.size();
  145. Arrays.fill(mInputCodes, -1);
  146. int[] alternatives = codes.getCodesAt(0);
  147. System.arraycopy(alternatives, 0, mInputCodes, 0,
  148. Math.min(alternatives.length, MAX_ALTERNATIVES));
  149. int count = getBigramsNative(mNativeDict, chars, chars.length, mInputCodes, codesSize,
  150. mOutputChars_bigrams, mFrequencies_bigrams, MAX_WORD_LENGTH, MAX_BIGRAMS,
  151. MAX_ALTERNATIVES);
  152. for (int j = 0; j < count; j++) {
  153. if (mFrequencies_bigrams[j] < 1) break;
  154. int start = j * MAX_WORD_LENGTH;
  155. int len = 0;
  156. while (mOutputChars_bigrams[start + len] != 0) {
  157. len++;
  158. }
  159. if (len > 0) {
  160. callback.addWord(mOutputChars_bigrams, start, len, mFrequencies_bigrams[j],
  161. mDicTypeId, DataType.BIGRAM);
  162. }
  163. }
  164. }
  165. @Override
  166. public void getWords(final WordComposer codes, final WordCallback callback,
  167. int[] nextLettersFrequencies) {
  168. final int codesSize = codes.size();
  169. // Won't deal with really long words.
  170. if (codesSize > MAX_WORD_LENGTH - 1) return;
  171. Arrays.fill(mInputCodes, -1);
  172. for (int i = 0; i < codesSize; i++) {
  173. int[] alternatives = codes.getCodesAt(i);
  174. System.arraycopy(alternatives, 0, mInputCodes, i * MAX_ALTERNATIVES,
  175. Math.min(alternatives.length, MAX_ALTERNATIVES));
  176. }
  177. Arrays.fill(mOutputChars, (char) 0);
  178. Arrays.fill(mFrequencies, 0);
  179. int count = getSuggestionsNative(mNativeDict, mInputCodes, codesSize,
  180. mOutputChars, mFrequencies,
  181. MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES, -1,
  182. nextLettersFrequencies,
  183. nextLettersFrequencies != null ? nextLettersFrequencies.length : 0);
  184. // If there aren't sufficient suggestions, search for words by allowing wild cards at
  185. // the different character positions. This feature is not ready for prime-time as we need
  186. // to figure out the best ranking for such words compared to proximity corrections and
  187. // completions.
  188. if (ENABLE_MISSED_CHARACTERS && count < 5) {
  189. for (int skip = 0; skip < codesSize; skip++) {
  190. int tempCount = getSuggestionsNative(mNativeDict, mInputCodes, codesSize,
  191. mOutputChars, mFrequencies,
  192. MAX_WORD_LENGTH, MAX_WORDS, MAX_ALTERNATIVES, skip,
  193. null, 0);
  194. count = Math.max(count, tempCount);
  195. if (tempCount > 0) break;
  196. }
  197. }
  198. for (int j = 0; j < count; j++) {
  199. if (mFrequencies[j] < 1) break;
  200. int start = j * MAX_WORD_LENGTH;
  201. int len = 0;
  202. while (mOutputChars[start + len] != 0) {
  203. len++;
  204. }
  205. if (len > 0) {
  206. callback.addWord(mOutputChars, start, len, mFrequencies[j], mDicTypeId,
  207. DataType.UNIGRAM);
  208. }
  209. }
  210. }
  211. @Override
  212. public boolean isValidWord(CharSequence word) {
  213. if (word == null) return false;
  214. char[] chars = word.toString().toCharArray();
  215. return isValidWordNative(mNativeDict, chars, chars.length);
  216. }
  217. public int getSize() {
  218. return mDictLength; // This value is initialized on the call to openNative()
  219. }
  220. @Override
  221. public synchronized void close() {
  222. if (mNativeDict != 0) {
  223. closeNative(mNativeDict);
  224. mNativeDict = 0;
  225. }
  226. }
  227. @Override
  228. protected void finalize() throws Throwable {
  229. close();
  230. super.finalize();
  231. }
  232. }