/ime/latinime/src/com/googlecode/eyesfree/inputmethod/latin/UserBigramDictionary.java

http://eyes-free.googlecode.com/ · Java · 401 lines · 268 code · 54 blank · 79 comment · 26 complexity · 056eaa949067341ccf4768e54ad363d5 MD5 · raw file

  1. /*
  2. * Copyright (C) 2010 Google Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5. * use this file except in compliance with the License. You may obtain a copy of
  6. * the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  12. * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  13. * License for the specific language governing permissions and limitations under
  14. * the License.
  15. */
  16. package com.googlecode.eyesfree.inputmethod.latin;
  17. import android.content.ContentValues;
  18. import android.content.Context;
  19. import android.database.Cursor;
  20. import android.database.sqlite.SQLiteDatabase;
  21. import android.database.sqlite.SQLiteOpenHelper;
  22. import android.database.sqlite.SQLiteQueryBuilder;
  23. import android.os.AsyncTask;
  24. import android.provider.BaseColumns;
  25. import android.util.Log;
  26. import java.util.HashMap;
  27. import java.util.HashSet;
  28. import java.util.Iterator;
  29. /**
  30. * Stores all the pairs user types in databases. Prune the database if the size
  31. * gets too big. Unlike AutoDictionary, it even stores the pairs that are already
  32. * in the dictionary.
  33. */
  34. public class UserBigramDictionary extends ExpandableDictionary {
  /** Tag used for log output from this class. */
  private static final String TAG = "UserBigramDictionary";

  /** Frequency handed to the in-memory dictionary for any pair being typed or picked. */
  private static final int FREQUENCY_FOR_TYPED = 2;

  /** Ceiling applied to the frequency of every pair. */
  private static final int FREQUENCY_MAX = 127;

  /**
   * A pair typed 6 times reaches this value and would be suggested.
   * Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM.
   */
  protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED;

  /** Maximum number of pairs; pruning starts once the database grows past this number. */
  private static int sMaxUserBigrams = 10000;

  /**
   * Extra number of pairs removed whenever the maximum is exceeded, leaving
   * (sMaxUserBigrams - sDeleteUserBigrams) pairs behind.
   * Do not keep this number small, to avoid deleting too often.
   */
  private static int sDeleteUserBigrams = 1000;

  /** Database version; increase it whenever the database structure changes. */
  private static final int DATABASE_VERSION = 1;

  /** File name of the database. */
  private static final String DATABASE_NAME = "userbigram_dict.db";

  /** Name of the words table in the database. */
  private static final String MAIN_TABLE_NAME = "main";

  // TODO: Consume less space by using a unique id for locale instead of the whole
  // 2-5 character string. (Same TODO from AutoDictionary)
  private static final String MAIN_COLUMN_ID = BaseColumns._ID;
  private static final String MAIN_COLUMN_WORD1 = "word1";
  private static final String MAIN_COLUMN_WORD2 = "word2";
  private static final String MAIN_COLUMN_LOCALE = "locale";

  /** Name of the frequency table in the database. */
  private static final String FREQ_TABLE_NAME = "frequency";
  private static final String FREQ_COLUMN_ID = BaseColumns._ID;
  private static final String FREQ_COLUMN_PAIR_ID = "pair_id";
  private static final String FREQ_COLUMN_FREQUENCY = "freq";

  /** IME consulted for the auto-capitalization state of the current word; may be null. */
  private final LatinIME mIme;

  /** Locale for which this dictionary is storing pairs. */
  private String mLocale;

  /** Pairs waiting to be written to the database; guarded by {@link #mPendingWritesLock}. */
  private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
  private final Object mPendingWritesLock = new Object();

  /** True from the moment an update task is started until its background work finishes. */
  private static volatile boolean sUpdatingDB;

  /** Identity projection map covering every column of the two joined tables. */
  private static final HashMap<String, String> sDictProjectionMap =
      new HashMap<String, String>();

  static {
    sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID);
    sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1);
    sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2);
    sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE);

    sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID);
    sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID);
    sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY);
  }

  /** Database helper shared by all instances; created by the first constructor call. */
  private static DatabaseHelper sOpenHelper;
  89. private static class Bigram {
  90. String word1;
  91. String word2;
  92. int frequency;
  93. Bigram(String word1, String word2, int frequency) {
  94. this.word1 = word1;
  95. this.word2 = word2;
  96. this.frequency = frequency;
  97. }
  98. @Override
  99. public boolean equals(Object bigram) {
  100. Bigram bigram2 = (Bigram) bigram;
  101. return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2));
  102. }
  103. @Override
  104. public int hashCode() {
  105. return (word1 + " " + word2).hashCode();
  106. }
  107. }
  108. public void setDatabaseMax(int maxUserBigram) {
  109. sMaxUserBigrams = maxUserBigram;
  110. }
  111. public void setDatabaseDelete(int deleteUserBigram) {
  112. sDeleteUserBigrams = deleteUserBigram;
  113. }
  114. public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) {
  115. super(context, dicTypeId);
  116. mIme = ime;
  117. mLocale = locale;
  118. if (sOpenHelper == null) {
  119. sOpenHelper = new DatabaseHelper(getContext());
  120. }
  121. if (mLocale != null && mLocale.length() > 1) {
  122. loadDictionary();
  123. }
  124. }
  125. @Override
  126. public void close() {
  127. flushPendingWrites();
  128. // Don't close the database as locale changes will require it to be reopened anyway
  129. // Also, the database is written to somewhat frequently, so it needs to be kept alive
  130. // throughout the life of the process.
  131. // mOpenHelper.close();
  132. super.close();
  133. }
  134. /**
  135. * Pair will be added to the userbigram database.
  136. */
  137. public int addBigrams(String word1, String word2) {
  138. // remove caps
  139. if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) {
  140. word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1);
  141. }
  142. int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED);
  143. if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX;
  144. synchronized (mPendingWritesLock) {
  145. if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) {
  146. mPendingWrites.add(new Bigram(word1, word2, freq));
  147. } else {
  148. Bigram bi = new Bigram(word1, word2, freq);
  149. mPendingWrites.remove(bi);
  150. mPendingWrites.add(bi);
  151. }
  152. }
  153. return freq;
  154. }
  155. /**
  156. * Schedules a background thread to write any pending words to the database.
  157. */
  158. public void flushPendingWrites() {
  159. synchronized (mPendingWritesLock) {
  160. // Nothing pending? Return
  161. if (mPendingWrites.isEmpty()) return;
  162. // Create a background thread to write the pending entries
  163. new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute();
  164. // Create a new map for writing new entries into while the old one is written to db
  165. mPendingWrites = new HashSet<Bigram>();
  166. }
  167. }
  168. /** Used for testing purpose **/
  169. void waitUntilUpdateDBDone() {
  170. synchronized (mPendingWritesLock) {
  171. while (sUpdatingDB) {
  172. try {
  173. Thread.sleep(100);
  174. } catch (InterruptedException e) {
  175. }
  176. }
  177. return;
  178. }
  179. }
  180. @Override
  181. public void loadDictionaryAsync() {
  182. // Load the words that correspond to the current input locale
  183. Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale });
  184. try {
  185. if (cursor.moveToFirst()) {
  186. int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1);
  187. int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2);
  188. int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY);
  189. while (!cursor.isAfterLast()) {
  190. String word1 = cursor.getString(word1Index);
  191. String word2 = cursor.getString(word2Index);
  192. int frequency = cursor.getInt(frequencyIndex);
  193. // Safeguard against adding really long words. Stack may overflow due
  194. // to recursive lookup
  195. if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) {
  196. super.setBigram(word1, word2, frequency);
  197. }
  198. cursor.moveToNext();
  199. }
  200. }
  201. } finally {
  202. cursor.close();
  203. }
  204. }
  205. /**
  206. * Query the database
  207. */
  208. private Cursor query(String selection, String[] selectionArgs) {
  209. SQLiteQueryBuilder qb = new SQLiteQueryBuilder();
  210. // main INNER JOIN frequency ON (main._id=freq.pair_id)
  211. qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON ("
  212. + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "."
  213. + FREQ_COLUMN_PAIR_ID +")");
  214. qb.setProjectionMap(sDictProjectionMap);
  215. // Get the database and run the query
  216. SQLiteDatabase db = sOpenHelper.getReadableDatabase();
  217. Cursor c = qb.query(db,
  218. new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY },
  219. selection, selectionArgs, null, null, null);
  220. return c;
  221. }
  222. /**
  223. * This class helps open, create, and upgrade the database file.
  224. */
  225. private static class DatabaseHelper extends SQLiteOpenHelper {
  226. DatabaseHelper(Context context) {
  227. super(context, DATABASE_NAME, null, DATABASE_VERSION);
  228. }
  229. @Override
  230. public void onCreate(SQLiteDatabase db) {
  231. db.execSQL("PRAGMA foreign_keys = ON;");
  232. db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " ("
  233. + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY,"
  234. + MAIN_COLUMN_WORD1 + " TEXT,"
  235. + MAIN_COLUMN_WORD2 + " TEXT,"
  236. + MAIN_COLUMN_LOCALE + " TEXT"
  237. + ");");
  238. db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " ("
  239. + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY,"
  240. + FREQ_COLUMN_PAIR_ID + " INTEGER,"
  241. + FREQ_COLUMN_FREQUENCY + " INTEGER,"
  242. + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME
  243. + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE"
  244. + ");");
  245. }
  246. @Override
  247. public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) {
  248. Log.w(TAG, "Upgrading database from version " + oldVersion + " to "
  249. + newVersion + ", which will destroy all old data");
  250. db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME);
  251. db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME);
  252. onCreate(db);
  253. }
  254. }
  255. /**
  256. * Async task to write pending words to the database so that it stays in sync with
  257. * the in-memory trie.
  258. */
  259. private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
  260. private final HashSet<Bigram> mMap;
  261. private final DatabaseHelper mDbHelper;
  262. private final String mLocale;
  263. public UpdateDbTask(Context context, DatabaseHelper openHelper,
  264. HashSet<Bigram> pendingWrites, String locale) {
  265. mMap = pendingWrites;
  266. mLocale = locale;
  267. mDbHelper = openHelper;
  268. }
  269. /** Prune any old data if the database is getting too big. */
  270. private void checkPruneData(SQLiteDatabase db) {
  271. db.execSQL("PRAGMA foreign_keys = ON;");
  272. Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID },
  273. null, null, null, null, null);
  274. try {
  275. int totalRowCount = c.getCount();
  276. // prune out old data if we have too much data
  277. if (totalRowCount > sMaxUserBigrams) {
  278. int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams;
  279. int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID);
  280. c.moveToFirst();
  281. int count = 0;
  282. while (count < numDeleteRows && !c.isAfterLast()) {
  283. String pairId = c.getString(pairIdColumnId);
  284. // Deleting from MAIN table will delete the frequencies
  285. // due to FOREIGN KEY .. ON DELETE CASCADE
  286. db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?",
  287. new String[] { pairId });
  288. c.moveToNext();
  289. count++;
  290. }
  291. }
  292. } finally {
  293. c.close();
  294. }
  295. }
  296. @Override
  297. protected void onPreExecute() {
  298. sUpdatingDB = true;
  299. }
  300. @Override
  301. protected Void doInBackground(Void... v) {
  302. SQLiteDatabase db = mDbHelper.getWritableDatabase();
  303. db.execSQL("PRAGMA foreign_keys = ON;");
  304. // Write all the entries to the db
  305. Iterator<Bigram> iterator = mMap.iterator();
  306. while (iterator.hasNext()) {
  307. Bigram bi = iterator.next();
  308. // find pair id
  309. Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
  310. MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
  311. + MAIN_COLUMN_LOCALE + "=?",
  312. new String[] { bi.word1, bi.word2, mLocale }, null, null, null);
  313. int pairId;
  314. if (c.moveToFirst()) {
  315. // existing pair
  316. pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
  317. db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
  318. new String[] { Integer.toString(pairId) });
  319. } else {
  320. // new pair
  321. Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
  322. getContentValues(bi.word1, bi.word2, mLocale));
  323. pairId = pairIdLong.intValue();
  324. }
  325. c.close();
  326. // insert new frequency
  327. db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency));
  328. }
  329. checkPruneData(db);
  330. sUpdatingDB = false;
  331. return null;
  332. }
  333. private ContentValues getContentValues(String word1, String word2, String locale) {
  334. ContentValues values = new ContentValues(3);
  335. values.put(MAIN_COLUMN_WORD1, word1);
  336. values.put(MAIN_COLUMN_WORD2, word2);
  337. values.put(MAIN_COLUMN_LOCALE, locale);
  338. return values;
  339. }
  340. private ContentValues getFrequencyContentValues(int pairId, int frequency) {
  341. ContentValues values = new ContentValues(2);
  342. values.put(FREQ_COLUMN_PAIR_ID, pairId);
  343. values.put(FREQ_COLUMN_FREQUENCY, frequency);
  344. return values;
  345. }
  346. }
  347. }