PageRenderTime 41ms CodeModel.GetById 14ms app.highlight 22ms RepoModel.GetById 1ms app.codeStats 1ms

/ime/latinime/src/com/googlecode/eyesfree/inputmethod/latin/UserBigramDictionary.java

http://eyes-free.googlecode.com/
Java | 401 lines | 268 code | 54 blank | 79 comment | 26 complexity | 056eaa949067341ccf4768e54ad363d5 MD5 | raw file
  1/*
  2 * Copyright (C) 2010 Google Inc.
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
  5 * use this file except in compliance with the License. You may obtain a copy of
  6 * the License at
  7 *
  8 * http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 13 * License for the specific language governing permissions and limitations under
 14 * the License.
 15 */
 16
 17package com.googlecode.eyesfree.inputmethod.latin;
 18
 19import android.content.ContentValues;
 20import android.content.Context;
 21import android.database.Cursor;
 22import android.database.sqlite.SQLiteDatabase;
 23import android.database.sqlite.SQLiteOpenHelper;
 24import android.database.sqlite.SQLiteQueryBuilder;
 25import android.os.AsyncTask;
 26import android.provider.BaseColumns;
 27import android.util.Log;
 28
 29import java.util.HashMap;
 30import java.util.HashSet;
 31import java.util.Iterator;
 32
 33/**
 34 * Stores all the pairs user types in databases. Prune the database if the size
 35 * gets too big. Unlike AutoDictionary, it even stores the pairs that are already
 36 * in the dictionary.
 37 */
 38public class UserBigramDictionary extends ExpandableDictionary {
 39    private static final String TAG = "UserBigramDictionary";
 40
 41    /** Any pair being typed or picked */
 42    private static final int FREQUENCY_FOR_TYPED = 2;
 43
 44    /** Maximum frequency for all pairs */
 45    private static final int FREQUENCY_MAX = 127;
 46
 47    /**
 48     * If this pair is typed 6 times, it would be suggested.
 49     * Should be smaller than ContactsDictionary.FREQUENCY_FOR_CONTACTS_BIGRAM
 50     */
 51    protected static final int SUGGEST_THRESHOLD = 6 * FREQUENCY_FOR_TYPED;
 52
    /** Maximum number of pairs. Pruning will start when the database goes above this number. */
 54    private static int sMaxUserBigrams = 10000;
 55
 56    /**
 57     * When it hits maximum bigram pair, it will delete until you are left with
 58     * only (sMaxUserBigrams - sDeleteUserBigrams) pairs.
 59     * Do not keep this number small to avoid deleting too often.
 60     */
 61    private static int sDeleteUserBigrams = 1000;
 62
 63    /**
 64     * Database version should increase if the database structure changes
 65     */
 66    private static final int DATABASE_VERSION = 1;
 67
 68    private static final String DATABASE_NAME = "userbigram_dict.db";
 69
 70    /** Name of the words table in the database */
 71    private static final String MAIN_TABLE_NAME = "main";
 72    // TODO: Consume less space by using a unique id for locale instead of the whole
 73    // 2-5 character string. (Same TODO from AutoDictionary)
 74    private static final String MAIN_COLUMN_ID = BaseColumns._ID;
 75    private static final String MAIN_COLUMN_WORD1 = "word1";
 76    private static final String MAIN_COLUMN_WORD2 = "word2";
 77    private static final String MAIN_COLUMN_LOCALE = "locale";
 78
 79    /** Name of the frequency table in the database */
 80    private static final String FREQ_TABLE_NAME = "frequency";
 81    private static final String FREQ_COLUMN_ID = BaseColumns._ID;
 82    private static final String FREQ_COLUMN_PAIR_ID = "pair_id";
 83    private static final String FREQ_COLUMN_FREQUENCY = "freq";
 84
 85    private final LatinIME mIme;
 86
    /** Locale for which this user bigram dictionary is storing word pairs */
 88    private String mLocale;
 89
 90    private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>();
 91    private final Object mPendingWritesLock = new Object();
 92    private static volatile boolean sUpdatingDB = false;
 93
 94    private final static HashMap<String, String> sDictProjectionMap;
 95
 96    static {
 97        sDictProjectionMap = new HashMap<String, String>();
 98        sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID);
 99        sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1);
100        sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2);
101        sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE);
102
103        sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID);
104        sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID);
105        sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY);
106    }
107
108    private static DatabaseHelper sOpenHelper = null;
109
110    private static class Bigram {
111        String word1;
112        String word2;
113        int frequency;
114
115        Bigram(String word1, String word2, int frequency) {
116            this.word1 = word1;
117            this.word2 = word2;
118            this.frequency = frequency;
119        }
120
121        @Override
122        public boolean equals(Object bigram) {
123            Bigram bigram2 = (Bigram) bigram;
124            return (word1.equals(bigram2.word1) && word2.equals(bigram2.word2));
125        }
126
127        @Override
128        public int hashCode() {
129            return (word1 + " " + word2).hashCode();
130        }
131    }
132
133    public void setDatabaseMax(int maxUserBigram) {
134        sMaxUserBigrams = maxUserBigram;
135    }
136
137    public void setDatabaseDelete(int deleteUserBigram) {
138        sDeleteUserBigrams = deleteUserBigram;
139    }
140
141    public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) {
142        super(context, dicTypeId);
143        mIme = ime;
144        mLocale = locale;
145        if (sOpenHelper == null) {
146            sOpenHelper = new DatabaseHelper(getContext());
147        }
148        if (mLocale != null && mLocale.length() > 1) {
149            loadDictionary();
150        }
151    }
152
153    @Override
154    public void close() {
155        flushPendingWrites();
156        // Don't close the database as locale changes will require it to be reopened anyway
157        // Also, the database is written to somewhat frequently, so it needs to be kept alive
158        // throughout the life of the process.
159        // mOpenHelper.close();
160        super.close();
161    }
162
163    /**
164     * Pair will be added to the userbigram database.
165     */
166    public int addBigrams(String word1, String word2) {
167        // remove caps
168        if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) {
169            word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1);
170        }
171
172        int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED);
173        if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX;
174        synchronized (mPendingWritesLock) {
175            if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) {
176                mPendingWrites.add(new Bigram(word1, word2, freq));
177            } else {
178                Bigram bi = new Bigram(word1, word2, freq);
179                mPendingWrites.remove(bi);
180                mPendingWrites.add(bi);
181            }
182        }
183
184        return freq;
185    }
186
187    /**
188     * Schedules a background thread to write any pending words to the database.
189     */
190    public void flushPendingWrites() {
191        synchronized (mPendingWritesLock) {
192            // Nothing pending? Return
193            if (mPendingWrites.isEmpty()) return;
194            // Create a background thread to write the pending entries
195            new UpdateDbTask(getContext(), sOpenHelper, mPendingWrites, mLocale).execute();
196            // Create a new map for writing new entries into while the old one is written to db
197            mPendingWrites = new HashSet<Bigram>();
198        }
199    }
200
201    /** Used for testing purpose **/
202    void waitUntilUpdateDBDone() {
203        synchronized (mPendingWritesLock) {
204            while (sUpdatingDB) {
205                try {
206                    Thread.sleep(100);
207                } catch (InterruptedException e) {
208                }
209            }
210            return;
211        }
212    }
213
214    @Override
215    public void loadDictionaryAsync() {
216        // Load the words that correspond to the current input locale
217        Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale });
218        try {
219            if (cursor.moveToFirst()) {
220                int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1);
221                int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2);
222                int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY);
223                while (!cursor.isAfterLast()) {
224                    String word1 = cursor.getString(word1Index);
225                    String word2 = cursor.getString(word2Index);
226                    int frequency = cursor.getInt(frequencyIndex);
227                    // Safeguard against adding really long words. Stack may overflow due
228                    // to recursive lookup
229                    if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) {
230                        super.setBigram(word1, word2, frequency);
231                    }
232                    cursor.moveToNext();
233                }
234            }
235        } finally {
236            cursor.close();
237        }
238    }
239
240    /**
241     * Query the database
242     */
243    private Cursor query(String selection, String[] selectionArgs) {
244        SQLiteQueryBuilder qb = new SQLiteQueryBuilder();
245
246        // main INNER JOIN frequency ON (main._id=freq.pair_id)
247        qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON ("
248                + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "."
249                + FREQ_COLUMN_PAIR_ID +")");
250
251        qb.setProjectionMap(sDictProjectionMap);
252
253        // Get the database and run the query
254        SQLiteDatabase db = sOpenHelper.getReadableDatabase();
255        Cursor c = qb.query(db,
256                new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY },
257                selection, selectionArgs, null, null, null);
258        return c;
259    }
260
261    /**
262     * This class helps open, create, and upgrade the database file.
263     */
264    private static class DatabaseHelper extends SQLiteOpenHelper {
265
266        DatabaseHelper(Context context) {
267            super(context, DATABASE_NAME, null, DATABASE_VERSION);
268        }
269
270        @Override
271        public void onCreate(SQLiteDatabase db) {
272            db.execSQL("PRAGMA foreign_keys = ON;");
273            db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " ("
274                    + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY,"
275                    + MAIN_COLUMN_WORD1 + " TEXT,"
276                    + MAIN_COLUMN_WORD2 + " TEXT,"
277                    + MAIN_COLUMN_LOCALE + " TEXT"
278                    + ");");
279            db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " ("
280                    + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY,"
281                    + FREQ_COLUMN_PAIR_ID + " INTEGER,"
282                    + FREQ_COLUMN_FREQUENCY + " INTEGER,"
283                    + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME
284                    + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE"
285                    + ");");
286        }
287
288        @Override
289        public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) {
290            Log.w(TAG, "Upgrading database from version " + oldVersion + " to "
291                    + newVersion + ", which will destroy all old data");
292            db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME);
293            db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME);
294            onCreate(db);
295        }
296    }
297
298    /**
299     * Async task to write pending words to the database so that it stays in sync with
300     * the in-memory trie.
301     */
302    private static class UpdateDbTask extends AsyncTask<Void, Void, Void> {
303        private final HashSet<Bigram> mMap;
304        private final DatabaseHelper mDbHelper;
305        private final String mLocale;
306
307        public UpdateDbTask(Context context, DatabaseHelper openHelper,
308                HashSet<Bigram> pendingWrites, String locale) {
309            mMap = pendingWrites;
310            mLocale = locale;
311            mDbHelper = openHelper;
312        }
313
314        /** Prune any old data if the database is getting too big. */
315        private void checkPruneData(SQLiteDatabase db) {
316            db.execSQL("PRAGMA foreign_keys = ON;");
317            Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID },
318                    null, null, null, null, null);
319            try {
320                int totalRowCount = c.getCount();
321                // prune out old data if we have too much data
322                if (totalRowCount > sMaxUserBigrams) {
323                    int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams;
324                    int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID);
325                    c.moveToFirst();
326                    int count = 0;
327                    while (count < numDeleteRows && !c.isAfterLast()) {
328                        String pairId = c.getString(pairIdColumnId);
329                        // Deleting from MAIN table will delete the frequencies
330                        // due to FOREIGN KEY .. ON DELETE CASCADE
331                        db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?",
332                            new String[] { pairId });
333                        c.moveToNext();
334                        count++;
335                    }
336                }
337            } finally {
338                c.close();
339            }
340        }
341
342        @Override
343        protected void onPreExecute() {
344            sUpdatingDB = true;
345        }
346
347        @Override
348        protected Void doInBackground(Void... v) {
349            SQLiteDatabase db = mDbHelper.getWritableDatabase();
350            db.execSQL("PRAGMA foreign_keys = ON;");
351            // Write all the entries to the db
352            Iterator<Bigram> iterator = mMap.iterator();
353            while (iterator.hasNext()) {
354                Bigram bi = iterator.next();
355
356                // find pair id
357                Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID },
358                        MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND "
359                        + MAIN_COLUMN_LOCALE + "=?",
360                        new String[] { bi.word1, bi.word2, mLocale }, null, null, null);
361
362                int pairId;
363                if (c.moveToFirst()) {
364                    // existing pair
365                    pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID));
366                    db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?",
367                            new String[] { Integer.toString(pairId) });
368                } else {
369                    // new pair
370                    Long pairIdLong = db.insert(MAIN_TABLE_NAME, null,
371                            getContentValues(bi.word1, bi.word2, mLocale));
372                    pairId = pairIdLong.intValue();
373                }
374                c.close();
375
376                // insert new frequency
377                db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.frequency));
378            }
379            checkPruneData(db);
380            sUpdatingDB = false;
381
382            return null;
383        }
384
385        private ContentValues getContentValues(String word1, String word2, String locale) {
386            ContentValues values = new ContentValues(3);
387            values.put(MAIN_COLUMN_WORD1, word1);
388            values.put(MAIN_COLUMN_WORD2, word2);
389            values.put(MAIN_COLUMN_LOCALE, locale);
390            return values;
391        }
392
393        private ContentValues getFrequencyContentValues(int pairId, int frequency) {
394           ContentValues values = new ContentValues(2);
395           values.put(FREQ_COLUMN_PAIR_ID, pairId);
396           values.put(FREQ_COLUMN_FREQUENCY, frequency);
397           return values;
398        }
399    }
400
401}