PageRenderTime 69ms CodeModel.GetById 13ms app.highlight 47ms RepoModel.GetById 1ms app.codeStats 1ms

/ime/latinime/src/com/googlecode/eyesfree/inputmethod/latin/ExpandableDictionary.java

http://eyes-free.googlecode.com/
Java | 691 lines | 542 code | 52 blank | 97 comment | 106 complexity | 4b862e0b93d14bf8d7760a1adb2b47b0 MD5 | raw file
  1/*
  2 * Copyright (C) 2009 The Android Open Source Project
  3 *
  4 * Licensed under the Apache License, Version 2.0 (the "License");
  5 * you may not use this file except in compliance with the License.
  6 * You may obtain a copy of the License at
  7 *
  8 *      http://www.apache.org/licenses/LICENSE-2.0
  9 *
 10 * Unless required by applicable law or agreed to in writing, software
 11 * distributed under the License is distributed on an "AS IS" BASIS,
 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 * See the License for the specific language governing permissions and
 14 * limitations under the License.
 15 */
 16
 17package com.googlecode.eyesfree.inputmethod.latin;
 18
 19import android.content.Context;
 20import android.os.AsyncTask;
 21
 22import java.util.LinkedList;
 23
 24/**
 25 * Base class for an in-memory dictionary that can grow dynamically and can
 26 * be searched for suggestions and valid words.
 27 */
 28public class ExpandableDictionary extends Dictionary {
 29    /**
 30     * There is difference between what java and native code can handle.
 31     * It uses 32 because Java stack overflows when greater value is used.
 32     */
 33    protected static final int MAX_WORD_LENGTH = 32;
 34
 35    private Context mContext;
 36    private char[] mWordBuilder = new char[MAX_WORD_LENGTH];
 37    private int mDicTypeId;
 38    private int mMaxDepth;
 39    private int mInputLength;
 40    private int[] mNextLettersFrequencies;
 41    private StringBuilder sb = new StringBuilder(MAX_WORD_LENGTH);
 42
 43    private static final char QUOTE = '\'';
 44
 45    private boolean mRequiresReload;
 46
 47    private boolean mUpdatingDictionary;
 48
 49    // Use this lock before touching mUpdatingDictionary & mRequiresDownload
 50    private Object mUpdatingLock = new Object();
 51
 52    static class Node {
 53        char code;
 54        int frequency;
 55        boolean terminal;
 56        Node parent;
 57        NodeArray children;
 58        LinkedList<NextWord> ngrams; // Supports ngram
 59    }
 60
 61    static class NodeArray {
 62        Node[] data;
 63        int length = 0;
 64        private static final int INCREMENT = 2;
 65
 66        NodeArray() {
 67            data = new Node[INCREMENT];
 68        }
 69
 70        void add(Node n) {
 71            if (length + 1 > data.length) {
 72                Node[] tempData = new Node[length + INCREMENT];
 73                if (length > 0) {
 74                    System.arraycopy(data, 0, tempData, 0, length);
 75                }
 76                data = tempData;
 77            }
 78            data[length++] = n;
 79        }
 80    }
 81
 82    static class NextWord {
 83        Node word;
 84        NextWord nextWord;
 85        int frequency;
 86
 87        NextWord(Node word, int frequency) {
 88            this.word = word;
 89            this.frequency = frequency;
 90        }
 91    }
 92
 93
 94    private NodeArray mRoots;
 95
 96    private int[][] mCodes;
 97
 98    ExpandableDictionary(Context context, int dicTypeId) {
 99        mContext = context;
100        clearDictionary();
101        mCodes = new int[MAX_WORD_LENGTH][];
102        mDicTypeId = dicTypeId;
103    }
104
105    public void loadDictionary() {
106        synchronized (mUpdatingLock) {
107            startDictionaryLoadingTaskLocked();
108        }
109    }
110
111    public void startDictionaryLoadingTaskLocked() {
112        if (!mUpdatingDictionary) {
113            mUpdatingDictionary = true;
114            mRequiresReload = false;
115            new LoadDictionaryTask().execute();
116        }
117    }
118
119    public void setRequiresReload(boolean reload) {
120        synchronized (mUpdatingLock) {
121            mRequiresReload = reload;
122        }
123    }
124
125    public boolean getRequiresReload() {
126        return mRequiresReload;
127    }
128
129    /** Override to load your dictionary here, on a background thread. */
130    public void loadDictionaryAsync() {
131    }
132
133    Context getContext() {
134        return mContext;
135    }
136    
137    int getMaxWordLength() {
138        return MAX_WORD_LENGTH;
139    }
140
141    public void addWord(String word, int frequency) {
142        addWordRec(mRoots, word, 0, frequency, null);
143    }
144
145    private void addWordRec(NodeArray children, final String word, final int depth,
146            final int frequency, Node parentNode) {
147        final int wordLength = word.length();
148        final char c = word.charAt(depth);
149        // Does children have the current character?
150        final int childrenLength = children.length;
151        Node childNode = null;
152        boolean found = false;
153        for (int i = 0; i < childrenLength; i++) {
154            childNode = children.data[i];
155            if (childNode.code == c) {
156                found = true;
157                break;
158            }
159        }
160        if (!found) {
161            childNode = new Node();
162            childNode.code = c;
163            childNode.parent = parentNode;
164            children.add(childNode);
165        }
166        if (wordLength == depth + 1) {
167            // Terminate this word
168            childNode.terminal = true;
169            childNode.frequency = Math.max(frequency, childNode.frequency);
170            if (childNode.frequency > 255) childNode.frequency = 255;
171            return;
172        }
173        if (childNode.children == null) {
174            childNode.children = new NodeArray();
175        }
176        addWordRec(childNode.children, word, depth + 1, frequency, childNode);
177    }
178
179    @Override
180    public void getWords(final WordComposer codes, final WordCallback callback,
181            int[] nextLettersFrequencies) {
182        synchronized (mUpdatingLock) {
183            // If we need to update, start off a background task
184            if (mRequiresReload) startDictionaryLoadingTaskLocked();
185            // Currently updating contacts, don't return any results.
186            if (mUpdatingDictionary) return;
187        }
188
189        mInputLength = codes.size();
190        mNextLettersFrequencies = nextLettersFrequencies;
191        if (mCodes.length < mInputLength) mCodes = new int[mInputLength][];
192        // Cache the codes so that we don't have to lookup an array list
193        for (int i = 0; i < mInputLength; i++) {
194            mCodes[i] = codes.getCodesAt(i);
195        }
196        mMaxDepth = mInputLength * 3;
197        getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback);
198        for (int i = 0; i < mInputLength; i++) {
199            getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback);
200        }
201    }
202
203    @Override
204    public synchronized boolean isValidWord(CharSequence word) {
205        synchronized (mUpdatingLock) {
206            // If we need to update, start off a background task
207            if (mRequiresReload) startDictionaryLoadingTaskLocked();
208            if (mUpdatingDictionary) return false;
209        }
210        final int freq = getWordFrequency(word);
211        return freq > -1;
212    }
213
214    /**
215     * Returns the word's frequency or -1 if not found
216     */
217    public int getWordFrequency(CharSequence word) {
218        Node node = searchNode(mRoots, word, 0, word.length());
219        return (node == null) ? -1 : node.frequency;
220    }
221
222    /**
223     * Recursively traverse the tree for words that match the input. Input consists of
224     * a list of arrays. Each item in the list is one input character position. An input
225     * character is actually an array of multiple possible candidates. This function is not
226     * optimized for speed, assuming that the user dictionary will only be a few hundred words in
227     * size.
228     * @param roots node whose children have to be search for matches
229     * @param codes the input character codes
230     * @param word the word being composed as a possible match
231     * @param depth the depth of traversal - the length of the word being composed thus far
232     * @param completion whether the traversal is now in completion mode - meaning that we've
233     * exhausted the input and we're looking for all possible suffixes.
234     * @param snr current weight of the word being formed
235     * @param inputIndex position in the input characters. This can be off from the depth in 
236     * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
237     * "wouldve", it could be matching "would've", so the depth will be one more than the
238     * inputIndex
239     * @param callback the callback class for adding a word
240     */
241    protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 
242            final int depth, boolean completion, int snr, int inputIndex, int skipPos,
243            WordCallback callback) {
244        final int count = roots.length;
245        final int codeSize = mInputLength;
246        // Optimization: Prune out words that are too long compared to how much was typed.
247        if (depth > mMaxDepth) {
248            return;
249        }
250        int[] currentChars = null;
251        if (codeSize <= inputIndex) {
252            completion = true;
253        } else {
254            currentChars = mCodes[inputIndex];
255        }
256
257        for (int i = 0; i < count; i++) {
258            final Node node = roots.data[i];
259            final char c = node.code;
260            final char lowerC = toLowerCase(c);
261            final boolean terminal = node.terminal;
262            final NodeArray children = node.children;
263            final int freq = node.frequency;
264            if (completion) {
265                word[depth] = c;
266                if (terminal) {
267                    if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
268                                DataType.UNIGRAM)) {
269                        return;
270                    }
271                    // Add to frequency of next letters for predictive correction
272                    if (mNextLettersFrequencies != null && depth >= inputIndex && skipPos < 0
273                            && mNextLettersFrequencies.length > word[inputIndex]) {
274                        mNextLettersFrequencies[word[inputIndex]]++;
275                    }
276                }
277                if (children != null) {
278                    getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
279                            skipPos, callback);
280                }
281            } else if ((c == QUOTE && currentChars[0] != QUOTE) || depth == skipPos) {
282                // Skip the ' and continue deeper
283                word[depth] = c;
284                if (children != null) {
285                    getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 
286                            skipPos, callback);
287                }
288            } else {
289                // Don't use alternatives if we're looking for missing characters
290                final int alternativesSize = skipPos >= 0? 1 : currentChars.length;
291                for (int j = 0; j < alternativesSize; j++) {
292                    final int addedAttenuation = (j > 0 ? 1 : 2);
293                    final int currentChar = currentChars[j];
294                    if (currentChar == -1) {
295                        break;
296                    }
297                    if (currentChar == lowerC || currentChar == c) {
298                        word[depth] = c;
299
300                        if (codeSize == inputIndex + 1) {
301                            if (terminal) {
302                                if (INCLUDE_TYPED_WORD_IF_VALID 
303                                        || !same(word, depth + 1, codes.getTypedWord())) {
304                                    int finalFreq = freq * snr * addedAttenuation;
305                                    if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
306                                    callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
307                                            DataType.UNIGRAM);
308                                }
309                            }
310                            if (children != null) {
311                                getWordsRec(children, codes, word, depth + 1,
312                                        true, snr * addedAttenuation, inputIndex + 1,
313                                        skipPos, callback);
314                            }
315                        } else if (children != null) {
316                            getWordsRec(children, codes, word, depth + 1, 
317                                    false, snr * addedAttenuation, inputIndex + 1,
318                                    skipPos, callback);
319                        }
320                    }
321                }
322            }
323        }
324    }
325
326    protected int setBigram(String word1, String word2, int frequency) {
327        return addOrSetBigram(word1, word2, frequency, false);
328    }
329
330    protected int addBigram(String word1, String word2, int frequency) {
331        return addOrSetBigram(word1, word2, frequency, true);
332    }
333
334    /**
335     * Adds bigrams to the in-memory trie structure that is being used to retrieve any word
336     * @param frequency frequency for this bigrams
337     * @param addFrequency if true, it adds to current frequency
338     * @return returns the final frequency
339     */
340    private int addOrSetBigram(String word1, String word2, int frequency, boolean addFrequency) {
341        Node firstWord = searchWord(mRoots, word1, 0, null);
342        Node secondWord = searchWord(mRoots, word2, 0, null);
343        LinkedList<NextWord> bigram = firstWord.ngrams;
344        if (bigram == null || bigram.size() == 0) {
345            firstWord.ngrams = new LinkedList<NextWord>();
346            bigram = firstWord.ngrams;
347        } else {
348            for (NextWord nw : bigram) {
349                if (nw.word == secondWord) {
350                    if (addFrequency) {
351                        nw.frequency += frequency;
352                    } else {
353                        nw.frequency = frequency;
354                    }
355                    return nw.frequency;
356                }
357            }
358        }
359        NextWord nw = new NextWord(secondWord, frequency);
360        firstWord.ngrams.add(nw);
361        return frequency;
362    }
363
364    /**
365     * Searches for the word and add the word if it does not exist.
366     * @return Returns the terminal node of the word we are searching for.
367     */
368    private Node searchWord(NodeArray children, String word, int depth, Node parentNode) {
369        final int wordLength = word.length();
370        final char c = word.charAt(depth);
371        // Does children have the current character?
372        final int childrenLength = children.length;
373        Node childNode = null;
374        boolean found = false;
375        for (int i = 0; i < childrenLength; i++) {
376            childNode = children.data[i];
377            if (childNode.code == c) {
378                found = true;
379                break;
380            }
381        }
382        if (!found) {
383            childNode = new Node();
384            childNode.code = c;
385            childNode.parent = parentNode;
386            children.add(childNode);
387        }
388        if (wordLength == depth + 1) {
389            // Terminate this word
390            childNode.terminal = true;
391            return childNode;
392        }
393        if (childNode.children == null) {
394            childNode.children = new NodeArray();
395        }
396        return searchWord(childNode.children, word, depth + 1, childNode);
397    }
398
399    // @VisibleForTesting
400    boolean reloadDictionaryIfRequired() {
401        synchronized (mUpdatingLock) {
402            // If we need to update, start off a background task
403            if (mRequiresReload) startDictionaryLoadingTaskLocked();
404            // Currently updating contacts, don't return any results.
405            return mUpdatingDictionary;
406        }
407    }
408
409    private void runReverseLookUp(final CharSequence previousWord, final WordCallback callback) {
410        Node prevWord = searchNode(mRoots, previousWord, 0, previousWord.length());
411        if (prevWord != null && prevWord.ngrams != null) {
412            reverseLookUp(prevWord.ngrams, callback);
413        }
414    }
415
416    @Override
417    public void getBigrams(final WordComposer codes, final CharSequence previousWord,
418            final WordCallback callback, int[] nextLettersFrequencies) {
419        if (!reloadDictionaryIfRequired()) {
420            runReverseLookUp(previousWord, callback);
421        }
422    }
423
424    /**
425     * Used only for testing purposes
426     * This function will wait for loading from database to be done
427     */
428    void waitForDictionaryLoading() {
429        while (mUpdatingDictionary) {
430            try {
431                Thread.sleep(100);
432            } catch (InterruptedException e) {
433            }
434        }
435    }
436
437    /**
438     * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words
439     * through callback.
440     * @param terminalNodes list of terminal nodes we want to add
441     */
442    private void reverseLookUp(LinkedList<NextWord> terminalNodes,
443            final WordCallback callback) {
444        Node node;
445        int freq;
446        for (NextWord nextWord : terminalNodes) {
447            node = nextWord.word;
448            freq = nextWord.frequency;
449            // TODO Not the best way to limit suggestion threshold
450            if (freq >= UserBigramDictionary.SUGGEST_THRESHOLD) {
451                sb.setLength(0);
452                do {
453                    sb.insert(0, node.code);
454                    node = node.parent;
455                } while(node != null);
456
457                // TODO better way to feed char array?
458                callback.addWord(sb.toString().toCharArray(), 0, sb.length(), freq, mDicTypeId,
459                        DataType.BIGRAM);
460            }
461        }
462    }
463
464    /**
465     * Search for the terminal node of the word
466     * @return Returns the terminal node of the word if the word exists
467     */
468    private Node searchNode(final NodeArray children, final CharSequence word, final int offset,
469            final int length) {
470        // TODO Consider combining with addWordRec
471        final int count = children.length;
472        char currentChar = word.charAt(offset);
473        for (int j = 0; j < count; j++) {
474            final Node node = children.data[j];
475            if (node.code == currentChar) {
476                if (offset == length - 1) {
477                    if (node.terminal) {
478                        return node;
479                    }
480                } else {
481                    if (node.children != null) {
482                        Node returnNode = searchNode(node.children, word, offset + 1, length);
483                        if (returnNode != null) return returnNode;
484                    }
485                }
486            }
487        }
488        return null;
489    }
490
491    protected void clearDictionary() {
492        mRoots = new NodeArray();
493    }
494
495    private class LoadDictionaryTask extends AsyncTask<Void, Void, Void> {
496        @Override
497        protected Void doInBackground(Void... v) {
498            loadDictionaryAsync();
499            synchronized (mUpdatingLock) {
500                mUpdatingDictionary = false;
501            }
502            return null;
503        }
504    }
505
506    static char toLowerCase(char c) {
507        if (c < BASE_CHARS.length) {
508            c = BASE_CHARS[c];
509        }
510        if (c >= 'A' && c <= 'Z') {
511            c = (char) (c | 32);
512        } else if (c > 127) {
513            c = Character.toLowerCase(c);
514        }
515        return c;
516    }
517
518    /**
519     * Table mapping most combined Latin, Greek, and Cyrillic characters
520     * to their base characters.  If c is in range, BASE_CHARS[c] == c
521     * if c is not a combined character, or the base character if it
522     * is combined.
523     */
524    static final char BASE_CHARS[] = {
525        0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 
526        0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 
527        0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 
528        0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 
529        0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 
530        0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 
531        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 
532        0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 
533        0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 
534        0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 
535        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 
536        0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 
537        0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 
538        0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 
539        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 
540        0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 
541        0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 
542        0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 
543        0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 
544        0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 
545        0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 
546        0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 
547        0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 
548        0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 
549        0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 
550        0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 
551        0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 
552        0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f
553                                                                        // Manually changed df to 73
554        0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 
555        0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 
556        0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 
557        0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f
558        0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 
559        0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 
560        0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 
561        0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 
562        0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 
563        0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 
564        0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 
565        0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 
566        0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 
567        0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 
568        0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 
569        0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 
570        0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 
571        0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 
572        0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 
573        0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 
574        0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 
575        0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 
576        0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 
577        0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 
578        0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 
579        0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 
580        0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 
581        0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 
582        0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 
583        0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 
584        0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 
585        0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 
586        0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 
587        0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 
588        0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 
589        0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 
590        0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 
591        0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 
592        0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 
593        0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 
594        0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 
595        0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 
596        0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 
597        0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 
598        0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 
599        0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 
600        0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 
601        0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 
602        0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 
603        0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 
604        0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 
605        0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 
606        0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 
607        0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 
608        0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 
609        0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 
610        0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 
611        0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 
612        0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 
613        0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 
614        0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 
615        0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 
616        0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 
617        0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 
618        0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 
619        0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 
620        0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 
621        0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 
622        0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 
623        0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 
624        0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 
625        0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 
626        0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 
627        0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 
628        0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 
629        0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 
630        0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 
631        0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 
632        0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 
633        0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 
634        0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 
635        0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 
636        0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 
637        0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 
638        0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 
639        0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 
640        0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 
641        0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 
642        0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 
643        0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 
644        0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 
645        0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 
646        0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 
647        0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 
648        0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 
649        0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 
650        0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 
651        0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 
652        0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 
653        0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 
654        0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 
655        0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 
656        0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 
657        0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 
658        0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 
659        0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 
660        0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 
661        0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 
662        0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 
663        0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 
664        0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 
665        0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 
666        0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 
667        0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 
668        0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 
669        0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 
670        0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 
671        0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 
672        0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 
673        0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 
674        0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 
675        0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 
676        0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 
677        0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 
678        0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 
679        0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 
680        0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 
681        0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 
682        0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 
683        0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 
684        0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 
685        0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 
686    };
687
688    // generated with:
689    // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }'
690
691}