PageRenderTime 77ms CodeModel.GetById 20ms app.highlight 37ms RepoModel.GetById 13ms app.codeStats 1ms

/src/im/gpinyin/include/dictlist.h

http://ftk.googlecode.com/
C++ Header | 120 lines | 52 code | 28 blank | 40 comment | 0 complexity | cc8511693506997d675270559207a9e0 MD5 | raw file
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

 17#ifndef PINYINIME_INCLUDE_DICTLIST_H__
 18#define PINYINIME_INCLUDE_DICTLIST_H__
 19
 20#include <stdlib.h>
 21#include <stdio.h>
 22#include "./dictdef.h"
 23#include "./searchutility.h"
 24#include "./spellingtrie.h"
 25#include "./utf16char.h"
 26
 27namespace ime_pinyin {
 28
 29class DictList {
 30 private:
 31  bool initialized_;
 32
 33  const SpellingTrie *spl_trie_;
 34
 35  // Number of SingCharItem. The first is blank, because id 0 is invalid.
 36  unsigned scis_num_;
 37  char16 *scis_hz_;
 38  SpellingId *scis_splid_;
 39
 40  // The large memory block to store the word list.
 41  char16 *buf_;
 42
 43  // Starting position of those words whose lengths are i+1, counted in
 44  // char16
 45  unsigned start_pos_[kMaxLemmaSize + 1];
 46
 47  unsigned start_id_[kMaxLemmaSize + 1];
 48
 49  int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
 50
 51  bool alloc_resource(unsigned buf_size, unsigned scim_num);
 52
 53  void free_resource();
 54
 55#ifdef ___BUILD_MODEL___
 56  // Calculate the requsted memory, including the start_pos[] buffer.
 57  unsigned calculate_size(const LemmaEntry *lemma_arr, unsigned lemma_num);
 58
 59  void fill_scis(const SingleCharItem *scis, unsigned scis_num);
 60
 61  // Copy the related content to the inner buffer
 62  // It should be called after calculate_size()
 63  void fill_list(const LemmaEntry *lemma_arr, unsigned lemma_num);
 64
 65  // Find the starting position for the buffer of those 2-character Chinese word
 66  // whose first character is the given Chinese character.
 67  char16* find_pos2_startedbyhz(char16 hz_char);
 68#endif
 69
 70  // Find the starting position for the buffer of those words whose lengths are
 71  // word_len. The given parameter cmp_func decides how many characters from
 72  // beginning will be used to compare.
 73  char16* find_pos_startedbyhzs(const char16 last_hzs[],
 74                                unsigned word_Len,
 75                                int (*cmp_func)(const void *, const void *));
 76
 77 public:
 78
 79  DictList();
 80  ~DictList();
 81
 82  bool save_list(FILE *fp);
 83  bool load_list(FILE *fp);
 84
 85#ifdef ___BUILD_MODEL___
 86  // Init the list from the LemmaEntry array.
 87  // lemma_arr should have been sorted by the hanzi_str, and have been given
 88  // ids from 1
 89  bool init_list(const SingleCharItem *scis, unsigned scis_num,
 90                 const LemmaEntry *lemma_arr, unsigned lemma_num);
 91#endif
 92
 93  // Get the hanzi string for the given id
 94  uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
 95
 96  void convert_to_hanzis(char16 *str, uint16 str_len);
 97
 98  void convert_to_scis_ids(char16 *str, uint16 str_len);
 99
100  // last_hzs stores the last n Chinese characters history, its length should be
101  // less or equal than kMaxPredictSize.
102  // hzs_len specifies the length(<= kMaxPredictSize).
103  // predict_buf is used to store the result.
104  // buf_len specifies the buffer length.
105  // b4_used specifies how many items before predict_buf have been used.
106  // Returned value is the number of newly added items.
107  unsigned predict(const char16 last_hzs[], uint16 hzs_len,
108                 NPredictItem *npre_items, unsigned npre_max,
109                 unsigned b4_used);
110
111  // If half_splid is a valid half spelling id, return those full spelling
112  // ids which share this half id.
113  uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
114                              uint16 *splids, uint16 max_splids);
115
116  LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
117};
118}
119
120#endif  // PINYINIME_INCLUDE_DICTLIST_H__