/src/im/gpinyin/include/dictlist.h

http://ftk.googlecode.com/ · C++ Header · 120 lines · 52 code · 28 blank · 40 comment · 0 complexity · cc8511693506997d675270559207a9e0 MD5 · raw file

  1. /*
  2. * Copyright (C) 2009 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef PINYINIME_INCLUDE_DICTLIST_H__
  17. #define PINYINIME_INCLUDE_DICTLIST_H__
  18. #include <stdlib.h>
  19. #include <stdio.h>
  20. #include "./dictdef.h"
  21. #include "./searchutility.h"
  22. #include "./spellingtrie.h"
  23. #include "./utf16char.h"
  24. namespace ime_pinyin {
  25. class DictList {  // Declares the single-character item table, the word-list buffer and the lookup / prediction / save / load entry points that operate on them.
  26. private:
  27. bool initialized_;  // NOTE(review): presumably true once the list has been built or loaded - confirm in the implementation.
  28. const SpellingTrie *spl_trie_;  // Read-only pointer to a SpellingTrie; who owns it is not visible from this header.
  29. // Number of SingleCharItem entries. The first is blank, because id 0 is invalid.
  30. unsigned scis_num_;
  31. char16 *scis_hz_;  // NOTE(review): presumably the hanzi of each single-character item (scis_num_ entries) - confirm.
  32. SpellingId *scis_splid_;  // NOTE(review): presumably the spelling id of each single-character item, parallel to scis_hz_ - confirm.
  33. // The large memory block to store the word list.
  34. char16 *buf_;
  35. // Starting position of those words whose lengths are i+1, counted in
  36. // char16 units (start_pos_[i] for index i).
  37. unsigned start_pos_[kMaxLemmaSize + 1];
  38. unsigned start_id_[kMaxLemmaSize + 1];  // NOTE(review): presumably the first lemma id of the words whose lengths are i+1, mirroring start_pos_ - confirm.
  39. int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);  // kMaxLemmaSize comparison callbacks taking two const void pointers and returning int; NOTE(review): presumably one per word length - confirm.
  40. bool alloc_resource(unsigned buf_size, unsigned scim_num);  // NOTE(review): scim_num looks like a typo for scis_num; presumably allocates buf_ and the scis_* arrays - confirm.
  41. void free_resource();  // NOTE(review): presumably releases whatever alloc_resource() acquired - confirm.
  42. #ifdef ___BUILD_MODEL___
  43. // Calculate the requested memory, including the start_pos[] buffer.
  44. unsigned calculate_size(const LemmaEntry *lemma_arr, unsigned lemma_num);
  45. void fill_scis(const SingleCharItem *scis, unsigned scis_num);  // NOTE(review): presumably copies the scis_num items of scis into the scis_* tables - confirm.
  46. // Copy the related content to the inner buffer
  47. // It should be called after calculate_size()
  48. void fill_list(const LemmaEntry *lemma_arr, unsigned lemma_num);
  49. // Find the starting position for the buffer of those 2-character Chinese words
  50. // whose first character is the given Chinese character.
  51. char16* find_pos2_startedbyhz(char16 hz_char);
  52. #endif
  53. // Find the starting position for the buffer of those words whose lengths are
  54. // word_Len. The given parameter cmp_func decides how many characters from
  55. // the beginning will be used to compare.
  56. char16* find_pos_startedbyhzs(const char16 last_hzs[],
  57. unsigned word_Len,
  58. int (*cmp_func)(const void *, const void *));
  59. public:
  60. DictList();
  61. ~DictList();
  62. bool save_list(FILE *fp);  // NOTE(review): presumably writes the list to the open stream fp and returns true on success - confirm.
  63. bool load_list(FILE *fp);  // NOTE(review): presumably reads back what save_list() wrote from fp and returns true on success - confirm.
  64. #ifdef ___BUILD_MODEL___
  65. // Init the list from the LemmaEntry array.
  66. // lemma_arr should have been sorted by the hanzi_str, and have been given
  67. // ids from 1
  68. bool init_list(const SingleCharItem *scis, unsigned scis_num,
  69. const LemmaEntry *lemma_arr, unsigned lemma_num);
  70. #endif
  71. // Get the hanzi string for the given id
  72. uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);  // str_buf receives the string; NOTE(review): str_max is presumably its capacity in char16 and the return value the string length - confirm.
  73. void convert_to_hanzis(char16 *str, uint16 str_len);  // NOTE(review): presumably rewrites the str_len entries of str in place from single-character item ids to hanzi - confirm.
  74. void convert_to_scis_ids(char16 *str, uint16 str_len);  // NOTE(review): presumably the reverse of convert_to_hanzis(), in place - confirm.
  75. // last_hzs stores the last n Chinese characters history, its length should be
  76. // less than or equal to kMaxPredictSize.
  77. // hzs_len specifies the length(<= kMaxPredictSize).
  78. // predict_buf is used to store the result. NOTE(review): no parameter has this name; npre_items appears to be meant - confirm.
  79. // buf_len specifies the buffer length. NOTE(review): no parameter has this name; npre_max appears to be meant - confirm.
  80. // b4_used specifies how many items before predict_buf have been used.
  81. // Returned value is the number of newly added items.
  82. unsigned predict(const char16 last_hzs[], uint16 hzs_len,
  83. NPredictItem *npre_items, unsigned npre_max,
  84. unsigned b4_used);
  85. // If half_splid is a valid half spelling id, return those full spelling
  86. // ids which share this half id.
  87. uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
  88. uint16 *splids, uint16 max_splids);
  89. LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);  // NOTE(review): presumably the inverse of get_lemma_str(): looks up the id of the str_len-character string str - confirm.
  90. };
  91. }
  92. #endif // PINYINIME_INCLUDE_DICTLIST_H__