/src/im/gpinyin/include/spellingtable.h

http://ftk.googlecode.com/ · C++ Header · 111 lines · 41 code · 24 blank · 46 comment · 0 complexity · f355e5ae4326c4d3f17c083bbb67054d MD5 · raw file

  1. /*
  2. * Copyright (C) 2009 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__
  17. #define PINYINIME_INCLUDE_SPELLINGTABLE_H__
  18. #include <stdlib.h>
  19. #include "./dictdef.h"
  20. namespace ime_pinyin {
  21. #ifdef ___BUILD_MODEL___
  22. const unsigned kMaxSpellingSize = kMaxPinyinSize;  // Upper bound on spelling length; kMaxPinyinSize is presumably declared in dictdef.h -- TODO confirm.
  23. typedef struct {  // One raw spelling entry: a spelling string paired with a frequency value.
  24. char str[kMaxSpellingSize + 1];  // Spelling text; the extra byte is presumably for the '\0' terminator -- confirm.
  25. double freq;  // NOTE(review): looks like the accumulated occurrence count given to put_spelling() -- confirm in the .cpp.
  26. } RawSpelling, *PRawSpelling;
  27. // This class is used to store spelling strings.
  28. // The length of an input spelling string should be less than or equal to
  29. // spelling_size_ (set by init_table()). If the input string is too long,
  30. // only its first spelling_size_ chars are kept.
  31. class SpellingTable {
  32. private:
  33. static const unsigned kNotSupportNum = 3;  // Number of entries in kNotSupportList.
  34. static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1];  // Defined out of line; presumably spellings the table does not support -- TODO confirm how they are handled.
  35. bool need_score_;  // Presumably the need_score flag passed to init_table() -- confirm.
  36. unsigned spelling_max_num_;  // Presumably spl_max_num passed to init_table() -- confirm.
  37. RawSpelling *raw_spellings_;  // NOTE(review): looks like the storage for raw entries; allocation and ownership are in the .cpp -- confirm.
  38. // Used to store spelling strings. If the spelling table needs to calculate
  39. // scores, an extra char after each spelling string holds the score.
  40. // An item with a lower score has a higher probability.
  41. char *spelling_buf_;
  42. unsigned spelling_size_;  // Maximum kept spelling length; set by init_table().
  43. double total_freq_;  // Presumably the sum of all frequencies put into the table -- confirm.
  44. unsigned spelling_num_;  // Presumably the number of spellings currently stored -- confirm.
  45. double score_amplifier_;  // NOTE(review): stored as double, but get_score_amplifier() is declared to return float.
  46. unsigned char average_score_;  // Presumably the value returned by get_average_score() -- confirm.
  47. // If frozen_ is true, put_spelling() and contain() must not be called.
  48. bool frozen_;
  49. unsigned get_hash_pos(const char* spelling_str);  // Presumably maps a spelling string to its initial hash position -- confirm.
  50. unsigned hash_pos_next(unsigned hash_pos);  // Presumably yields the next position to probe after hash_pos -- confirm.
  51. void free_resource();  // Presumably releases the buffers owned by the table -- confirm.
  52. public:
  53. SpellingTable();
  54. ~SpellingTable();
  55. // pure_spl_size is the pure maximum spelling string size. For example,
  56. // "zhuang" is the longest item in Pinyin, so pure_spl_size should be 6.
  57. // spl_max_num is the maximum number of spelling strings to store.
  58. // need_score indicates whether the caller needs a score to be calculated
  59. // for each spelling.
  60. bool init_table(unsigned pure_spl_size, unsigned spl_max_num, bool need_score);
  61. // Put a spelling string into the table.
  62. // It always returns false if called after arrange() without a new
  63. // init_table() operation.
  64. // spl_count is the spelling's occurrence count.
  65. // If the spelling is already in the table, its occurrence count is accumulated.
  66. bool put_spelling(const char* spelling_str, double spl_count);
  67. // Test whether a spelling string is in the table.
  68. // It always returns false when called after arrange() without a new
  69. // init_table() operation.
  70. bool contain(const char* spelling_str);
  71. // Sort the spelling strings and place them from the beginning of the buffer.
  72. // Return a pointer to the sorted spelling strings.
  73. // item_size and spl_num return the item size and the number of spellings.
  74. // Because each spelling uses a '\0' as terminator, the returned item_size is
  75. // at least one char longer than the pure_spl_size parameter specified by
  76. // init_table(). If the table is initialized to calculate scores, item_size
  77. // is increased by 1, and current_spl_str[item_size - 1] stores an
  78. // unsigned char score.
  79. // An item with a lower score has a higher probability.
  80. // Do not call put_spelling() or contain() after arrange().
  81. const char* arrange(unsigned *item_size, unsigned *spl_num);
  82. float get_score_amplifier();  // NOTE(review): returns float although score_amplifier_ is a double -- confirm the narrowing is intended.
  83. unsigned char get_average_score();
  84. };
  85. #endif // ___BUILD_MODEL___
  86. }
  87. #endif // PINYINIME_INCLUDE_SPELLINGTABLE_H__