/src/im/gpinyin/include/searchutility.h

http://ftk.googlecode.com/ · C++ Header · 142 lines · 54 code · 26 blank · 62 comment · 0 complexity · 20abf7dc435bd7e10f222bed413022c3 MD5 · raw file

  1. /*
  2. * Copyright (C) 2009 The Android Open Source Project
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
  17. #define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
  18. #include <stdlib.h>
  19. #include "./spellingtrie.h"
  20. namespace ime_pinyin {
// Type used to identify the size of a pool, such as an id pool.
// It is an alias of uint16, so it has exactly that type's value range.
typedef uint16 PoolPosType;
// Handle type used to identify a parsing milestone in an atom dictionary.
// It is an alias of uint16; handles are dictionary specific, i.e. each
// dictionary is given its own MileStoneHandle when it is asked to extend.
typedef uint16 MileStoneHandle;
  25. // Type used to express a lemma and its probability score.
  26. typedef struct {
  27. unsigned id:(kLemmaIdSize * 8);
  28. unsigned lma_len:4;
  29. uint16 psb; // The score, the lower psb, the higher possibility.
  30. // For single character items, we may also need Hanzi.
  31. // For multiple characer items, ignore it.
  32. char16 hanzi;
  33. } LmaPsbItem, *PLmaPsbItem;
  34. // LmaPsbItem extended with string.
  35. typedef struct {
  36. LmaPsbItem lpi;
  37. char16 str[kMaxLemmaSize + 1];
  38. } LmaPsbStrItem, *PLmaPsbStrItem;
  39. typedef struct {
  40. float psb;
  41. char16 pre_hzs[kMaxPredictSize];
  42. uint16 his_len; // The length of the history used to do the prediction.
  43. } NPredictItem, *PNPredictItem;
  44. // Parameter structure used to extend in a dictionary. All dictionaries
  45. // receives the same DictExtPara and a dictionary specific MileStoneHandle for
  46. // extending.
  47. //
  48. // When the user inputs a new character, AtomDictBase::extend_dict() will be
  49. // called at least once for each dictionary.
  50. //
  51. // For example, when the user inputs "wm", extend_dict() will be called twice,
  52. // and the DictExtPara parameter are as follows respectively:
  53. // 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
  54. // splid_end_split = false; id_start = wa(the first id start with 'w');
  55. // id_num = number of ids starting with 'w'.
  56. // 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
  57. // splid_end_split = false; id_start = wa; id_num = number of ids starting with
  58. // 'w'.
  59. //
  60. // For string "women", one of the cases of the DictExtPara parameter is:
  61. // splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
  62. // step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
  63. //
  64. typedef struct {
  65. // Spelling ids for extending, there are splids_extended + 1 ids in the
  66. // buffer.
  67. // For a normal lemma, there can only be kMaxLemmaSize spelling ids in max,
  68. // but for a composing phrase, there can kMaxSearchSteps spelling ids.
  69. uint16 splids[kMaxSearchSteps];
  70. // Number of ids that have been used before. splids[splids_extended] is the
  71. // newly added id for the current extension.
  72. uint16 splids_extended;
  73. // The step span of the extension. It is also the size of the string for
  74. // the newly added spelling id.
  75. uint16 ext_len;
  76. // The step number for the current extension. It is also the ending position
  77. // in the input Pinyin string for the substring of spelling ids in splids[].
  78. // For example, when the user inputs "women", step_no = 4.
  79. // This parameter may useful to manage the MileStoneHandle list for each
  80. // step. When the user deletes a character from the string, MileStoneHandle
  81. // objects for the the steps after that character should be reset; when the
  82. // user begins a new string, all MileStoneHandle objects should be reset.
  83. uint16 step_no;
  84. // Indicate whether the newly added spelling ends with a splitting character
  85. bool splid_end_split;
  86. // If the newly added id is a half id, id_start is the first id of the
  87. // corresponding full ids; if the newly added id is a full id, id_start is
  88. // that id.
  89. uint16 id_start;
  90. // If the newly added id is a half id, id_num is the number of corresponding
  91. // ids; if it is a full id, id_num == 1.
  92. uint16 id_num;
  93. }DictExtPara, *PDictExtPara;
// Predicates on a lemma id. They are only declared in this header; the id
// ranges they test live with their definitions.
// NOTE(review): judging by the names, they report whether lma_id denotes a
// system-dictionary lemma, a user-dictionary lemma, or a composing phrase,
// respectively -- confirm against the definitions.
bool is_system_lemma(LemmaIdType lma_id);
bool is_user_lemma(LemmaIdType lma_id);
bool is_composing_lemma(LemmaIdType lma_id);
// Comparison functions with the int (const void *, const void *) signature
// that qsort() and bsearch() from <stdlib.h> accept.
// NOTE(review): the cmp_lpi_* functions presumably expect both pointers to
// reference LmaPsbItem objects and order them by the key in the name (psb,
// unified psb, id, hanzi), and cmp_lpsi_with_str presumably expects
// LmaPsbStrItem objects ordered by their str member -- confirm against the
// definitions.
int cmp_lpi_with_psb(const void *p1, const void *p2);
int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
int cmp_lpi_with_id(const void *p1, const void *p2);
int cmp_lpi_with_hanzi(const void *p1, const void *p2);
int cmp_lpsi_with_str(const void *p1, const void *p2);
// A family of eight comparison functions, numbered 1 through 8, with the
// int (const void *, const void *) signature that qsort() and bsearch() from
// <stdlib.h> accept.
// NOTE(review): presumably cmp_hanzis_N compares two Hanzi strings that are
// N char16 characters long -- confirm against the definitions.
int cmp_hanzis_1(const void *p1, const void *p2);
int cmp_hanzis_2(const void *p1, const void *p2);
int cmp_hanzis_3(const void *p1, const void *p2);
int cmp_hanzis_4(const void *p1, const void *p2);
int cmp_hanzis_5(const void *p1, const void *p2);
int cmp_hanzis_6(const void *p1, const void *p2);
int cmp_hanzis_7(const void *p1, const void *p2);
int cmp_hanzis_8(const void *p1, const void *p2);
// Comparison functions with the int (const void *, const void *) signature
// that qsort() and bsearch() from <stdlib.h> accept.
// NOTE(review): presumably both pointers reference NPredictItem objects,
// ordered by psb, by his_len then psb, and by pre_hzs then psb respectively
// -- confirm the keys and their directions against the definitions.
int cmp_npre_by_score(const void *p1, const void *p2);
int cmp_npre_by_hislen_score(const void *p1, const void *p2);
int cmp_npre_by_hanzi_score(const void *p1, const void *p2);
// Takes a pointer to NPredictItem objects plus a count, and returns an
// unsigned value.
// NOTE(review): presumably removes duplicate entries from the npre_num items
// at npre_items in place and returns how many items remain; whether the
// input must already be sorted is not visible here -- confirm against the
// definition.
unsigned remove_duplicate_npre(NPredictItem *npre_items, unsigned npre_num);
// Maps a size to an unsigned value.
// NOTE(review): presumably rounds size up to the next multiple of
// sizeof(unsigned) -- confirm against the definition.
unsigned align_to_unsigned(unsigned size);
  115. } // namespace
  116. #endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__