/googleclient/third_party/icu38/files/source/common/triedict.h

http://o3d.googlecode.com/ · C Header · 389 lines · 98 code · 54 blank · 237 comment · 0 complexity · 01de4a17162656550826a6c7cdcc9760 MD5 · raw file

  1. /**
  2. *******************************************************************************
  3. * Copyright (C) 2006, International Business Machines Corporation and others. *
  4. * All Rights Reserved. *
  5. *******************************************************************************
  6. */
  7. #ifndef TRIEDICT_H
  8. #define TRIEDICT_H
  9. #include "unicode/utypes.h"
  10. #include "unicode/uobject.h"
  11. #include "unicode/utext.h"
  12. struct UEnumeration; // Forward declarations: this header only uses pointers to these types.
  13. struct UDataSwapper;
  14. struct UDataMemory;
  15. /**
  16. * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
  17. *
  18. * @param ds Pointer to UDataSwapper containing global data about the
  19. * transformation and function pointers for handling primitive
  20. * types.
  21. * @param inData Pointer to the input data to be transformed or examined.
  22. * @param length Length of the data in bytes. May be -1 for preflighting.
  23. * If length>=0, then transform the data.
  24. * If length==-1, then only determine the length of the data.
  25. * The length cannot be determined from the data itself for all
  26. * types of data (e.g., not for simple arrays of integers).
  27. * @param outData Pointer to the output data buffer.
  28. * If length>=0 (transformation), then the output buffer must
  29. * have a capacity of at least length bytes.
  30. * If length==-1, then outData will not be used and can be NULL.
  31. * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
  32. * fulfill U_SUCCESS on input.
  33. * @return The actual length of the data, in bytes.
  34. *
  35. * @see UDataSwapper
  36. */
  37. U_CAPI int32_t U_EXPORT2
  38. triedict_swap(const UDataSwapper *ds,
  39. const void *inData, int32_t length, void *outData,
  40. UErrorCode *pErrorCode);
  41. U_NAMESPACE_BEGIN
  42. class StringEnumeration; // Forward declaration; returned by pointer from openWords().
  43. /*******************************************************************
  44. * TrieWordDictionary
  45. */
  46. /**
  47. * <p>TrieWordDictionary is an abstract class that represents a word
  48. * dictionary based on a trie. The base protocol is read-only.
  49. * Subclasses may allow writing.</p>
  50. */
  51. class U_COMMON_API TrieWordDictionary : public UMemory {
  52. public:
  53. /**
  54. * <p>Default constructor.</p>
  55. *
  56. */
  57. TrieWordDictionary();
  58. /**
  59. * <p>Virtual destructor.</p>
  60. */
  61. virtual ~TrieWordDictionary();
  62. /**
  63. * <p>Returns true if the dictionary contains values associated with each word. Pure virtual.</p>
  64. */
  65. virtual UBool getValued() const = 0;
  66. /**
  67. * <p>Find dictionary words that match the text. Pure virtual.</p>
  68. *
  69. * @param text A UText representing the text. The
  70. * iterator is left after the longest prefix match in the dictionary.
  71. * @param maxLength The maximum number of code units to match.
  72. * @param lengths An array that is filled with the lengths of words that matched.
  73. * @param count Filled with the number of elements output in lengths.
  74. * @param limit The size of the lengths array; this limits the number of words output.
  75. * @param values An array that is filled with the values associated with the matched words. Defaults to NULL.
  76. * @return The number of characters in text that were matched.
  77. */
  78. virtual int32_t matches( UText *text,
  79. int32_t maxLength,
  80. int32_t *lengths,
  81. int &count,
  82. int limit,
  83. uint16_t *values = NULL) const = 0;
  84. /**
  85. * <p>Return a StringEnumeration for iterating all the words in the dictionary. Pure virtual.</p>
  86. *
  87. * @param status A status code recording the success of the call.
  88. * @return A StringEnumeration that will iterate through the whole dictionary.
  89. * The caller is responsible for closing it. The order is unspecified.
  90. */
  91. virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
  92. };
  93. /*******************************************************************
  94. * MutableTrieDictionary
  95. */
  96. /**
  97. * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
  98. * added.</p>
  99. */
  100. struct TernaryNode; // Forward declaration
  101. class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
  102. private:
  103. /**
  104. * The root node of the trie
  105. * @internal
  106. */
  107. TernaryNode *fTrie;
  108. /**
  109. * A UText for internal use
  110. * @internal
  111. */
  112. UText *fIter;
  113. /**
  114. * Whether the dictionary stores values associated with each word
  115. * @internal
  116. */
  117. UBool fValued;
  118. friend class CompactTrieDictionary; // For fast conversion
  119. public:
  120. /**
  121. * <p>Constructor.</p>
  122. *
  123. * @param median A UChar around which to balance the trie. Ideally, it should
  124. * begin at least one word that is near the median of the set in the dictionary
  125. * @param status A status code recording the success of the call.
  126. * @param containsValue True if the dictionary stores values associated with each word. Defaults to FALSE.
  127. */
  128. MutableTrieDictionary( UChar median, UErrorCode &status, UBool containsValue = FALSE );
  129. /**
  130. * <p>Virtual destructor.</p>
  131. */
  132. virtual ~MutableTrieDictionary();
  133. /**
  134. * <p>Set whether the MutableTrieDictionary stores values associated with each word.</p>
  135. */
  136. void setValued(UBool valued){
  137. fValued = valued;
  138. }
  139. /**
  140. * <p>Returns true if the dictionary contains values associated with each word (the fValued flag).</p>
  141. */
  142. virtual UBool getValued() const {
  143. return fValued;
  144. }
  145. /**
  146. * <p>Find dictionary words that match the text.</p>
  147. *
  148. * @param text A UText representing the text. The
  149. * iterator is left after the longest prefix match in the dictionary.
  150. * @param maxLength The maximum number of code units to match.
  151. * @param lengths An array that is filled with the lengths of words that matched.
  152. * @param count Filled with the number of elements output in lengths.
  153. * @param limit The size of the lengths array; this limits the number of words output.
  154. * @param values An array that is filled with the values associated with the matched words. Defaults to NULL.
  155. * @return The number of characters in text that were matched.
  156. */
  157. virtual int32_t matches( UText *text,
  158. int32_t maxLength,
  159. int32_t *lengths,
  160. int &count,
  161. int limit,
  162. uint16_t *values = NULL) const;
  163. /**
  164. * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
  165. *
  166. * @param status A status code recording the success of the call.
  167. * @return A StringEnumeration that will iterate through the whole dictionary.
  168. * The caller is responsible for closing it. The order is unspecified.
  169. */
  170. virtual StringEnumeration *openWords( UErrorCode &status ) const;
  171. /**
  172. * <p>Add one word to the dictionary with an optional associated value.</p>
  173. *
  174. * @param word A UChar buffer containing the word.
  175. * @param length The length of the word.
  176. * @param status The resultant status.
  177. * @param value The nonzero value associated with this word (the default argument is 0).
  178. */
  179. virtual void addWord( const UChar *word,
  180. int32_t length,
  181. UErrorCode &status,
  182. uint16_t value = 0);
  183. #if 0
  184. /**
  185. * <p>Add all strings from a UEnumeration to the dictionary.</p>
  186. *
  187. * @param words A UEnumeration that will return the desired words.
  188. * @param status The resultant status
  189. */
  190. virtual void addWords( UEnumeration *words, UErrorCode &status );
  191. #endif
  192. protected:
  193. /**
  194. * <p>Search the dictionary for matches.</p>
  195. *
  196. * @param text A UText representing the text. The
  197. * iterator is left after the longest prefix match in the dictionary.
  198. * @param maxLength The maximum number of code units to match.
  199. * @param lengths An array that is filled with the lengths of words that matched.
  200. * @param count Filled with the number of elements output in lengths.
  201. * @param limit The size of the lengths array; this limits the number of words output.
  202. * @param parent The parent of the current node.
  203. * @param pMatched The returned parent node matched the input.
  204. * @param values An array that is filled with the values associated with the matched words. Defaults to NULL.
  205. * @return The number of characters in text that were matched.
  206. */
  207. virtual int32_t search( UText *text,
  208. int32_t maxLength,
  209. int32_t *lengths,
  210. int &count,
  211. int limit,
  212. TernaryNode *&parent,
  213. UBool &pMatched,
  214. uint16_t *values = NULL) const;
  215. private:
  216. /**
  217. * <p>Private constructor. The root node is not allocated.</p>
  218. *
  219. * @param status A status code recording the success of the call.
  220. * @param containsValues True if the dictionary will store a value associated
  221. * with each word added. Defaults to false.
  222. */
  223. MutableTrieDictionary( UErrorCode &status, UBool containsValues = false );
  224. };
  225. /*******************************************************************
  226. * CompactTrieDictionary
  227. */
  228. // Forward declarations
  229. struct CompactTrieHeader;
  230. struct CompactTrieInfo;
  231. /**
  232. * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
  233. * to save space.</p>
  234. */
  235. class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
  236. private:
  237. /**
  238. * The CompactTrieInfo of the CompactTrieDictionary, which contains all info
  239. */
  240. CompactTrieInfo *fInfo;
  241. /**
  242. * A UBool indicating whether or not we own the data. NOTE(review): this comment originally said "the fData", which is not a member declared in this class - confirm which field is meant.
  243. */
  244. UBool fOwnData;
  245. UDataMemory *fUData; // NOTE(review): presumably the UDataMemory adopted by the UDataMemory constructor - confirm against the implementation
  246. public:
  247. /**
  248. * <p>Construct a dictionary from a UDataMemory.</p>
  249. *
  250. * @param dataObj A pointer to a UDataMemory, which is adopted
  251. * @param status A status code giving the result of the constructor
  252. */
  253. CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
  254. /**
  255. * <p>Construct a dictionary from raw saved data.</p>
  256. *
  257. * @param dataObj A pointer to the raw data, which is still owned by the caller
  258. * @param status A status code giving the result of the constructor
  259. */
  260. CompactTrieDictionary(const void *dataObj, UErrorCode &status);
  261. /**
  262. * <p>Construct a dictionary from a MutableTrieDictionary.</p>
  263. *
  264. * @param dict The dictionary to use as input.
  265. * @param status A status code recording the success of the call.
  266. */
  267. CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
  268. /**
  269. * <p>Virtual destructor.</p>
  270. */
  271. virtual ~CompactTrieDictionary();
  272. /**
  273. * <p>Returns true if the dictionary contains values associated with each word.</p>
  274. */
  275. virtual UBool getValued() const;
  276. /**
  277. * <p>Find dictionary words that match the text.</p>
  278. *
  279. * @param text A UText representing the text. The
  280. * iterator is left after the longest prefix match in the dictionary.
  281. * @param maxLength The maximum number of code units to match.
  282. * @param lengths An array that is filled with the lengths of words that matched.
  283. * @param count Filled with the number of elements output in lengths.
  284. * @param limit The size of the lengths array; this limits the number of words output.
  285. * @param values An array that is filled with the values associated with the matched words. Defaults to NULL.
  286. * @return The number of characters in text that were matched.
  287. */
  288. virtual int32_t matches( UText *text,
  289. int32_t maxLength,
  290. int32_t *lengths,
  291. int &count,
  292. int limit,
  293. uint16_t *values = NULL) const;
  294. /**
  295. * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
  296. *
  297. * @param status A status code recording the success of the call.
  298. * @return A StringEnumeration that will iterate through the whole dictionary.
  299. * The caller is responsible for closing it. The order is unspecified.
  300. */
  301. virtual StringEnumeration *openWords( UErrorCode &status ) const;
  302. /**
  303. * <p>Return the size of the compact data.</p>
  304. *
  305. * @return The size of the dictionary's compact data.
  306. */
  307. virtual uint32_t dataSize() const;
  308. /**
  309. * <p>Return a void * pointer to the (unmanaged) compact data, platform-endian.</p>
  310. *
  311. * @return The data for the compact dictionary, suitable for passing to the
  312. * constructor that takes a const void * argument.
  313. */
  314. virtual const void *data() const;
  315. /**
  316. * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
  317. *
  318. * @param status A status code recording the success of the call.
  319. * @return A MutableTrieDictionary with the same data as this dictionary
  320. */
  321. virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
  322. private:
  323. /**
  324. * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
  325. *
  326. * @param dict The dictionary to convert.
  327. * @param status A status code recording the success of the call.
  328. * @return A single data blob starting with a CompactTrieHeader.
  329. */
  330. static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
  331. UErrorCode &status );
  332. };
  333. U_NAMESPACE_END
  334. /* TRIEDICT_H */
  335. #endif