/common/brkeng.h

https://github.com/CyanogenMod/android_external_icu4c · C Header · 289 lines · 56 code · 41 blank · 192 comment · 0 complexity · 7ade05322961ff9df5419fc0bb33fefb MD5 · raw file

  1. /**
  2. ************************************************************************************
  3. * Copyright (C) 2006-2012, International Business Machines Corporation and others. *
  4. * All Rights Reserved. *
  5. ************************************************************************************
  6. */
  7. #ifndef BRKENG_H
  8. #define BRKENG_H
  9. #include "unicode/utypes.h"
  10. #include "unicode/uobject.h"
  11. #include "unicode/utext.h"
  12. #include "unicode/uscript.h"
  13. U_NAMESPACE_BEGIN
  14. class UnicodeSet;
  15. class UStack;
  16. class DictionaryMatcher;
  17. /*******************************************************************
  18. * LanguageBreakEngine
  19. */
  20. /**
  21. * <p>Abstract interface (handles() and findBreaks() are pure virtual) for
  22. * language-specific knowledge used to find text boundaries within a run of
  23. * characters belonging to a specific set. The boundaries will be of a
  24. * specific kind, e.g. word, line, etc.</p>
  25. *
  26. * <p>LanguageBreakEngines should normally be implemented so as to
  27. * be shared between threads without locking.</p>
  28. */
  29. class LanguageBreakEngine : public UMemory {
  30. public:
  31. /**
  32. * <p>Default constructor.</p>
  33. *
  34. */
  35. LanguageBreakEngine();
  36. /**
  37. * <p>Virtual destructor.</p>
  38. */
  39. virtual ~LanguageBreakEngine();
  40. /**
  41. * <p>Indicate whether this engine handles a particular character for
  42. * a particular kind of break. Pure virtual: every subclass must implement it.</p>
  43. *
  44. * @param c A character which begins a run that the engine might handle
  45. * @param breakType The type of text break which the caller wants to determine
  46. * @return TRUE if this engine handles the particular character and break
  47. * type.
  48. */
  49. virtual UBool handles(UChar32 c, int32_t breakType) const = 0;
  50. /**
  51. * <p>Find any breaks within a run in the supplied text. Pure virtual: every subclass must implement it.</p>
  52. *
  53. * @param text A UText representing the text. Its
  54. * iteration position is left at the end of the run of characters which the engine
  55. * is capable of handling.
  56. * @param startPos The start of the run within the supplied text.
  57. * @param endPos The end of the run within the supplied text.
  58. * @param reverse Whether the caller is looking for breaks in a reverse
  59. * direction.
  60. * @param breakType The type of break desired, or -1.
  61. * @param foundBreaks A UStack (passed by reference, not a C array) to receive the breaks found, if any
  62. * @return The number of breaks found.
  63. */
  64. virtual int32_t findBreaks( UText *text,
  65. int32_t startPos,
  66. int32_t endPos,
  67. UBool reverse,
  68. int32_t breakType,
  69. UStack &foundBreaks ) const = 0;
  70. };
  71. /*******************************************************************
  72. * LanguageBreakFactory
  73. */
  74. /**
  75. * <p>A LanguageBreakFactory finds and returns a LanguageBreakEngine
  76. * that can determine breaks for characters in a specific set, if
  77. * such an object can be found. This class is an abstract interface.</p>
  78. *
  79. * <p>If a LanguageBreakFactory is to be shared between threads,
  80. * appropriate synchronization must be used; there is none internal
  81. * to the factory.</p>
  82. *
  83. * <p>A LanguageBreakEngine returned by a LanguageBreakFactory can
  84. * normally be shared between threads without synchronization, unless
  85. * the specific subclass of LanguageBreakFactory indicates otherwise.</p>
  86. *
  87. * <p>A LanguageBreakFactory is responsible for deleting any LanguageBreakEngine
  88. * it returns when it itself is deleted, unless the specific subclass of
  89. * LanguageBreakFactory indicates otherwise. Naturally, the factory should
  90. * not be deleted until the LanguageBreakEngines it has returned are no
  91. * longer needed.</p>
  92. */
  93. class LanguageBreakFactory : public UMemory {
  94. public:
  95. /**
  96. * <p>Default constructor.</p>
  97. *
  98. */
  99. LanguageBreakFactory();
  100. /**
  101. * <p>Virtual destructor.</p>
  102. */
  103. virtual ~LanguageBreakFactory();
  104. /**
  105. * <p>Find and return a LanguageBreakEngine that can find the desired
  106. * kind of break for the set of characters to which the supplied
  107. * character belongs. It is up to the set of available engines to
  108. * determine what the sets of characters are. Pure virtual: concrete factories must implement it.</p>
  109. *
  110. * @param c A character that begins a run for which a LanguageBreakEngine is
  111. * sought.
  112. * @param breakType The kind of text break for which a LanguageBreakEngine is
  113. * sought.
  114. * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer) if none can be found.
  115. */
  116. virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType) = 0;
  117. };
  118. /*******************************************************************
  119. * UnhandledEngine
  120. */
  121. /**
  122. * <p>UnhandledEngine is a special subclass of LanguageBreakEngine that
  123. * handles characters that no other LanguageBreakEngine is available to
  124. * handle. It is told the character and the type of break; at its
  125. * discretion it may handle more than the specified character (e.g.,
  126. * the entire script to which that character belongs).</p>
  127. *
  128. * <p>UnhandledEngines may not be shared between threads without
  129. * external synchronization.</p>
  130. */
  131. class UnhandledEngine : public LanguageBreakEngine {
  132. private:
  133. /**
  134. * The sets of characters handled, one array slot per break type.
  135. * @internal NOTE(review): the size 4 presumably equals the number of break types - confirm against the implementation.
  136. */
  137. UnicodeSet *fHandled[4];
  138. public:
  139. /**
  140. * <p>Constructor. Declared explicit so that a UErrorCode is never implicitly converted into an engine.</p>
  141. * @param status ICU error code, passed by reference (presumably set on failure - confirm against the implementation).
  142. */
  143. explicit UnhandledEngine(UErrorCode &status);
  144. /**
  145. * <p>Virtual destructor.</p>
  146. */
  147. virtual ~UnhandledEngine();
  148. /**
  149. * <p>Indicate whether this engine handles a particular character for
  150. * a particular kind of break. Implements LanguageBreakEngine::handles().</p>
  151. *
  152. * @param c A character which begins a run that the engine might handle
  153. * @param breakType The type of text break which the caller wants to determine
  154. * @return TRUE if this engine handles the particular character and break
  155. * type.
  156. */
  157. virtual UBool handles(UChar32 c, int32_t breakType) const;
  158. /**
  159. * <p>Find any breaks within a run in the supplied text. Implements LanguageBreakEngine::findBreaks().</p>
  160. *
  161. * @param text A UText representing the text. Its
  162. * iteration position is left at the end of the run of characters which the engine
  163. * is capable of handling.
  164. * @param startPos The start of the run within the supplied text.
  165. * @param endPos The end of the run within the supplied text.
  166. * @param reverse Whether the caller is looking for breaks in a reverse
  167. * direction.
  168. * @param breakType The type of break desired, or -1.
  169. * @param foundBreaks A UStack (passed by reference, not a C array) to receive the breaks found, if any
  170. * @return The number of breaks found.
  171. */
  172. virtual int32_t findBreaks( UText *text,
  173. int32_t startPos,
  174. int32_t endPos,
  175. UBool reverse,
  176. int32_t breakType,
  177. UStack &foundBreaks ) const;
  178. /**
  179. * <p>Tell the engine to handle a particular character and break type.</p>
  180. *
  181. * @param c A character which the engine should handle
  182. * @param breakType The type of text break for which the engine should handle c
  183. */
  184. virtual void handleCharacter(UChar32 c, int32_t breakType);
  185. };
  186. /*******************************************************************
  187. * ICULanguageBreakFactory
  188. */
  189. /**
  190. * <p>ICULanguageBreakFactory is the default LanguageBreakFactory for
  191. * ICU. It creates dictionary-based LanguageBreakEngines from dictionary
  192. * data in the ICU data file.</p>
  193. */
  194. class ICULanguageBreakFactory : public LanguageBreakFactory {
  195. private:
  196. /**
  197. * The stack of break engines created by this factory
  198. * @internal
  199. */
  200. UStack *fEngines;
  201. public:
  202. /**
  203. * <p>Standard constructor. Declared explicit so that a UErrorCode is never implicitly converted into a factory.</p>
  204. * @param status ICU error code, passed by reference (presumably set on failure - confirm against the implementation).
  205. */
  206. explicit ICULanguageBreakFactory(UErrorCode &status);
  207. /**
  208. * <p>Virtual destructor.</p>
  209. */
  210. virtual ~ICULanguageBreakFactory();
  211. /**
  212. * <p>Find and return a LanguageBreakEngine that can find the desired
  213. * kind of break for the set of characters to which the supplied
  214. * character belongs. It is up to the set of available engines to
  215. * determine what the sets of characters are. Implements LanguageBreakFactory::getEngineFor().</p>
  216. *
  217. * @param c A character that begins a run for which a LanguageBreakEngine is
  218. * sought.
  219. * @param breakType The kind of text break for which a LanguageBreakEngine is
  220. * sought.
  221. * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer).
  222. */
  223. virtual const LanguageBreakEngine *getEngineFor(UChar32 c, int32_t breakType);
  224. protected:
  225. /**
  226. * <p>Create a LanguageBreakEngine for the set of characters to which
  227. * the supplied character belongs, for the specified break type.
  228. * Protected and virtual, so subclasses can replace how engines are created.</p>
  229. * @param c A character that begins a run for which a LanguageBreakEngine is
  230. * sought.
  231. * @param breakType The kind of text break for which a LanguageBreakEngine is
  232. * sought.
  233. * @return A LanguageBreakEngine with the desired characteristics, or 0 (a null pointer).
  234. */
  235. virtual const LanguageBreakEngine *loadEngineFor(UChar32 c, int32_t breakType);
  236. /**
  237. * <p>Create a DictionaryMatcher for the specified script and break type.</p>
  238. * @param script An ISO 15924 script code that identifies the dictionary to be
  239. * created.
  240. * @param breakType The kind of text break for which a dictionary is
  241. * sought.
  242. * @return A DictionaryMatcher with the desired characteristics, or NULL.
  243. */
  244. virtual DictionaryMatcher *loadDictionaryMatcherFor(UScriptCode script, int32_t breakType);
  245. };
  246. U_NAMESPACE_END
  247. /* BRKENG_H */
  248. #endif