PageRenderTime 46ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/external/icu4c/i18n/nortrans.cpp

https://gitlab.com/brian0218/rk3066_r-box_android4.2.2_sdk
C++ | 175 lines | 93 code | 18 blank | 64 comment | 11 complexity | 435d6afcf8f6e4593dc3b5fc771d8479 MD5 | raw file
  1. /*
  2. **********************************************************************
  3. * Copyright (C) 2001-2010, International Business Machines
  4. * Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. * Date Name Description
  7. * 07/03/01 aliu Creation.
  8. **********************************************************************
  9. */
  10. #include "unicode/utypes.h"
  11. #if !UCONFIG_NO_TRANSLITERATION
  12. #include "unicode/normalizer2.h"
  13. #include "cstring.h"
  14. #include "nortrans.h"
  15. U_NAMESPACE_BEGIN
  16. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)
  17. static inline Transliterator::Token cstrToken(const char *s) {
  18. return Transliterator::pointerToken((void *)s);
  19. }
  20. /**
  21. * System registration hook.
  22. */
  23. void NormalizationTransliterator::registerIDs() {
  24. // In the Token, the byte after the NUL is the UNormalization2Mode.
  25. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
  26. _create, cstrToken("nfc\0\0"));
  27. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
  28. _create, cstrToken("nfkc\0\0"));
  29. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
  30. _create, cstrToken("nfc\0\1"));
  31. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
  32. _create, cstrToken("nfkc\0\1"));
  33. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
  34. _create, cstrToken("nfc\0\2"));
  35. Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
  36. _create, cstrToken("nfc\0\3"));
  37. Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
  38. UNICODE_STRING_SIMPLE("NFD"), TRUE);
  39. Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
  40. UNICODE_STRING_SIMPLE("NFKD"), TRUE);
  41. Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
  42. UNICODE_STRING_SIMPLE("NFD"), FALSE);
  43. Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
  44. UNICODE_STRING_SIMPLE("FCD"), FALSE);
  45. }
  46. /**
  47. * Factory methods
  48. */
  49. Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
  50. Token context) {
  51. const char *name = (const char *)context.pointer;
  52. UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
  53. UErrorCode errorCode = U_ZERO_ERROR;
  54. const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
  55. if(U_SUCCESS(errorCode)) {
  56. return new NormalizationTransliterator(ID, *norm2);
  57. } else {
  58. return NULL;
  59. }
  60. }
  61. /**
  62. * Constructs a transliterator.
  63. */
  64. NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,
  65. const Normalizer2 &norm2) :
  66. Transliterator(id, 0), fNorm2(norm2) {}
  67. /**
  68. * Destructor.
  69. */
  70. NormalizationTransliterator::~NormalizationTransliterator() {
  71. }
  72. /**
  73. * Copy constructor.
  74. */
  75. NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
  76. Transliterator(o), fNorm2(o.fNorm2) {}
  77. /**
  78. * Transliterator API.
  79. */
  80. Transliterator* NormalizationTransliterator::clone(void) const {
  81. return new NormalizationTransliterator(*this);
  82. }
  83. /**
  84. * Implements {@link Transliterator#handleTransliterate}.
  85. */
  86. void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
  87. UBool isIncremental) const {
  88. // start and limit of the input range
  89. int32_t start = offsets.start;
  90. int32_t limit = offsets.limit;
  91. if(start >= limit) {
  92. return;
  93. }
  94. /*
  95. * Normalize as short chunks at a time as possible even in
  96. * bulk mode, so that styled text is minimally disrupted.
  97. * In incremental mode, a chunk that ends with offsets.limit
  98. * must not be normalized.
  99. *
  100. * If it was known that the input text is not styled, then
  101. * a bulk mode normalization could look like this:
  102. UnicodeString input, normalized;
  103. int32_t length = limit - start;
  104. _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
  105. input.releaseBuffer(length);
  106. UErrorCode status = U_ZERO_ERROR;
  107. fNorm2.normalize(input, normalized, status);
  108. text.handleReplaceBetween(start, limit, normalized);
  109. int32_t delta = normalized.length() - length;
  110. offsets.contextLimit += delta;
  111. offsets.limit += delta;
  112. offsets.start = limit + delta;
  113. */
  114. UErrorCode errorCode = U_ZERO_ERROR;
  115. UnicodeString segment;
  116. UnicodeString normalized;
  117. UChar32 c = text.char32At(start);
  118. do {
  119. int32_t prev = start;
  120. // Skip at least one character so we make progress.
  121. // c holds the character at start.
  122. segment.remove();
  123. do {
  124. segment.append(c);
  125. start += U16_LENGTH(c);
  126. } while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start)));
  127. if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
  128. // stop in incremental mode when we reach the input limit
  129. // in case there are additional characters that could change the
  130. // normalization result
  131. start=prev;
  132. break;
  133. }
  134. fNorm2.normalize(segment, normalized, errorCode);
  135. if(U_FAILURE(errorCode)) {
  136. break;
  137. }
  138. if(segment != normalized) {
  139. // replace the input chunk with its normalized form
  140. text.handleReplaceBetween(prev, start, normalized);
  141. // update all necessary indexes accordingly
  142. int32_t delta = normalized.length() - (start - prev);
  143. start += delta;
  144. limit += delta;
  145. }
  146. } while(start < limit);
  147. offsets.start = start;
  148. offsets.contextLimit += limit - offsets.limit;
  149. offsets.limit = limit;
  150. }
  151. U_NAMESPACE_END
  152. #endif /* #if !UCONFIG_NO_TRANSLITERATION */