PageRenderTime 26ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/WebKit/Source/platform/fonts/ScriptRunIterator.cpp

https://gitlab.com/0072016/Facebook-SDK-
C++ | 378 lines | 256 code | 43 blank | 79 comment | 77 complexity | 478874dfc779a713a752e6807f7455a1 MD5 | raw file
  1. // Copyright 2015 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "ScriptRunIterator.h"
  5. #include "platform/Logging.h"
  6. #include "wtf/Threading.h"
  7. #include <algorithm>
  8. namespace blink {
  9. typedef ScriptData::PairedBracketType PairedBracketType;
  10. const int ScriptData::kMaxScriptCount = 20;
  11. ScriptData::~ScriptData()
  12. {
  13. }
  14. void ICUScriptData::getScripts(UChar32 ch, Vector<UScriptCode>& dst) const
  15. {
  16. UErrorCode status = U_ZERO_ERROR;
  17. // Leave room to insert primary script. It's not strictly necessary but
  18. // it ensures that the result won't ever be greater than kMaxScriptCount,
  19. // which some client someday might expect.
  20. dst.resize(kMaxScriptCount - 1);
  21. // Note, ICU convention is to return the number of available items
  22. // regardless of the capacity passed to the call. So count can be greater
  23. // than dst->size(), if a later version of the unicode data has more
  24. // than kMaxScriptCount items.
  25. int count = uscript_getScriptExtensions(
  26. ch, &dst[0], dst.size(), &status);
  27. if (status == U_BUFFER_OVERFLOW_ERROR) {
  28. // Allow this, we'll just use what we have.
  29. DLOG(ERROR) << "Exceeded maximum script count of " << kMaxScriptCount << " for 0x" << std::hex << ch;
  30. count = dst.size();
  31. status = U_ZERO_ERROR;
  32. }
  33. UScriptCode primaryScript = uscript_getScript(ch, &status);
  34. if (U_FAILURE(status)) {
  35. DLOG(ERROR) << "Could not get icu script data: " << status << " for 0x" << std::hex << ch;
  36. dst.clear();
  37. return;
  38. }
  39. dst.resize(count);
  40. if (primaryScript == dst.at(0)) {
  41. // Only one script (might be common or inherited -- these are never in
  42. // the extensions unless they're the only script), or extensions are in
  43. // priority order already.
  44. return;
  45. }
  46. if (primaryScript != USCRIPT_INHERITED
  47. && primaryScript != USCRIPT_COMMON
  48. && primaryScript != USCRIPT_INVALID_CODE) {
  49. // Not common or primary, with extensions that are not in order. We know
  50. // the primary, so we insert it at the front and swap the previous front
  51. // to somewhere else in the list.
  52. auto it = std::find(dst.begin() + 1, dst.end(), primaryScript);
  53. if (it == dst.end()) {
  54. dst.append(primaryScript);
  55. }
  56. std::swap(*dst.begin(), *it);
  57. return;
  58. }
  59. if (primaryScript == USCRIPT_COMMON) {
  60. if (count == 1) {
  61. // Common with a preferred script. Keep common at head.
  62. dst.prepend(primaryScript);
  63. return;
  64. }
  65. // Ignore common. Find the preferred script of the multiple scripts that
  66. // remain, and ensure it is at the head. Just keep swapping them in,
  67. // there aren't likely to be many.
  68. for (size_t i = 1; i < dst.size(); ++i) {
  69. if (dst.at(0) == USCRIPT_LATIN || dst.at(i) < dst.at(0)) {
  70. std::swap(dst.at(0), dst.at(i));
  71. }
  72. }
  73. return;
  74. }
  75. // The primary is inherited, and there are other scripts. Put inherited at
  76. // the front, the true primary next, and then the others in random order.
  77. // TODO: Take into account the language of a document if available.
  78. // Otherwise, use Unicode block as a tie breaker. Comparing
  79. // ScriptCodes as integers is not meaningful because 'old' scripts are
  80. // just sorted in alphabetic order.
  81. dst.append(dst.at(0));
  82. dst.at(0) = primaryScript;
  83. for (size_t i = 2; i < dst.size(); ++i) {
  84. if (dst.at(1) == USCRIPT_LATIN || dst.at(i) < dst.at(1)) {
  85. std::swap(dst.at(1), dst.at(i));
  86. }
  87. }
  88. }
  89. UChar32 ICUScriptData::getPairedBracket(UChar32 ch) const
  90. {
  91. return u_getBidiPairedBracket(ch);
  92. }
  93. PairedBracketType ICUScriptData::getPairedBracketType(UChar32 ch) const
  94. {
  95. return static_cast<PairedBracketType>(
  96. u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE));
  97. }
  98. const ICUScriptData* ICUScriptData::instance()
  99. {
  100. DEFINE_THREAD_SAFE_STATIC_LOCAL(const ICUScriptData, icuScriptDataInstance, (new ICUScriptData()));
  101. return &icuScriptDataInstance;
  102. }
  103. ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length, const ScriptData* data)
  104. : m_text(text)
  105. , m_length(length)
  106. , m_bracketsFixupDepth(0)
  107. // The initial value of m_aheadCharacter is not used.
  108. , m_aheadCharacter(0)
  109. , m_aheadPos(0)
  110. , m_commonPreferred(USCRIPT_COMMON)
  111. , m_scriptData(data)
  112. {
  113. ASSERT(text);
  114. ASSERT(data);
  115. if (m_aheadPos < m_length) {
  116. m_currentSet.clear();
  117. // Priming the m_currentSet with USCRIPT_COMMON here so that the first
  118. // resolution between m_currentSet and m_nextSet in mergeSets() leads to
  119. // chosing the script of the first consumed character.
  120. m_currentSet.append(USCRIPT_COMMON);
  121. U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
  122. m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
  123. }
  124. }
  125. ScriptRunIterator::ScriptRunIterator(const UChar* text, size_t length)
  126. : ScriptRunIterator(text, length, ICUScriptData::instance())
  127. {
  128. }
  129. bool ScriptRunIterator::consume(unsigned& limit, UScriptCode& script)
  130. {
  131. if (m_currentSet.isEmpty()) {
  132. return false;
  133. }
  134. size_t pos;
  135. UChar32 ch;
  136. while (fetch(&pos, &ch)) {
  137. PairedBracketType pairedType = m_scriptData->getPairedBracketType(ch);
  138. switch (pairedType) {
  139. case PairedBracketType::BracketTypeOpen:
  140. openBracket(ch);
  141. break;
  142. case PairedBracketType::BracketTypeClose:
  143. closeBracket(ch);
  144. break;
  145. default:
  146. break;
  147. }
  148. if (!mergeSets()) {
  149. limit = pos;
  150. script = resolveCurrentScript();
  151. fixupStack(script);
  152. m_currentSet = m_nextSet;
  153. return true;
  154. }
  155. }
  156. limit = m_length;
  157. script = resolveCurrentScript();
  158. m_currentSet.clear();
  159. return true;
  160. }
  161. void ScriptRunIterator::openBracket(UChar32 ch)
  162. {
  163. if (m_brackets.size() == kMaxBrackets) {
  164. m_brackets.removeFirst();
  165. if (m_bracketsFixupDepth == kMaxBrackets) {
  166. --m_bracketsFixupDepth;
  167. }
  168. }
  169. m_brackets.append(BracketRec({ ch, USCRIPT_COMMON }));
  170. ++m_bracketsFixupDepth;
  171. }
  172. void ScriptRunIterator::closeBracket(UChar32 ch)
  173. {
  174. if (m_brackets.size() > 0) {
  175. UChar32 target = m_scriptData->getPairedBracket(ch);
  176. for (auto it = m_brackets.rbegin(); it != m_brackets.rend(); ++it) {
  177. if (it->ch == target) {
  178. // Have a match, use open paren's resolved script.
  179. UScriptCode script = it->script;
  180. m_nextSet.clear();
  181. m_nextSet.append(script);
  182. // And pop stack to this point.
  183. int numPopped = std::distance(m_brackets.rbegin(), it);
  184. // TODO: No resize operation in WTF::Deque?
  185. for (int i = 0; i < numPopped; ++i)
  186. m_brackets.removeLast();
  187. m_bracketsFixupDepth = std::max(static_cast<size_t>(0),
  188. m_bracketsFixupDepth - numPopped);
  189. return;
  190. }
  191. }
  192. }
  193. // leave stack alone, no match
  194. }
  195. // Keep items in m_currentSet that are in m_nextSet.
  196. //
  197. // If the sets are disjoint, return false and leave m_currentSet unchanged. Else
  198. // return true and make current set the intersection. Make sure to maintain
  199. // current priority script as priority if it remains, else retain next priority
  200. // script if it remains.
  201. //
  202. // Also maintain a common preferred script. If current and next are both
  203. // common, and there is no common preferred script and next has a preferred
  204. // script, set the common preferred script to that of next.
  205. bool ScriptRunIterator::mergeSets()
  206. {
  207. if (m_nextSet.isEmpty() || m_currentSet.isEmpty()) {
  208. return false;
  209. }
  210. auto currentSetIt = m_currentSet.begin();
  211. auto currentEnd = m_currentSet.end();
  212. // Most of the time, this is the only one.
  213. // Advance the current iterator, we won't need to check it again later.
  214. UScriptCode priorityScript = *currentSetIt++;
  215. // If next is common or inherited, the only thing that might change
  216. // is the common preferred script.
  217. if (m_nextSet.at(0) <= USCRIPT_INHERITED) {
  218. if (m_nextSet.size() == 2 && priorityScript <= USCRIPT_INHERITED && m_commonPreferred == USCRIPT_COMMON) {
  219. m_commonPreferred = m_nextSet.at(1);
  220. }
  221. return true;
  222. }
  223. // If current is common or inherited, use the next script set.
  224. if (priorityScript <= USCRIPT_INHERITED) {
  225. m_currentSet = m_nextSet;
  226. return true;
  227. }
  228. // Neither is common or inherited. If current is a singleton,
  229. // just see if it exists in the next set. This is the common case.
  230. auto next_it = m_nextSet.begin();
  231. auto next_end = m_nextSet.end();
  232. if (currentSetIt == currentEnd) {
  233. return std::find(next_it, next_end, priorityScript) != next_end;
  234. }
  235. // Establish the priority script, if we have one.
  236. // First try current priority script.
  237. bool havePriority = std::find(next_it, next_end, priorityScript)
  238. != next_end;
  239. if (!havePriority) {
  240. // So try next priority script.
  241. // Skip the first current script, we already know it's not there.
  242. // Advance the next iterator, later we won't need to check it again.
  243. priorityScript = *next_it++;
  244. havePriority = std::find(currentSetIt, currentEnd, priorityScript) != currentEnd;
  245. }
  246. // Note that we can never write more scripts into the current vector than
  247. // it already contains, so currentWriteIt won't ever exceed the size/capacity.
  248. auto currentWriteIt = m_currentSet.begin();
  249. if (havePriority) {
  250. // keep the priority script.
  251. *currentWriteIt++ = priorityScript;
  252. }
  253. if (next_it != next_end) {
  254. // Iterate over the remaining current scripts, and keep them if
  255. // they occur in the remaining next scripts.
  256. while (currentSetIt != currentEnd) {
  257. UScriptCode sc = *currentSetIt++;
  258. if (std::find(next_it, next_end, sc) != next_end) {
  259. *currentWriteIt++ = sc;
  260. }
  261. }
  262. }
  263. // Only change current if the run continues.
  264. int written = std::distance(m_currentSet.begin(), currentWriteIt);
  265. if (written > 0) {
  266. m_currentSet.resize(written);
  267. return true;
  268. }
  269. return false;
  270. }
  271. // When we hit the end of the run, and resolve the script, we now know the
  272. // resolved script of any open bracket that was pushed on the stack since
  273. // the start of the run. Fixup depth records how many of these there
  274. // were. We've maintained this count during pushes, and taken care to
  275. // adjust it if the stack got overfull and open brackets were pushed off
  276. // the bottom. This sets the script of the fixup_depth topmost entries of the
  277. // stack to the resolved script.
  278. void ScriptRunIterator::fixupStack(UScriptCode resolvedScript)
  279. {
  280. if (m_bracketsFixupDepth > 0) {
  281. if (m_bracketsFixupDepth > m_brackets.size()) {
  282. // Should never happen unless someone breaks the code.
  283. DLOG(ERROR) << "Brackets fixup depth exceeds size of bracket vector.";
  284. m_bracketsFixupDepth = m_brackets.size();
  285. }
  286. auto it = m_brackets.rbegin();
  287. for (size_t i = 0; i < m_bracketsFixupDepth; ++i) {
  288. it->script = resolvedScript;
  289. ++it;
  290. }
  291. m_bracketsFixupDepth = 0;
  292. }
  293. }
  294. bool ScriptRunIterator::fetch(size_t* pos, UChar32* ch)
  295. {
  296. if (m_aheadPos > m_length) {
  297. return false;
  298. }
  299. *pos = m_aheadPos - (m_aheadCharacter >= 0x10000 ? 2 : 1);
  300. *ch = m_aheadCharacter;
  301. m_nextSet.swap(m_aheadSet);
  302. if (m_aheadPos == m_length) {
  303. // No more data to fetch, but last character still needs to be
  304. // processed. Advance m_aheadPos so that next time we will know
  305. // this has been done.
  306. m_aheadPos++;
  307. return true;
  308. }
  309. U16_NEXT(m_text, m_aheadPos, m_length, m_aheadCharacter);
  310. m_scriptData->getScripts(m_aheadCharacter, m_aheadSet);
  311. if (m_aheadSet.isEmpty()) {
  312. // No scripts for this character. This has already been logged, so
  313. // we just terminate processing this text.
  314. return false;
  315. }
  316. if (m_aheadSet[0] == USCRIPT_INHERITED && m_aheadSet.size() > 1) {
  317. if (m_nextSet[0] == USCRIPT_COMMON) {
  318. // Overwrite the next set with the non-inherited portion of the set.
  319. m_nextSet = m_aheadSet;
  320. m_nextSet.remove(0);
  321. // Discard the remaining values, we'll inherit.
  322. m_aheadSet.resize(1);
  323. } else {
  324. // Else, this applies to anything.
  325. m_aheadSet.resize(1);
  326. }
  327. }
  328. return true;
  329. }
  330. UScriptCode ScriptRunIterator::resolveCurrentScript() const
  331. {
  332. UScriptCode result = m_currentSet.at(0);
  333. return result == USCRIPT_COMMON ? m_commonPreferred : result;
  334. }
  335. } // namespace blink