/src/corelib/tools/qchar.cpp

https://bitbucket.org/ultra_iter/qt-vtl · C++ · 1662 lines · 582 code · 154 blank · 926 comment · 107 complexity · ffd32e982ee13fd4da9e715acce62b5f MD5 · raw file

  1. /****************************************************************************
  2. **
  3. ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
  4. ** All rights reserved.
  5. ** Contact: Nokia Corporation (qt-info@nokia.com)
  6. **
  7. ** This file is part of the QtCore module of the Qt Toolkit.
  8. **
  9. ** $QT_BEGIN_LICENSE:LGPL$
  10. ** GNU Lesser General Public License Usage
  11. ** This file may be used under the terms of the GNU Lesser General Public
  12. ** License version 2.1 as published by the Free Software Foundation and
  13. ** appearing in the file LICENSE.LGPL included in the packaging of this
  14. ** file. Please review the following information to ensure the GNU Lesser
  15. ** General Public License version 2.1 requirements will be met:
  16. ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
  17. **
  18. ** In addition, as a special exception, Nokia gives you certain additional
  19. ** rights. These rights are described in the Nokia Qt LGPL Exception
  20. ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
  21. **
  22. ** GNU General Public License Usage
  23. ** Alternatively, this file may be used under the terms of the GNU General
  24. ** Public License version 3.0 as published by the Free Software Foundation
  25. ** and appearing in the file LICENSE.GPL included in the packaging of this
  26. ** file. Please review the following information to ensure the GNU General
  27. ** Public License version 3.0 requirements will be met:
  28. ** http://www.gnu.org/copyleft/gpl.html.
  29. **
  30. ** Other Usage
  31. ** Alternatively, this file may be used in accordance with the terms and
  32. ** conditions contained in a signed written agreement between you and Nokia.
  33. **
  34. **
  35. **
  36. **
  37. **
  38. ** $QT_END_LICENSE$
  39. **
  40. ****************************************************************************/
  41. // Don't define it while compiling this module, or USERS of Qt will
  42. // not be able to link.
  43. #ifdef QT_NO_CAST_FROM_ASCII
  44. # undef QT_NO_CAST_FROM_ASCII
  45. #endif
  46. #ifdef QT_NO_CAST_TO_ASCII
  47. # undef QT_NO_CAST_TO_ASCII
  48. #endif
  49. #include "qchar.h"
  50. #include "qdatastream.h"
  51. #include "qtextcodec.h"
  52. #include "qunicodetables_p.h"
  53. #include "qunicodetables.cpp"
  54. QT_BEGIN_NAMESPACE
  55. #ifndef QT_NO_CODEC_FOR_C_STRINGS
  56. # ifdef QT_NO_TEXTCODEC
  57. # define QT_NO_CODEC_FOR_C_STRINGS
  58. # endif
  59. #endif
  60. #define FLAG(x) (1 << (x))
  61. /*!
  62. \class QLatin1Char
  63. \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
  64. \ingroup string-processing
  65. This class is only useful to avoid the codec for C strings business
  66. in the QChar(ch) constructor. You can avoid it by writing
  67. QChar(ch, 0).
  68. \sa QChar, QLatin1String, QString
  69. */
  70. /*!
  71. \fn const char QLatin1Char::toLatin1() const
  72. Converts a Latin-1 character to an 8-bit ASCII representation of
  73. the character.
  74. */
  75. /*!
  76. \fn const ushort QLatin1Char::unicode() const
  77. Converts a Latin-1 character to a 16-bit-encoded Unicode representation
  78. of the character.
  79. */
  80. /*!
  81. \fn QLatin1Char::QLatin1Char(char c)
  82. Constructs a Latin-1 character for \a c. This constructor should be
  83. used when the encoding of the input character is known to be Latin-1.
  84. */
  85. /*!
  86. \class QChar
  87. \brief The QChar class provides a 16-bit Unicode character.
  88. \ingroup string-processing
  89. \reentrant
  90. In Qt, Unicode characters are 16-bit entities without any markup
  91. or structure. This class represents such an entity. It is
  92. lightweight, so it can be used everywhere. Most compilers treat
  93. it like an \c{unsigned short}.
  94. QChar provides a full complement of testing/classification
  95. functions, converting to and from other formats, converting from
  96. composed to decomposed Unicode, and trying to compare and
  97. case-convert if you ask it to.
  98. The classification functions include functions like those in the
  99. standard C++ header \<cctype\> (formerly \<ctype.h\>), but
  100. operating on the full range of Unicode characters. They all
  101. return true if the character is a certain type of character;
  102. otherwise they return false. These classification functions are
  103. isNull() (returns true if the character is '\\0'), isPrint()
  104. (true if the character is any sort of printable character,
  105. including whitespace), isPunct() (any sort of punctuation),
  106. isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
  107. sort of numeric character, not just 0-9), isLetterOrNumber(), and
  108. isDigit() (decimal digits). All of these are wrappers around
  109. category() which return the Unicode-defined category of each
  110. character.
  111. QChar also provides direction(), which indicates the "natural"
  112. writing direction of this character. The joining() function
  113. indicates how the character joins with its neighbors (needed
  114. mostly for Arabic) and finally hasMirrored(), which indicates
  115. whether the character needs to be mirrored when it is printed in
  116. its "unnatural" writing direction.
  117. Composed Unicode characters (like \aring) can be converted to
  118. decomposed Unicode ("a" followed by "ring above") by using
  119. decomposition().
  120. In Unicode, comparison is not necessarily possible and case
  121. conversion is very difficult at best. Unicode, covering the
  122. "entire" world, also includes most of the world's case and
  123. sorting problems. operator==() and friends will do comparison
  124. based purely on the numeric Unicode value (code point) of the
  125. characters, and toUpper() and toLower() will do case changes when
  126. the character has a well-defined uppercase/lowercase equivalent.
  127. For locale-dependent comparisons, use
  128. QString::localeAwareCompare().
  129. The conversion functions include unicode() (to a scalar),
  130. toLatin1() (to scalar, but converts all non-Latin-1 characters to
  131. 0), row() (gives the Unicode row), cell() (gives the Unicode
  132. cell), digitValue() (gives the integer value of any of the
  133. numerous digit characters), and a host of constructors.
  134. QChar provides constructors and cast operators that make it easy
  135. to convert to and from traditional 8-bit \c{char}s. If you
  136. defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
  137. explained in the QString documentation, you will need to
  138. explicitly call fromAscii() or fromLatin1(), or use QLatin1Char,
  139. to construct a QChar from an 8-bit \c char, and you will need to
  140. call toAscii() or toLatin1() to get the 8-bit value back.
  141. \sa QString, Unicode, QLatin1Char
  142. */
  143. /*!
  144. \enum QChar::UnicodeVersion
  145. Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
  146. introduced a certain character.
  147. \value Unicode_1_1 Version 1.1
  148. \value Unicode_2_0 Version 2.0
  149. \value Unicode_2_1_2 Version 2.1.2
  150. \value Unicode_3_0 Version 3.0
  151. \value Unicode_3_1 Version 3.1
  152. \value Unicode_3_2 Version 3.2
  153. \value Unicode_4_0 Version 4.0
  154. \value Unicode_4_1 Version 4.1
  155. \value Unicode_5_0 Version 5.0
  156. \value Unicode_Unassigned The value is not assigned to any character
  157. in version 5.0 of Unicode.
  158. \sa unicodeVersion()
  159. */
  160. /*!
  161. \enum QChar::Category
  162. This enum maps the Unicode character categories.
  163. The following characters are normative in Unicode:
  164. \value Mark_NonSpacing Unicode class name Mn
  165. \value Mark_SpacingCombining Unicode class name Mc
  166. \value Mark_Enclosing Unicode class name Me
  167. \value Number_DecimalDigit Unicode class name Nd
  168. \value Number_Letter Unicode class name Nl
  169. \value Number_Other Unicode class name No
  170. \value Separator_Space Unicode class name Zs
  171. \value Separator_Line Unicode class name Zl
  172. \value Separator_Paragraph Unicode class name Zp
  173. \value Other_Control Unicode class name Cc
  174. \value Other_Format Unicode class name Cf
  175. \value Other_Surrogate Unicode class name Cs
  176. \value Other_PrivateUse Unicode class name Co
  177. \value Other_NotAssigned Unicode class name Cn
  178. The following categories are informative in Unicode:
  179. \value Letter_Uppercase Unicode class name Lu
  180. \value Letter_Lowercase Unicode class name Ll
  181. \value Letter_Titlecase Unicode class name Lt
  182. \value Letter_Modifier Unicode class name Lm
  183. \value Letter_Other Unicode class name Lo
  184. \value Punctuation_Connector Unicode class name Pc
  185. \value Punctuation_Dash Unicode class name Pd
  186. \value Punctuation_Open Unicode class name Ps
  187. \value Punctuation_Close Unicode class name Pe
  188. \value Punctuation_InitialQuote Unicode class name Pi
  189. \value Punctuation_FinalQuote Unicode class name Pf
  190. \value Punctuation_Other Unicode class name Po
  191. \value Symbol_Math Unicode class name Sm
  192. \value Symbol_Currency Unicode class name Sc
  193. \value Symbol_Modifier Unicode class name Sk
  194. \value Symbol_Other Unicode class name So
  195. \value NoCategory Qt cannot find an appropriate category for the character.
  196. \omitvalue Punctuation_Dask
  197. \sa category()
  198. */
  199. /*!
  200. \enum QChar::Direction
  201. This enum type defines the Unicode direction attributes. See the
  202. \l{http://www.unicode.org/}{Unicode Standard} for a description
  203. of the values.
  204. In order to conform to C/C++ naming conventions "Dir" is prepended
  205. to the codes used in the Unicode Standard.
  206. \value DirAL
  207. \value DirAN
  208. \value DirB
  209. \value DirBN
  210. \value DirCS
  211. \value DirEN
  212. \value DirES
  213. \value DirET
  214. \value DirL
  215. \value DirLRE
  216. \value DirLRO
  217. \value DirNSM
  218. \value DirON
  219. \value DirPDF
  220. \value DirR
  221. \value DirRLE
  222. \value DirRLO
  223. \value DirS
  224. \value DirWS
  225. \sa direction()
  226. */
  227. /*!
  228. \enum QChar::Decomposition
  229. This enum type defines the Unicode decomposition attributes. See
  230. the \l{http://www.unicode.org/}{Unicode Standard} for a
  231. description of the values.
  232. \value NoDecomposition
  233. \value Canonical
  234. \value Circle
  235. \value Compat
  236. \value Final
  237. \value Font
  238. \value Fraction
  239. \value Initial
  240. \value Isolated
  241. \value Medial
  242. \value Narrow
  243. \value NoBreak
  244. \value Small
  245. \value Square
  246. \value Sub
  247. \value Super
  248. \value Vertical
  249. \value Wide
  250. \omitvalue Single
  251. \sa decomposition()
  252. */
  253. /*!
  254. \enum QChar::Joining
  255. This enum type defines the Unicode joining attributes. See the
  256. \l{http://www.unicode.org/}{Unicode Standard} for a description
  257. of the values.
  258. \value Center
  259. \value Dual
  260. \value OtherJoining
  261. \value Right
  262. \sa joining()
  263. */
  264. /*!
  265. \enum QChar::CombiningClass
  266. \internal
  267. This enum type defines names for some of the Unicode combining
  268. classes. See the \l{http://www.unicode.org/}{Unicode Standard}
  269. for a description of the values.
  270. \value Combining_Above
  271. \value Combining_AboveAttached
  272. \value Combining_AboveLeft
  273. \value Combining_AboveLeftAttached
  274. \value Combining_AboveRight
  275. \value Combining_AboveRightAttached
  276. \value Combining_Below
  277. \value Combining_BelowAttached
  278. \value Combining_BelowLeft
  279. \value Combining_BelowLeftAttached
  280. \value Combining_BelowRight
  281. \value Combining_BelowRightAttached
  282. \value Combining_DoubleAbove
  283. \value Combining_DoubleBelow
  284. \value Combining_IotaSubscript
  285. \value Combining_Left
  286. \value Combining_LeftAttached
  287. \value Combining_Right
  288. \value Combining_RightAttached
  289. */
  290. /*!
  291. \enum QChar::SpecialCharacter
  292. \value Null A QChar with this value isNull().
  293. \value Nbsp Non-breaking space.
  294. \value ReplacementCharacter The character shown when a font has no glyph
  295. for a certain codepoint. A special question mark character is often
  296. used. Codecs use this codepoint when input data cannot be
  297. represented in Unicode.
  298. \value ObjectReplacementCharacter Used to represent an object such as an
  299. image when such objects cannot be presented.
  300. \value ByteOrderMark
  301. \value ByteOrderSwapped
  302. \value ParagraphSeparator
  303. \value LineSeparator
  304. \omitvalue null
  305. \omitvalue replacement
  306. \omitvalue byteOrderMark
  307. \omitvalue byteOrderSwapped
  308. \omitvalue nbsp
  309. */
  310. /*!
  311. \fn void QChar::setCell(uchar cell)
  312. \internal
  313. */
  314. /*!
  315. \fn void QChar::setRow(uchar row)
  316. \internal
  317. */
  318. /*!
  319. \fn QChar::QChar()
  320. Constructs a null QChar ('\\0').
  321. \sa isNull()
  322. */
  323. /*!
  324. \fn QChar::QChar(QLatin1Char ch)
  325. Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
  326. */
  327. /*!
  328. \fn QChar::QChar(SpecialCharacter ch)
  329. Constructs a QChar for the predefined character value \a ch.
  330. */
  331. /*!
  332. Constructs a QChar corresponding to ASCII/Latin-1 character \a
  333. ch.
  334. */
  335. QChar::QChar(char ch)
  336. {
  337. #ifndef QT_NO_CODEC_FOR_C_STRINGS
  338. if (QTextCodec::codecForCStrings())
  339. // #####
  340. ucs = QTextCodec::codecForCStrings()->toUnicode(&ch, 1).at(0).unicode();
  341. else
  342. #endif
  343. ucs = uchar(ch);
  344. }
  345. /*!
  346. Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
  347. */
  348. QChar::QChar(uchar ch)
  349. {
  350. #ifndef QT_NO_CODEC_FOR_C_STRINGS
  351. if (QTextCodec::codecForCStrings()) {
  352. // #####
  353. char c = char(ch);
  354. ucs = QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
  355. } else
  356. #endif
  357. ucs = ch;
  358. }
  359. /*!
  360. \fn QChar::QChar(uchar cell, uchar row)
  361. Constructs a QChar for Unicode cell \a cell in row \a row.
  362. \sa cell(), row()
  363. */
  364. /*!
  365. \fn QChar::QChar(ushort code)
  366. Constructs a QChar for the character with Unicode code point \a
  367. code.
  368. */
  369. /*!
  370. \fn QChar::QChar(short code)
  371. Constructs a QChar for the character with Unicode code point \a
  372. code.
  373. */
  374. /*!
  375. \fn QChar::QChar(uint code)
  376. Constructs a QChar for the character with Unicode code point \a
  377. code.
  378. */
  379. /*!
  380. \fn QChar::QChar(int code)
  381. Constructs a QChar for the character with Unicode code point \a
  382. code.
  383. */
  384. /*!
  385. \fn bool QChar::isNull() const
  386. Returns true if the character is the Unicode character 0x0000
  387. ('\\0'); otherwise returns false.
  388. */
  389. /*!
  390. \fn uchar QChar::cell() const
  391. Returns the cell (least significant byte) of the Unicode
  392. character.
  393. \sa row()
  394. */
  395. /*!
  396. \fn uchar QChar::row() const
  397. Returns the row (most significant byte) of the Unicode character.
  398. \sa cell()
  399. */
  400. /*!
  401. Returns true if the character is a printable character; otherwise
  402. returns false. This is any character not of category Cc or Cn.
  403. Note that this gives no indication of whether the character is
  404. available in a particular font.
  405. */
  406. bool QChar::isPrint() const
  407. {
  408. const int test = FLAG(Other_Control) |
  409. FLAG(Other_NotAssigned);
  410. return !(FLAG(qGetProp(ucs)->category) & test);
  411. }
  412. /*!
  413. Returns true if the character is a separator character
  414. (Separator_* categories); otherwise returns false.
  415. */
  416. bool QChar::isSpace() const
  417. {
  418. if(ucs >= 9 && ucs <=13)
  419. return true;
  420. const int test = FLAG(Separator_Space) |
  421. FLAG(Separator_Line) |
  422. FLAG(Separator_Paragraph);
  423. return FLAG(qGetProp(ucs)->category) & test;
  424. }
  425. /*!
  426. Returns true if the character is a mark (Mark_* categories);
  427. otherwise returns false.
  428. See QChar::Category for more information regarding marks.
  429. */
  430. bool QChar::isMark() const
  431. {
  432. const int test = FLAG(Mark_NonSpacing) |
  433. FLAG(Mark_SpacingCombining) |
  434. FLAG(Mark_Enclosing);
  435. return FLAG(qGetProp(ucs)->category) & test;
  436. }
  437. /*!
  438. Returns true if the character is a punctuation mark (Punctuation_*
  439. categories); otherwise returns false.
  440. */
  441. bool QChar::isPunct() const
  442. {
  443. const int test = FLAG(Punctuation_Connector) |
  444. FLAG(Punctuation_Dash) |
  445. FLAG(Punctuation_Open) |
  446. FLAG(Punctuation_Close) |
  447. FLAG(Punctuation_InitialQuote) |
  448. FLAG(Punctuation_FinalQuote) |
  449. FLAG(Punctuation_Other);
  450. return FLAG(qGetProp(ucs)->category) & test;
  451. }
  452. /*!
  453. Returns true if the character is a letter (Letter_* categories);
  454. otherwise returns false.
  455. */
  456. bool QChar::isLetter() const
  457. {
  458. const int test = FLAG(Letter_Uppercase) |
  459. FLAG(Letter_Lowercase) |
  460. FLAG(Letter_Titlecase) |
  461. FLAG(Letter_Modifier) |
  462. FLAG(Letter_Other);
  463. return FLAG(qGetProp(ucs)->category) & test;
  464. }
  465. /*!
  466. Returns true if the character is a number (Number_* categories,
  467. not just 0-9); otherwise returns false.
  468. \sa isDigit()
  469. */
  470. bool QChar::isNumber() const
  471. {
  472. const int test = FLAG(Number_DecimalDigit) |
  473. FLAG(Number_Letter) |
  474. FLAG(Number_Other);
  475. return FLAG(qGetProp(ucs)->category) & test;
  476. }
  477. /*!
  478. Returns true if the character is a letter or number (Letter_* or
  479. Number_* categories); otherwise returns false.
  480. */
  481. bool QChar::isLetterOrNumber() const
  482. {
  483. const int test = FLAG(Letter_Uppercase) |
  484. FLAG(Letter_Lowercase) |
  485. FLAG(Letter_Titlecase) |
  486. FLAG(Letter_Modifier) |
  487. FLAG(Letter_Other) |
  488. FLAG(Number_DecimalDigit) |
  489. FLAG(Number_Letter) |
  490. FLAG(Number_Other);
  491. return FLAG(qGetProp(ucs)->category) & test;
  492. }
  493. /*!
  494. Returns true if the character is a decimal digit
  495. (Number_DecimalDigit); otherwise returns false.
  496. */
  497. bool QChar::isDigit() const
  498. {
  499. return (qGetProp(ucs)->category == Number_DecimalDigit);
  500. }
  501. /*!
  502. Returns true if the character is a symbol (Symbol_* categories);
  503. otherwise returns false.
  504. */
  505. bool QChar::isSymbol() const
  506. {
  507. const int test = FLAG(Symbol_Math) |
  508. FLAG(Symbol_Currency) |
  509. FLAG(Symbol_Modifier) |
  510. FLAG(Symbol_Other);
  511. return FLAG(qGetProp(ucs)->category) & test;
  512. }
  513. /*!
  514. \fn bool QChar::isHighSurrogate() const
  515. Returns true if the QChar is the high part of a utf16 surrogate
  516. (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
  517. */
  518. /*!
  519. \fn bool QChar::isLowSurrogate() const
  520. Returns true if the QChar is the low part of a utf16 surrogate
  521. (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
  522. */
  523. /*!
  524. \fn static bool QChar::isHighSurrogate(uint ucs4)
  525. \since 4.7
  526. Returns true if the UCS-4-encoded character specified by \a ucs4
  527. is the high part of a utf16 surrogate
  528. (ie. if its code point is between 0xd800 and 0xdbff, inclusive).
  529. */
  530. /*!
  531. \fn static bool QChar::isLowSurrogate(uint ucs4)
  532. \since 4.7
  533. Returns true if the UCS-4-encoded character specified by \a ucs4
  534. is the low part of a utf16 surrogate
  535. (ie. if its code point is between 0xdc00 and 0xdfff, inclusive).
  536. */
  537. /*!
  538. \fn static bool QChar::requiresSurrogates(uint ucs4)
  539. \since 4.7
  540. Returns true if the UCS-4-encoded character specified by \a ucs4
  541. can be split into the high and low parts of a utf16 surrogate
  542. (ie. if its code point is greater than or equal to 0x10000).
  543. */
  544. /*!
  545. \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
  546. Converts a UTF16 surrogate pair with the given \a high and \a low values
  547. to its UCS-4 code point.
  548. */
  549. /*!
  550. \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
  551. Converts a utf16 surrogate pair (\a high, \a low) to its ucs4 code point.
  552. */
  553. /*!
  554. \fn static ushort QChar::highSurrogate(uint ucs4)
  555. Returns the high surrogate value of a ucs4 code point.
  556. The returned result is undefined if \a ucs4 is smaller than 0x10000.
  557. */
  558. /*!
  559. \fn static ushort QChar::lowSurrogate(uint ucs4)
  560. Returns the low surrogate value of a ucs4 code point.
  561. The returned result is undefined if \a ucs4 is smaller than 0x10000.
  562. */
  563. /*!
  564. Returns the numeric value of the digit, or -1 if the character is
  565. not a digit.
  566. */
  567. int QChar::digitValue() const
  568. {
  569. return qGetProp(ucs)->digitValue;
  570. }
  571. /*!
  572. \overload
  573. Returns the numeric value of the digit, specified by the UCS-2-encoded
  574. character, \a ucs2, or -1 if the character is not a digit.
  575. */
  576. int QChar::digitValue(ushort ucs2)
  577. {
  578. return qGetProp(ucs2)->digitValue;
  579. }
  580. /*!
  581. \overload
  582. Returns the numeric value of the digit specified by the UCS-4-encoded
  583. character, \a ucs4, or -1 if the character is not a digit.
  584. */
  585. int QChar::digitValue(uint ucs4)
  586. {
  587. if (ucs4 > UNICODE_LAST_CODEPOINT)
  588. return 0;
  589. return qGetProp(ucs4)->digitValue;
  590. }
  591. /*!
  592. Returns the character's category.
  593. */
  594. QChar::Category QChar::category() const
  595. {
  596. return (QChar::Category) qGetProp(ucs)->category;
  597. }
  598. /*!
  599. \overload
  600. \since 4.3
  601. Returns the category of the UCS-4-encoded character specified by \a ucs4.
  602. */
  603. QChar::Category QChar::category(uint ucs4)
  604. {
  605. if (ucs4 > UNICODE_LAST_CODEPOINT)
  606. return QChar::NoCategory;
  607. return (QChar::Category) qGetProp(ucs4)->category;
  608. }
  609. /*!
  610. \overload
  611. Returns the category of the UCS-2-encoded character specified by \a ucs2.
  612. */
  613. QChar::Category QChar::category(ushort ucs2)
  614. {
  615. return (QChar::Category) qGetProp(ucs2)->category;
  616. }
  617. /*!
  618. Returns the character's direction.
  619. */
  620. QChar::Direction QChar::direction() const
  621. {
  622. return (QChar::Direction) qGetProp(ucs)->direction;
  623. }
  624. /*!
  625. \overload
  626. Returns the direction of the UCS-4-encoded character specified by \a ucs4.
  627. */
  628. QChar::Direction QChar::direction(uint ucs4)
  629. {
  630. if (ucs4 > UNICODE_LAST_CODEPOINT)
  631. return QChar::DirL;
  632. return (QChar::Direction) qGetProp(ucs4)->direction;
  633. }
  634. /*!
  635. \overload
  636. Returns the direction of the UCS-2-encoded character specified by \a ucs2.
  637. */
  638. QChar::Direction QChar::direction(ushort ucs2)
  639. {
  640. return (QChar::Direction) qGetProp(ucs2)->direction;
  641. }
  642. /*!
  643. Returns information about the joining properties of the character
  644. (needed for certain languages such as Arabic).
  645. */
  646. QChar::Joining QChar::joining() const
  647. {
  648. return (QChar::Joining) qGetProp(ucs)->joining;
  649. }
  650. /*!
  651. \overload
  652. Returns information about the joining properties of the UCS-4-encoded
  653. character specified by \a ucs4 (needed for certain languages such as
  654. Arabic).
  655. */
  656. QChar::Joining QChar::joining(uint ucs4)
  657. {
  658. if (ucs4 > UNICODE_LAST_CODEPOINT)
  659. return QChar::OtherJoining;
  660. return (QChar::Joining) qGetProp(ucs4)->joining;
  661. }
  662. /*!
  663. \overload
  664. Returns information about the joining properties of the UCS-2-encoded
  665. character specified by \a ucs2 (needed for certain languages such as
  666. Arabic).
  667. */
  668. QChar::Joining QChar::joining(ushort ucs2)
  669. {
  670. return (QChar::Joining) qGetProp(ucs2)->joining;
  671. }
  672. /*!
  673. Returns true if the character should be reversed if the text
  674. direction is reversed; otherwise returns false.
  675. Same as (ch.mirroredChar() != ch).
  676. \sa mirroredChar()
  677. */
  678. bool QChar::hasMirrored() const
  679. {
  680. return qGetProp(ucs)->mirrorDiff != 0;
  681. }
  682. /*!
  683. \fn bool QChar::isLower() const
  684. Returns true if the character is a lowercase letter, i.e.
  685. category() is Letter_Lowercase.
  686. \sa isUpper(), toLower(), toUpper()
  687. */
  688. /*!
  689. \fn bool QChar::isUpper() const
  690. Returns true if the character is an uppercase letter, i.e.
  691. category() is Letter_Uppercase.
  692. \sa isLower(), toUpper(), toLower()
  693. */
  694. /*!
  695. \fn bool QChar::isTitleCase() const
  696. \since 4.3
  697. Returns true if the character is a titlecase letter, i.e.
  698. category() is Letter_Titlecase.
  699. \sa isLower(), toUpper(), toLower(), toTitleCase()
  700. */
  701. /*!
  702. Returns the mirrored character if this character is a mirrored
  703. character; otherwise returns the character itself.
  704. \sa hasMirrored()
  705. */
  706. QChar QChar::mirroredChar() const
  707. {
  708. return ucs + qGetProp(ucs)->mirrorDiff;
  709. }
  710. /*!
  711. \overload
  712. Returns the mirrored character if the UCS-4-encoded character specified
  713. by \a ucs4 is a mirrored character; otherwise returns the character itself.
  714. \sa hasMirrored()
  715. */
  716. uint QChar::mirroredChar(uint ucs4)
  717. {
  718. if (ucs4 > UNICODE_LAST_CODEPOINT)
  719. return ucs4;
  720. return ucs4 + qGetProp(ucs4)->mirrorDiff;
  721. }
  722. /*!
  723. \overload
  724. Returns the mirrored character if the UCS-2-encoded character specified
  725. by \a ucs2 is a mirrored character; otherwise returns the character itself.
  726. \sa hasMirrored()
  727. */
  728. ushort QChar::mirroredChar(ushort ucs2)
  729. {
  730. return ucs2 + qGetProp(ucs2)->mirrorDiff;
  731. }
  // Constants used for the arithmetic decomposition of Hangul syllables.
  enum {
      // First code point of each range.
      Hangul_SBase = 0xAC00,  // syllables
      Hangul_LBase = 0x1100,  // L (first component written to the buffer)
      Hangul_VBase = 0x1161,  // V (second component)
      Hangul_TBase = 0x11A7,  // T (third component); an index of 0 means "no T"

      // Number of code points in each range.
      Hangul_SCount = 11172,
      Hangul_LCount = 19,
      Hangul_VCount = 21,
      Hangul_TCount = 28,

      // Number of syllables sharing one L component (V count * T count).
      Hangul_NCount = Hangul_VCount * Hangul_TCount
  };
  743. // buffer has to have a length of 3. It's needed for Hangul decomposition
  744. static const unsigned short * QT_FASTCALL decompositionHelper
  745. (uint ucs4, int *length, int *tag, unsigned short *buffer)
  746. {
  747. *length = 0;
  748. if (ucs4 > UNICODE_LAST_CODEPOINT)
  749. return 0;
  750. if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
  751. int SIndex = ucs4 - Hangul_SBase;
  752. buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
  753. buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
  754. buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
  755. *length = buffer[2] == Hangul_TBase ? 2 : 3;
  756. *tag = QChar::Canonical;
  757. return buffer;
  758. }
  759. const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
  760. if (index == 0xffff)
  761. return 0;
  762. const unsigned short *decomposition = uc_decomposition_map+index;
  763. *tag = (*decomposition) & 0xff;
  764. *length = (*decomposition) >> 8;
  765. return decomposition+1;
  766. }
  767. /*!
  768. Decomposes a character into its parts. Returns an empty string if
  769. no decomposition exists.
  770. */
  771. QString QChar::decomposition() const
  772. {
  773. return decomposition(ucs);
  774. }
  775. /*!
  776. \overload
  777. Decomposes the UCS-4-encoded character specified by \a ucs4 into its
  778. constituent parts. Returns an empty string if no decomposition exists.
  779. */
  780. QString QChar::decomposition(uint ucs4)
  781. {
  782. unsigned short buffer[3];
  783. int length;
  784. int tag;
  785. const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
  786. return QString::fromUtf16(d, length);
  787. }
  788. /*!
  789. Returns the tag defining the composition of the character. Returns
  790. QChar::Single if no decomposition exists.
  791. */
  792. QChar::Decomposition QChar::decompositionTag() const
  793. {
  794. return decompositionTag(ucs);
  795. }
  796. /*!
  797. \overload
  798. Returns the tag defining the composition of the UCS-4-encoded character
  799. specified by \a ucs4. Returns QChar::Single if no decomposition exists.
  800. */
  801. QChar::Decomposition QChar::decompositionTag(uint ucs4)
  802. {
  803. if (ucs4 > UNICODE_LAST_CODEPOINT)
  804. return QChar::NoDecomposition;
  805. const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
  806. if (index == 0xffff)
  807. return QChar::NoDecomposition;
  808. return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
  809. }
  810. /*!
  811. Returns the combining class for the character as defined in the
  812. Unicode standard. This is mainly useful as a positioning hint for
  813. marks attached to a base character.
  814. The Qt text rendering engine uses this information to correctly
  815. position non-spacing marks around a base character.
  816. */
  817. unsigned char QChar::combiningClass() const
  818. {
  819. return (unsigned char) qGetProp(ucs)->combiningClass;
  820. }
  821. /*!
  822. \overload
  823. Returns the combining class for the UCS-4-encoded character specified by
  824. \a ucs4, as defined in the Unicode standard.
  825. */
  826. unsigned char QChar::combiningClass(uint ucs4)
  827. {
  828. if (ucs4 > UNICODE_LAST_CODEPOINT)
  829. return 0;
  830. return (unsigned char) qGetProp(ucs4)->combiningClass;
  831. }
  832. /*!
  833. \overload
  834. Returns the combining class for the UCS-2-encoded character specified by
  835. \a ucs2, as defined in the Unicode standard.
  836. */
  837. unsigned char QChar::combiningClass(ushort ucs2)
  838. {
  839. return (unsigned char) qGetProp(ucs2)->combiningClass;
  840. }
  841. /*!
  842. Returns the Unicode version that introduced this character.
  843. */
  844. QChar::UnicodeVersion QChar::unicodeVersion() const
  845. {
  846. return (QChar::UnicodeVersion) qGetProp(ucs)->unicodeVersion;
  847. }
  848. /*!
  849. \overload
  850. Returns the Unicode version that introduced the character specified in
  851. its UCS-4-encoded form as \a ucs4.
  852. */
  853. QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4)
  854. {
  855. if (ucs4 > UNICODE_LAST_CODEPOINT)
  856. return QChar::Unicode_Unassigned;
  857. return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
  858. }
  859. /*!
  860. \overload
  861. Returns the Unicode version that introduced the character specified in
  862. its UCS-2-encoded form as \a ucs2.
  863. */
  864. QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2)
  865. {
  866. return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion;
  867. }
  868. /*!
  869. \since 4.8
  870. Returns the most recent supported Unicode version.
  871. */
  872. QChar::UnicodeVersion QChar::currentUnicodeVersion()
  873. {
  874. return UNICODE_DATA_VERSION;
  875. }
  876. /*!
  877. Returns the lowercase equivalent if the character is uppercase or titlecase;
  878. otherwise returns the character itself.
  879. */
  880. QChar QChar::toLower() const
  881. {
  882. const QUnicodeTables::Properties *p = qGetProp(ucs);
  883. if (!p->lowerCaseSpecial)
  884. return ucs + p->lowerCaseDiff;
  885. return ucs;
  886. }
  887. /*!
  888. \overload
  889. Returns the lowercase equivalent of the UCS-4-encoded character specified
  890. by \a ucs4 if the character is uppercase or titlecase; otherwise returns
  891. the character itself.
  892. */
  893. uint QChar::toLower(uint ucs4)
  894. {
  895. if (ucs4 > UNICODE_LAST_CODEPOINT)
  896. return ucs4;
  897. const QUnicodeTables::Properties *p = qGetProp(ucs4);
  898. if (!p->lowerCaseSpecial)
  899. return ucs4 + p->lowerCaseDiff;
  900. return ucs4;
  901. }
  902. /*!
  903. \overload
  904. Returns the lowercase equivalent of the UCS-2-encoded character specified
  905. by \a ucs2 if the character is uppercase or titlecase; otherwise returns
  906. the character itself.
  907. */
  908. ushort QChar::toLower(ushort ucs2)
  909. {
  910. const QUnicodeTables::Properties *p = qGetProp(ucs2);
  911. if (!p->lowerCaseSpecial)
  912. return ucs2 + p->lowerCaseDiff;
  913. return ucs2;
  914. }
  915. /*!
  916. Returns the uppercase equivalent if the character is lowercase or titlecase;
  917. otherwise returns the character itself.
  918. */
  919. QChar QChar::toUpper() const
  920. {
  921. const QUnicodeTables::Properties *p = qGetProp(ucs);
  922. if (!p->upperCaseSpecial)
  923. return ucs + p->upperCaseDiff;
  924. return ucs;
  925. }
  926. /*!
  927. \overload
  928. Returns the uppercase equivalent of the UCS-4-encoded character specified
  929. by \a ucs4 if the character is lowercase or titlecase; otherwise returns
  930. the character itself.
  931. */
  932. uint QChar::toUpper(uint ucs4)
  933. {
  934. if (ucs4 > UNICODE_LAST_CODEPOINT)
  935. return ucs4;
  936. const QUnicodeTables::Properties *p = qGetProp(ucs4);
  937. if (!p->upperCaseSpecial)
  938. return ucs4 + p->upperCaseDiff;
  939. return ucs4;
  940. }
  941. /*!
  942. \overload
  943. Returns the uppercase equivalent of the UCS-2-encoded character specified
  944. by \a ucs2 if the character is lowercase or titlecase; otherwise returns
  945. the character itself.
  946. */
  947. ushort QChar::toUpper(ushort ucs2)
  948. {
  949. const QUnicodeTables::Properties *p = qGetProp(ucs2);
  950. if (!p->upperCaseSpecial)
  951. return ucs2 + p->upperCaseDiff;
  952. return ucs2;
  953. }
  954. /*!
  955. Returns the title case equivalent if the character is lowercase or uppercase;
  956. otherwise returns the character itself.
  957. */
  958. QChar QChar::toTitleCase() const
  959. {
  960. const QUnicodeTables::Properties *p = qGetProp(ucs);
  961. if (!p->titleCaseSpecial)
  962. return ucs + p->titleCaseDiff;
  963. return ucs;
  964. }
  965. /*!
  966. \overload
  967. Returns the title case equivalent of the UCS-4-encoded character specified
  968. by \a ucs4 if the character is lowercase or uppercase; otherwise returns
  969. the character itself.
  970. */
  971. uint QChar::toTitleCase(uint ucs4)
  972. {
  973. if (ucs4 > UNICODE_LAST_CODEPOINT)
  974. return ucs4;
  975. const QUnicodeTables::Properties *p = qGetProp(ucs4);
  976. if (!p->titleCaseSpecial)
  977. return ucs4 + p->titleCaseDiff;
  978. return ucs4;
  979. }
  980. /*!
  981. \overload
  982. Returns the title case equivalent of the UCS-2-encoded character specified
  983. by \a ucs2 if the character is lowercase or uppercase; otherwise returns
  984. the character itself.
  985. */
  986. ushort QChar::toTitleCase(ushort ucs2)
  987. {
  988. const QUnicodeTables::Properties *p = qGetProp(ucs2);
  989. if (!p->titleCaseSpecial)
  990. return ucs2 + p->titleCaseDiff;
  991. return ucs2;
  992. }
  993. static inline uint foldCase(const ushort *ch, const ushort *start)
  994. {
  995. uint c = *ch;
  996. if (QChar(c).isLowSurrogate() && ch > start && QChar(*(ch - 1)).isHighSurrogate())
  997. c = QChar::surrogateToUcs4(*(ch - 1), c);
  998. return *ch + qGetProp(c)->caseFoldDiff;
  999. }
  1000. static inline uint foldCase(uint ch, uint &last)
  1001. {
  1002. uint c = ch;
  1003. if (QChar(c).isLowSurrogate() && QChar(last).isHighSurrogate())
  1004. c = QChar::surrogateToUcs4(last, c);
  1005. last = ch;
  1006. return ch + qGetProp(c)->caseFoldDiff;
  1007. }
  1008. static inline ushort foldCase(ushort ch)
  1009. {
  1010. return ch + qGetProp(ch)->caseFoldDiff;
  1011. }
  1012. /*!
Returns the case folded equivalent of the character. For most Unicode characters this
is the same as toLower().
  1015. */
  1016. QChar QChar::toCaseFolded() const
  1017. {
  1018. return ucs + qGetProp(ucs)->caseFoldDiff;
  1019. }
  1020. /*!
  1021. \overload
Returns the case folded equivalent of the UCS-4-encoded character specified
by \a ucs4. For most Unicode characters this is the same as toLower().
  1024. */
  1025. uint QChar::toCaseFolded(uint ucs4)
  1026. {
  1027. if (ucs4 > UNICODE_LAST_CODEPOINT)
  1028. return ucs4;
  1029. return ucs4 + qGetProp(ucs4)->caseFoldDiff;
  1030. }
  1031. /*!
  1032. \overload
Returns the case folded equivalent of the UCS-2-encoded character specified
by \a ucs2. For most Unicode characters this is the same as toLower().
  1035. */
  1036. ushort QChar::toCaseFolded(ushort ucs2)
  1037. {
  1038. return ucs2 + qGetProp(ucs2)->caseFoldDiff;
  1039. }
  1040. /*!
  1041. \fn char QChar::latin1() const
  1042. Use toLatin1() instead.
  1043. */
  1044. /*!
  1045. \fn char QChar::ascii() const
  1046. Use toAscii() instead.
  1047. */
  1048. /*!
  1049. \fn char QChar::toLatin1() const
  1050. Returns the Latin-1 character equivalent to the QChar, or 0. This
  1051. is mainly useful for non-internationalized software.
  1052. \sa toAscii(), unicode(), QTextCodec::codecForCStrings()
  1053. */
  1054. /*!
  1055. \fn char QChar::toAscii() const
  1056. Returns the character value of the QChar obtained using the current
  1057. codec used to read C strings, or 0 if the character is not representable
  1058. using this codec. The default codec handles Latin-1 encoded text,
  1059. but this can be changed to assist developers writing source code using
  1060. other encodings.
  1061. The main purpose of this function is to preserve ASCII characters used
  1062. in C strings. This is mainly useful for developers of non-internationalized
  1063. software.
  1064. \sa toLatin1(), unicode(), QTextCodec::codecForCStrings()
  1065. */
  1066. #ifdef Q_COMPILER_MANGLES_RETURN_TYPE
  1067. const char QChar::toAscii() const
  1068. #else
  1069. char QChar::toAscii() const
  1070. #endif
  1071. {
  1072. #ifndef QT_NO_CODEC_FOR_C_STRINGS
  1073. if (QTextCodec::codecForCStrings())
  1074. // #####
  1075. return QTextCodec::codecForCStrings()->fromUnicode(QString(*this)).at(0);
  1076. #endif
  1077. return ucs > 0xff ? 0 : char(ucs);
  1078. }
  1079. /*!
  1080. \fn QChar QChar::fromLatin1(char c)
  1081. Converts the Latin-1 character \a c to its equivalent QChar. This
  1082. is mainly useful for non-internationalized software.
  1083. \sa fromAscii(), unicode(), QTextCodec::codecForCStrings()
  1084. */
  1085. /*!
  1086. Converts the ASCII character \a c to its equivalent QChar. This
  1087. is mainly useful for non-internationalized software.
  1088. An alternative is to use QLatin1Char.
  1089. \sa fromLatin1(), unicode(), QTextCodec::codecForCStrings()
  1090. */
  1091. QChar QChar::fromAscii(char c)
  1092. {
  1093. #ifndef QT_NO_CODEC_FOR_C_STRINGS
  1094. if (QTextCodec::codecForCStrings())
  1095. // #####
  1096. return QTextCodec::codecForCStrings()->toUnicode(&c, 1).at(0).unicode();
  1097. #endif
  1098. return QChar(ushort((uchar)c));
  1099. }
  1100. #ifndef QT_NO_DATASTREAM
  1101. /*!
  1102. \relates QChar
  1103. Writes the char \a chr to the stream \a out.
  1104. \sa {Serializing Qt Data Types}
  1105. */
  1106. QDataStream &operator<<(QDataStream &out, const QChar &chr)
  1107. {
  1108. out << quint16(chr.unicode());
  1109. return out;
  1110. }
  1111. /*!
  1112. \relates QChar
  1113. Reads a char from the stream \a in into char \a chr.
  1114. \sa {Serializing Qt Data Types}
  1115. */
  1116. QDataStream &operator>>(QDataStream &in, QChar &chr)
  1117. {
  1118. quint16 u;
  1119. in >> u;
  1120. chr.unicode() = ushort(u);
  1121. return in;
  1122. }
  1123. #endif // QT_NO_DATASTREAM
  1124. /*!
  1125. \fn ushort & QChar::unicode()
  1126. Returns a reference to the numeric Unicode value of the QChar.
  1127. */
  1128. /*!
  1129. \fn ushort QChar::unicode() const
  1130. \overload
  1131. */
  1132. /*****************************************************************************
  1133. Documentation of QChar related functions
  1134. *****************************************************************************/
  1135. /*!
  1136. \fn bool operator==(QChar c1, QChar c2)
  1137. \relates QChar
  1138. Returns true if \a c1 and \a c2 are the same Unicode character;
  1139. otherwise returns false.
  1140. */
  1141. /*!
  1142. \fn int operator!=(QChar c1, QChar c2)
  1143. \relates QChar
  1144. Returns true if \a c1 and \a c2 are not the same Unicode
  1145. character; otherwise returns false.
  1146. */
  1147. /*!
  1148. \fn int operator<=(QChar c1, QChar c2)
  1149. \relates QChar
  1150. Returns true if the numeric Unicode value of \a c1 is less than
  1151. or equal to that of \a c2; otherwise returns false.
  1152. */
  1153. /*!
  1154. \fn int operator>=(QChar c1, QChar c2)
  1155. \relates QChar
  1156. Returns true if the numeric Unicode value of \a c1 is greater than
  1157. or equal to that of \a c2; otherwise returns false.
  1158. */
  1159. /*!
  1160. \fn int operator<(QChar c1, QChar c2)
  1161. \relates QChar
  1162. Returns true if the numeric Unicode value of \a c1 is less than
  1163. that of \a c2; otherwise returns false.
  1164. */
  1165. /*!
  1166. \fn int operator>(QChar c1, QChar c2)
  1167. \relates QChar
  1168. Returns true if the numeric Unicode value of \a c1 is greater than
  1169. that of \a c2; otherwise returns false.
  1170. */
  1171. /*!
  1172. \fn bool QChar::mirrored() const
  1173. Use hasMirrored() instead.
  1174. */
  1175. /*!
  1176. \fn QChar QChar::lower() const
  1177. Use toLower() instead.
  1178. */
  1179. /*!
  1180. \fn QChar QChar::upper() const
  1181. Use toUpper() instead.
  1182. */
  1183. /*!
  1184. \fn bool QChar::networkOrdered()
  1185. See if QSysInfo::ByteOrder == QSysInfo::BigEndian instead.
  1186. */
  1187. // ---------------------------------------------------------------------------
  1188. static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
  1189. {
  1190. unsigned short buffer[3];
  1191. QString &s = *str;
  1192. const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
  1193. const unsigned short *uc = utf16 + s.length();
  1194. while (uc != utf16 + from) {
  1195. uint ucs4 = *(--uc);
  1196. if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
  1197. ushort high = *(uc - 1);
  1198. if (QChar(high).isHighSurrogate()) {
  1199. --uc;
  1200. ucs4 = QChar::surrogateToUcs4(high, ucs4);
  1201. }
  1202. }
  1203. QChar::UnicodeVersion v = QChar::unicodeVersion(ucs4);
  1204. if (v == QChar::Unicode_Unassigned || v > version)
  1205. continue;
  1206. int length;
  1207. int tag;
  1208. const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
  1209. if (!d || (canonical && tag != QChar::Canonical))
  1210. continue;
  1211. int pos = uc - utf16;
  1212. s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
  1213. // since the insert invalidates the pointers and we do decomposition recursive
  1214. utf16 = reinterpret_cast<unsigned short *>(s.data());
  1215. uc = utf16 + pos + length;
  1216. }
  1217. }
  1218. struct UCS2Pair {
  1219. ushort u1;
  1220. ushort u2;
  1221. };
  1222. inline bool operator<(ushort u1, const UCS2Pair &ligature)
  1223. { return u1 < ligature.u1; }
  1224. inline bool operator<(const UCS2Pair &ligature, ushort u1)
  1225. { return ligature.u1 < u1; }
  1226. static ushort ligatureHelper(ushort u1, ushort u2)
  1227. {
  1228. // hangul L-V pair
  1229. int LIndex = u1 - Hangul_LBase;
  1230. if (0 <= LIndex && LIndex < Hangul_LCount) {
  1231. int VIndex = u2 - Hangul_VBase;
  1232. if (0 <= VIndex && VIndex < Hangul_VCount)
  1233. return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
  1234. }
  1235. // hangul LV-T pair
  1236. int SIndex = u1 - Hangul_SBase;
  1237. if (0 <= SIndex && SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
  1238. int TIndex = u2 - Hangul_TBase;
  1239. if (0 <= TIndex && TIndex <= Hangul_TCount)
  1240. return u1 + TIndex;
  1241. }
  1242. const unsigned short index = GET_LIGATURE_INDEX(u2);
  1243. if (index == 0xffff)
  1244. return 0;
  1245. const unsigned short *ligatures = uc_ligature_map+index;
  1246. ushort length = *ligatures++;
  1247. {
  1248. const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
  1249. const UCS2Pair *r = qBinaryFind(data, data + length, u1);
  1250. if (r != data + length)
  1251. return r->u2;
  1252. }
  1253. return 0;
  1254. }
  1255. static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
  1256. {
  1257. QString &s = *str;
  1258. if (s.length() - from < 2)
  1259. return;
  1260. // the loop can partly ignore high Unicode as all ligatures are in the BMP
  1261. int starter = 0;
  1262. int lastCombining = 0;
  1263. int pos = from;
  1264. while (pos < s.length()) {
  1265. uint uc = s.at(pos).unicode();
  1266. if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
  1267. ushort low = s.at(pos+1).unicode();
  1268. if (QChar(low).isLowSurrogate()) {
  1269. uc = QChar::surrogateToUcs4(uc, low);
  1270. ++pos;
  1271. }
  1272. }
  1273. const QUnicodeTables::Properties *p = qGetProp(uc);
  1274. if (p->unicodeVersion == QChar::Unicode_Unassigned || p->unicodeVersion > version) {
  1275. starter = -1; // to prevent starter == pos - 1
  1276. lastCombining = 0;
  1277. ++pos;
  1278. continue;
  1279. }
  1280. int combining = p->combiningClass;
  1281. if (starter == pos - 1 || combining > lastCombining) {
  1282. // allowed to form ligature with S
  1283. QChar ligature = ligatureHelper(s.at(starter).unicode(), uc);
  1284. if (ligature.unicode()) {
  1285. s[starter] = ligature;
  1286. s.remove(pos, 1);
  1287. continue;
  1288. }
  1289. }
  1290. if (!combining)
  1291. starter = pos;
  1292. lastCombining = combining;
  1293. ++pos;
  1294. }
  1295. }
  1296. static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
  1297. {
  1298. QString &s = *str;
  1299. const int l = s.length()-1;
  1300. int pos = from;
  1301. while (pos < l) {
  1302. int p2 = pos+1;
  1303. uint u1 = s.at(pos).unicode();
  1304. if (QChar(u1).isHighSurrogate()) {
  1305. ushort low = s.at(p2).unicode();
  1306. if (QChar(low).isLowSurrogate()) {
  1307. u1 = QChar::surrogateToUcs4(u1, low);
  1308. if (p2 >= l)
  1309. break;
  1310. ++p2;
  1311. }
  1312. }
  1313. uint u2 = s.at(p2).unicode();
  1314. if (QChar(u2).isHighSurrogate() && p2 < l) {
  1315. ushort low = s.at(p2+1).unicode();
  1316. if (QChar(low).isLowSurrogate()) {
  1317. u2 = QChar::surrogateToUcs4(u2, low);
  1318. ++p2;
  1319. }
  1320. }
  1321. ushort c2 = 0;
  1322. {
  1323. const QUnicodeTables::Properties *p = qGetProp(u2);
  1324. if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
  1325. c2 = p->combiningClass;
  1326. }
  1327. if (c2 == 0) {
  1328. pos = p2+1;
  1329. continue;
  1330. }
  1331. ushort c1 = 0;
  1332. {
  1333. const QUnicodeTables::Properties *p = qGetProp(u1);
  1334. if (p->unicodeVersion != QChar::Unicode_Unassigned && p->unicodeVersion <= version)
  1335. c1 = p->combiningClass;
  1336. }
  1337. if (c1 > c2) {
  1338. QChar *uc = s.data();
  1339. int p = pos;
  1340. // exchange characters
  1341. if (!QChar::requiresSurrogates(u2)) {
  1342. uc[p++] = u2;
  1343. } else {
  1344. uc[p++] = QChar::highSurrogate(u2);
  1345. uc[p++] = QChar::lowSurrogate(u2);
  1346. }
  1347. if (!QChar::requiresSurrogates(u1)) {
  1348. uc[p++] = u1;
  1349. } else {
  1350. uc[p++] = QChar::highSurrogate(u1);
  1351. uc[p++] = QChar::lowSurrogate(u1);
  1352. }
  1353. if (pos > 0)
  1354. --pos;
  1355. if (pos > 0 && s.at(pos).isLowSurrogate())
  1356. --pos;
  1357. } else {
  1358. ++pos;
  1359. if (QChar::requiresSurrogates(u1))
  1360. ++pos;
  1361. }
  1362. }
  1363. }
  1364. QT_END_NAMESPACE