PageRenderTime 33ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/gcc/ada/a-zchuni.ads

https://bitbucket.org/codefirex/toolchain_gcc-4.9
Ada | 196 lines | 36 code | 19 blank | 141 comment | 0 complexity | b8a62642f07df9ceea1188a5010d412f MD5 | raw file
  1. ------------------------------------------------------------------------------
  2. -- --
  3. -- GNAT RUN-TIME COMPONENTS --
  4. -- --
  5. -- A D A . W I D E _ W I D E _ C H A R A C T E R S . U N I C O D E --
  6. -- --
  7. -- S p e c --
  8. -- --
  9. -- Copyright (C) 2005-2012, Free Software Foundation, Inc. --
  10. -- --
  11. -- GNAT is free software; you can redistribute it and/or modify it under --
  12. -- terms of the GNU General Public License as published by the Free Soft- --
  13. -- ware Foundation; either version 3, or (at your option) any later ver- --
  14. -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
  15. -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
  16. -- or FITNESS FOR A PARTICULAR PURPOSE. --
  17. -- --
  18. -- As a special exception under Section 7 of GPL version 3, you are granted --
  19. -- additional permissions described in the GCC Runtime Library Exception, --
  20. -- version 3.1, as published by the Free Software Foundation. --
  21. -- --
  22. -- You should have received a copy of the GNU General Public License and --
  23. -- a copy of the GCC Runtime Library Exception along with this program; --
  24. -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
  25. -- <http://www.gnu.org/licenses/>. --
  26. -- --
  27. -- GNAT was originally developed by the GNAT team at New York University. --
  28. -- Extensive contributions were provided by Ada Core Technologies Inc. --
  29. -- --
  30. ------------------------------------------------------------------------------
  31. -- Unicode categorization routines for Wide_Wide_Character
  32. with System.UTF_32;
  33. package Ada.Wide_Wide_Characters.Unicode is
  34. pragma Pure;
  35. -- The following type defines the categories from the Unicode definitions.
  36. -- The one addition we make is Fe, which represents the characters FFFE
  37. -- and FFFF in any of the planes.
  38. type Category is new System.UTF_32.Category;
  39. -- Cc Other, Control
  40. -- Cf Other, Format
  41. -- Cn Other, Not Assigned
  42. -- Co Other, Private Use
  43. -- Cs Other, Surrogate
  44. -- Ll Letter, Lowercase
  45. -- Lm Letter, Modifier
  46. -- Lo Letter, Other
  47. -- Lt Letter, Titlecase
  48. -- Lu Letter, Uppercase
  49. -- Mc Mark, Spacing Combining
  50. -- Me Mark, Enclosing
  51. -- Mn Mark, Nonspacing
  52. -- Nd Number, Decimal Digit
  53. -- Nl Number, Letter
  54. -- No Number, Other
  55. -- Pc Punctuation, Connector
  56. -- Pd Punctuation, Dash
  57. -- Pe Punctuation, Close
  58. -- Pf Punctuation, Final quote
  59. -- Pi Punctuation, Initial quote
  60. -- Po Punctuation, Other
  61. -- Ps Punctuation, Open
  62. -- Sc Symbol, Currency
  63. -- Sk Symbol, Modifier
  64. -- Sm Symbol, Math
  65. -- So Symbol, Other
  66. -- Zl Separator, Line
  67. -- Zp Separator, Paragraph
  68. -- Zs Separator, Space
  69. -- Fe relative position FFFE/FFFF in plane
  70. function Get_Category (U : Wide_Wide_Character) return Category;
  71. pragma Inline (Get_Category);
  72. -- Given a Wide_Wide_Character, returns the corresponding Category, or Cn
  73. -- if the code does not have an assigned Unicode category.
  74. -- The following functions perform category tests corresponding to lexical
  75. -- classes defined in the Ada standard. There are two interfaces for each
  76. -- function. The second takes a Category (e.g. returned by Get_Category).
  77. -- The first takes a Wide_Wide_Character. The form taking the
  78. -- Wide_Wide_Character is typically more efficient than calling
  79. -- Get_Category, but if several different tests are to be performed on the
  80. -- same code, it is more efficient to use Get_Category to get the category,
  81. -- then test the resulting category.
  82. function Is_Letter (U : Wide_Wide_Character) return Boolean;
  83. function Is_Letter (C : Category) return Boolean;
  84. pragma Inline (Is_Letter);
  85. -- Returns true iff U is a letter that can be used to start an identifier,
  86. -- or if C is one of the corresponding categories, which are the following:
  87. -- Letter, Uppercase (Lu)
  88. -- Letter, Lowercase (Ll)
  89. -- Letter, Titlecase (Lt)
  90. -- Letter, Modifier (Lm)
  91. -- Letter, Other (Lo)
  92. -- Number, Letter (Nl)
  93. function Is_Digit (U : Wide_Wide_Character) return Boolean;
  94. function Is_Digit (C : Category) return Boolean;
  95. pragma Inline (Is_Digit);
  96. -- Returns true iff U is a digit that can be used to extend an identifier,
  97. -- or if C is one of the corresponding categories, which are the following:
  98. -- Number, Decimal_Digit (Nd)
  99. function Is_Line_Terminator (U : Wide_Wide_Character) return Boolean;
  100. pragma Inline (Is_Line_Terminator);
  101. -- Returns true iff U is an allowed line terminator for source programs,
  102. -- if U is in the category Zp (Separator, Paragraph), or Zl (Separator,
  103. -- Line), or if U is a conventional line terminator (CR, LF, VT, FF).
  104. -- There is no category version for this function, since the set of
  105. -- characters does not correspond to a set of Unicode categories.
  106. function Is_Mark (U : Wide_Wide_Character) return Boolean;
  107. function Is_Mark (C : Category) return Boolean;
  108. pragma Inline (Is_Mark);
  109. -- Returns true iff U is a mark character which can be used to extend an
  110. -- identifier, or if C is one of the corresponding categories, which are
  111. -- the following:
  112. -- Mark, Non-Spacing (Mn)
  113. -- Mark, Spacing Combining (Mc)
  114. function Is_Other (U : Wide_Wide_Character) return Boolean;
  115. function Is_Other (C : Category) return Boolean;
  116. pragma Inline (Is_Other);
  117. -- Returns true iff U is an other format character, which means that it
  118. -- can be used to extend an identifier, but is ignored for the purposes of
  119. -- matching of identifiers, or if C is one of the corresponding categories,
  120. -- which are the following:
  121. -- Other, Format (Cf)
  122. function Is_Punctuation (U : Wide_Wide_Character) return Boolean;
  123. function Is_Punctuation (C : Category) return Boolean;
  124. pragma Inline (Is_Punctuation);
  125. -- Returns true iff U is a punctuation character that can be used to
  126. -- separate pieces of an identifier, or if C is one of the corresponding
  127. -- categories, which are the following:
  128. -- Punctuation, Connector (Pc)
  129. function Is_Space (U : Wide_Wide_Character) return Boolean;
  130. function Is_Space (C : Category) return Boolean;
  131. pragma Inline (Is_Space);
  132. -- Returns true iff U is considered a space to be ignored, or if C is one
  133. -- of the corresponding categories, which are the following:
  134. -- Separator, Space (Zs)
  135. function Is_Non_Graphic (U : Wide_Wide_Character) return Boolean;
  136. function Is_Non_Graphic (C : Category) return Boolean;
  137. pragma Inline (Is_Non_Graphic);
  138. -- Returns true iff U is considered to be a non-graphic character, or if C
  139. -- is one of the corresponding categories, which are the following:
  140. -- Other, Control (Cc)
  141. -- Other, Private Use (Co)
  142. -- Other, Surrogate (Cs)
  143. -- Separator, Line (Zl)
  144. -- Separator, Paragraph (Zp)
  145. -- FFFE or FFFF positions in any plane (Fe)
  146. --
  147. -- Note that the Ada category format effector is subsumed by the above
  148. -- list of Unicode categories.
  149. --
  150. -- Note that Other, Unassigned (Cn) is quite deliberately not included
  151. -- in the list of categories above. This means that should any of these
  152. -- code positions be defined in future with graphic characters they will
  153. -- be allowed without a need to change implementations or the standard.
  154. --
  155. -- Note that Other, Format (Cf) is also quite deliberately not included
  156. -- in the list of categories above. This means that these characters can
  157. -- be included in character and string literals.
  158. -- The following functions perform case folding. To_Upper_Case implements
  159. -- the fold required by the Ada 2005 standard rules for identifier case
  160. -- folding: two identifiers are equivalent if they are identical after
  161. -- folding all letters to upper case using that routine. To_Lower_Case,
  162. -- a fold to lower case routine, is also provided.
  163. function To_Lower_Case
  164. (U : Wide_Wide_Character) return Wide_Wide_Character;
  165. pragma Inline (To_Lower_Case);
  166. -- If U represents an upper case letter, returns the corresponding lower
  167. -- case letter, otherwise U is returned unchanged. The folding is locale
  168. -- independent as defined by documents referenced in the note in section
  169. -- 1 of ISO/IEC 10646:2003.
  170. function To_Upper_Case
  171. (U : Wide_Wide_Character) return Wide_Wide_Character;
  172. pragma Inline (To_Upper_Case);
  173. -- If U represents a lower case letter, returns the corresponding upper
  174. -- case letter, otherwise U is returned unchanged. The folding is locale
  175. -- independent as defined by documents referenced in the note in section
  176. -- 1 of ISO/IEC 10646:2003.
  177. end Ada.Wide_Wide_Characters.Unicode;