/src/contrib/boost/spirit/home/support/char_encoding/ascii.hpp

http://pythonocc.googlecode.com/ · C++ Header · 313 lines · 260 code · 28 blank · 25 comment · 7 complexity · 022747837c5ad4b1195b345eec09f1db MD5 · raw file

  1. /*=============================================================================
  2. Copyright (c) 2001-2010 Hartmut Kaiser
  3. Copyright (c) 2001-2010 Joel de Guzman
  4. Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. =============================================================================*/
  7. #if !defined(BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM)
  8. #define BOOST_SPIRIT_ASCII_APRIL_26_2006_1106PM
  9. #if defined(_MSC_VER)
  10. #pragma once
  11. #endif
  12. #include <climits>
  13. #include <boost/assert.hpp>
  14. #include <boost/cstdint.hpp>
  ///////////////////////////////////////////////////////////////////////////////
  //  Character class bit flags.
  //
  //  Each entry of the ASCII classification table is the bitwise OR of the
  //  flags below. All eight flags together occupy exactly one byte.
  ///////////////////////////////////////////////////////////////////////////////
  #define BOOST_CC_DIGIT      0x0001      // '0'-'9'
  #define BOOST_CC_XDIGIT     0x0002      // '0'-'9', 'A'-'F', 'a'-'f'
  #define BOOST_CC_ALPHA      0x0004      // 'A'-'Z', 'a'-'z'
  #define BOOST_CC_CTRL       0x0008      // 0x00-0x1f and DEL (0x7f)
  #define BOOST_CC_LOWER      0x0010      // 'a'-'z'
  #define BOOST_CC_UPPER      0x0020      // 'A'-'Z'
  #define BOOST_CC_SPACE      0x0040      // HT, NL, VT, NP, CR and SP
  #define BOOST_CC_PUNCT      0x0080      // printable, non-alphanumeric, non-space
  26. namespace boost { namespace spirit { namespace char_encoding
  27. {
  28. // The detection of isgraph(), isprint() and isblank() is done programmatically
  29. // to keep the character type table small. Additionally, these functions are
  30. // rather seldom used and the programmatic detection is very simple.
  31. ///////////////////////////////////////////////////////////////////////////
  32. // ASCII character classification table
  33. ///////////////////////////////////////////////////////////////////////////
  34. const unsigned char ascii_char_types[] =
  35. {
  36. /* NUL 0 0 */ BOOST_CC_CTRL,
  37. /* SOH 1 1 */ BOOST_CC_CTRL,
  38. /* STX 2 2 */ BOOST_CC_CTRL,
  39. /* ETX 3 3 */ BOOST_CC_CTRL,
  40. /* EOT 4 4 */ BOOST_CC_CTRL,
  41. /* ENQ 5 5 */ BOOST_CC_CTRL,
  42. /* ACK 6 6 */ BOOST_CC_CTRL,
  43. /* BEL 7 7 */ BOOST_CC_CTRL,
  44. /* BS 8 8 */ BOOST_CC_CTRL,
  45. /* HT 9 9 */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  46. /* NL 10 a */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  47. /* VT 11 b */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  48. /* NP 12 c */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  49. /* CR 13 d */ BOOST_CC_CTRL|BOOST_CC_SPACE,
  50. /* SO 14 e */ BOOST_CC_CTRL,
  51. /* SI 15 f */ BOOST_CC_CTRL,
  52. /* DLE 16 10 */ BOOST_CC_CTRL,
  53. /* DC1 17 11 */ BOOST_CC_CTRL,
  54. /* DC2 18 12 */ BOOST_CC_CTRL,
  55. /* DC3 19 13 */ BOOST_CC_CTRL,
  56. /* DC4 20 14 */ BOOST_CC_CTRL,
  57. /* NAK 21 15 */ BOOST_CC_CTRL,
  58. /* SYN 22 16 */ BOOST_CC_CTRL,
  59. /* ETB 23 17 */ BOOST_CC_CTRL,
  60. /* CAN 24 18 */ BOOST_CC_CTRL,
  61. /* EM 25 19 */ BOOST_CC_CTRL,
  62. /* SUB 26 1a */ BOOST_CC_CTRL,
  63. /* ESC 27 1b */ BOOST_CC_CTRL,
  64. /* FS 28 1c */ BOOST_CC_CTRL,
  65. /* GS 29 1d */ BOOST_CC_CTRL,
  66. /* RS 30 1e */ BOOST_CC_CTRL,
  67. /* US 31 1f */ BOOST_CC_CTRL,
  68. /* SP 32 20 */ BOOST_CC_SPACE,
  69. /* ! 33 21 */ BOOST_CC_PUNCT,
  70. /* " 34 22 */ BOOST_CC_PUNCT,
  71. /* # 35 23 */ BOOST_CC_PUNCT,
  72. /* $ 36 24 */ BOOST_CC_PUNCT,
  73. /* % 37 25 */ BOOST_CC_PUNCT,
  74. /* & 38 26 */ BOOST_CC_PUNCT,
  75. /* ' 39 27 */ BOOST_CC_PUNCT,
  76. /* ( 40 28 */ BOOST_CC_PUNCT,
  77. /* ) 41 29 */ BOOST_CC_PUNCT,
  78. /* * 42 2a */ BOOST_CC_PUNCT,
  79. /* + 43 2b */ BOOST_CC_PUNCT,
  80. /* , 44 2c */ BOOST_CC_PUNCT,
  81. /* - 45 2d */ BOOST_CC_PUNCT,
  82. /* . 46 2e */ BOOST_CC_PUNCT,
  83. /* / 47 2f */ BOOST_CC_PUNCT,
  84. /* 0 48 30 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  85. /* 1 49 31 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  86. /* 2 50 32 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  87. /* 3 51 33 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  88. /* 4 52 34 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  89. /* 5 53 35 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  90. /* 6 54 36 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  91. /* 7 55 37 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  92. /* 8 56 38 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  93. /* 9 57 39 */ BOOST_CC_DIGIT|BOOST_CC_XDIGIT,
  94. /* : 58 3a */ BOOST_CC_PUNCT,
  95. /* ; 59 3b */ BOOST_CC_PUNCT,
  96. /* < 60 3c */ BOOST_CC_PUNCT,
  97. /* = 61 3d */ BOOST_CC_PUNCT,
  98. /* > 62 3e */ BOOST_CC_PUNCT,
  99. /* ? 63 3f */ BOOST_CC_PUNCT,
  100. /* @ 64 40 */ BOOST_CC_PUNCT,
  101. /* A 65 41 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  102. /* B 66 42 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  103. /* C 67 43 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  104. /* D 68 44 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  105. /* E 69 45 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  106. /* F 70 46 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_UPPER,
  107. /* G 71 47 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  108. /* H 72 48 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  109. /* I 73 49 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  110. /* J 74 4a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  111. /* K 75 4b */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  112. /* L 76 4c */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  113. /* M 77 4d */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  114. /* N 78 4e */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  115. /* O 79 4f */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  116. /* P 80 50 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  117. /* Q 81 51 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  118. /* R 82 52 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  119. /* S 83 53 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  120. /* T 84 54 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  121. /* U 85 55 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  122. /* V 86 56 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  123. /* W 87 57 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  124. /* X 88 58 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  125. /* Y 89 59 */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  126. /* Z 90 5a */ BOOST_CC_ALPHA|BOOST_CC_UPPER,
  127. /* [ 91 5b */ BOOST_CC_PUNCT,
  128. /* \ 92 5c */ BOOST_CC_PUNCT,
  129. /* ] 93 5d */ BOOST_CC_PUNCT,
  130. /* ^ 94 5e */ BOOST_CC_PUNCT,
  131. /* _ 95 5f */ BOOST_CC_PUNCT,
  132. /* ` 96 60 */ BOOST_CC_PUNCT,
  133. /* a 97 61 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  134. /* b 98 62 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  135. /* c 99 63 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  136. /* d 100 64 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  137. /* e 101 65 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  138. /* f 102 66 */ BOOST_CC_ALPHA|BOOST_CC_XDIGIT|BOOST_CC_LOWER,
  139. /* g 103 67 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  140. /* h 104 68 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  141. /* i 105 69 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  142. /* j 106 6a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  143. /* k 107 6b */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  144. /* l 108 6c */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  145. /* m 109 6d */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  146. /* n 110 6e */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  147. /* o 111 6f */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  148. /* p 112 70 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  149. /* q 113 71 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  150. /* r 114 72 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  151. /* s 115 73 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  152. /* t 116 74 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  153. /* u 117 75 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  154. /* v 118 76 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  155. /* w 119 77 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  156. /* x 120 78 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  157. /* y 121 79 */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  158. /* z 122 7a */ BOOST_CC_ALPHA|BOOST_CC_LOWER,
  159. /* { 123 7b */ BOOST_CC_PUNCT,
  160. /* | 124 7c */ BOOST_CC_PUNCT,
  161. /* } 125 7d */ BOOST_CC_PUNCT,
  162. /* ~ 126 7e */ BOOST_CC_PUNCT,
  163. /* DEL 127 7f */ BOOST_CC_CTRL,
  164. };
  165. ///////////////////////////////////////////////////////////////////////////
  166. // Test characters for specified conditions (using ASCII)
  167. ///////////////////////////////////////////////////////////////////////////
  168. struct ascii
  169. {
  170. typedef char char_type;
  171. static bool
  172. isascii_(int ch)
  173. {
  174. return 0 == (ch & ~0x7f);
  175. }
  176. static bool
  177. ischar(int ch)
  178. {
  179. return isascii_(ch);
  180. }
  181. static int
  182. isalnum(int ch)
  183. {
  184. BOOST_ASSERT(isascii_(ch));
  185. return (ascii_char_types[ch] & BOOST_CC_ALPHA)
  186. || (ascii_char_types[ch] & BOOST_CC_DIGIT);
  187. }
  188. static int
  189. isalpha(int ch)
  190. {
  191. BOOST_ASSERT(isascii_(ch));
  192. return (ascii_char_types[ch] & BOOST_CC_ALPHA);
  193. }
  194. static int
  195. isdigit(int ch)
  196. {
  197. BOOST_ASSERT(isascii_(ch));
  198. return (ascii_char_types[ch] & BOOST_CC_DIGIT);
  199. }
  200. static int
  201. isxdigit(int ch)
  202. {
  203. BOOST_ASSERT(isascii_(ch));
  204. return (ascii_char_types[ch] & BOOST_CC_XDIGIT);
  205. }
  206. static int
  207. iscntrl(int ch)
  208. {
  209. BOOST_ASSERT(isascii_(ch));
  210. return (ascii_char_types[ch] & BOOST_CC_CTRL);
  211. }
  212. static int
  213. isgraph(int ch)
  214. {
  215. return ('\x21' <= ch && ch <= '\x7e');
  216. }
  217. static int
  218. islower(int ch)
  219. {
  220. BOOST_ASSERT(isascii_(ch));
  221. return (ascii_char_types[ch] & BOOST_CC_LOWER);
  222. }
  223. static int
  224. isprint(int ch)
  225. {
  226. return ('\x20' <= ch && ch <= '\x7e');
  227. }
  228. static int
  229. ispunct(int ch)
  230. {
  231. BOOST_ASSERT(isascii_(ch));
  232. return (ascii_char_types[ch] & BOOST_CC_PUNCT);
  233. }
  234. static int
  235. isspace(int ch)
  236. {
  237. BOOST_ASSERT(isascii_(ch));
  238. return (ascii_char_types[ch] & BOOST_CC_SPACE);
  239. }
  240. static int
  241. isblank BOOST_PREVENT_MACRO_SUBSTITUTION (int ch)
  242. {
  243. return ('\x09' == ch || '\x20' == ch);
  244. }
  245. static int
  246. isupper(int ch)
  247. {
  248. BOOST_ASSERT(isascii_(ch));
  249. return (ascii_char_types[ch] & BOOST_CC_UPPER);
  250. }
  251. ///////////////////////////////////////////////////////////////////////
  252. // Simple character conversions
  253. ///////////////////////////////////////////////////////////////////////
  254. static int
  255. tolower(int ch)
  256. {
  257. BOOST_ASSERT(isascii_(ch));
  258. return isupper(ch) ? (ch - 'A' + 'a') : ch;
  259. }
  260. static int
  261. toupper(int ch)
  262. {
  263. BOOST_ASSERT(isascii_(ch));
  264. return islower(ch) ? (ch - 'a' + 'A') : ch;
  265. }
  266. static ::boost::uint32_t
  267. toucs4(int ch)
  268. {
  269. return ch;
  270. }
  271. };
  272. }}}
  ///////////////////////////////////////////////////////////////////////////////
  //  The classification flags are an implementation detail of this header:
  //  remove them again (in definition order) so they do not leak to includers.
  ///////////////////////////////////////////////////////////////////////////////
  #undef BOOST_CC_DIGIT
  #undef BOOST_CC_XDIGIT
  #undef BOOST_CC_ALPHA
  #undef BOOST_CC_CTRL
  #undef BOOST_CC_LOWER
  #undef BOOST_CC_UPPER
  #undef BOOST_CC_SPACE
  #undef BOOST_CC_PUNCT
  284. #endif