/Src/Dependencies/Boost/libs/regex/src/wc_regex_traits.cpp

http://hadesmem.googlecode.com/ · C++ · 320 lines · 258 code · 33 blank · 29 comment · 70 complexity · bfd1070aa7dc15815f6cd859dc9d0276 MD5 · raw file

  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE: wc_regex_traits.cpp
  14. * VERSION: see <boost/version.hpp>
  15. * DESCRIPTION: Implements out of line members for c_regex_traits<wchar_t>
  16. */
  17. #define BOOST_REGEX_SOURCE
  #include <boost/detail/workaround.hpp>
  #include <climits>
  #include <memory>
  #include <string>
  21. #if defined(_DLL_CPPLIB) && !defined(_M_CEE_PURE) && defined(_NATIVE_WCHAR_T_DEFINED) \
  22. && !(defined(__SGI_STL_PORT) || defined(_STLPORT_VERSION) || defined(__STD_RWCOMPILER_H__) || defined(_RWSTD_VER))\
  23. && BOOST_WORKAROUND(BOOST_MSVC, <1600)
  24. //
  25. // This is a horrible workaround, but without declaring these symbols extern we get
  26. // duplicate symbol errors when linking if the application is built without
  27. // /Zc:wchar_t
  28. //
  29. #ifdef _CRTIMP2_PURE
  30. # define BOOST_REGEX_STDLIB_DECL _CRTIMP2_PURE
  31. #else
  32. # define BOOST_REGEX_STDLIB_DECL _CRTIMP2
  33. #endif
  34. namespace std{
  35. #if BOOST_WORKAROUND(BOOST_MSVC, >= 1400)
  36. template class BOOST_REGEX_STDLIB_DECL allocator<unsigned short>;
  37. template class BOOST_REGEX_STDLIB_DECL _String_val<unsigned short, allocator<unsigned short> >;
  38. template class BOOST_REGEX_STDLIB_DECL basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >;
  39. #endif
  40. #if BOOST_WORKAROUND(BOOST_MSVC, > 1300) && BOOST_WORKAROUND(BOOST_MSVC, BOOST_TESTED_AT(1400))
  41. template<> BOOST_REGEX_STDLIB_DECL std::size_t __cdecl char_traits<unsigned short>::length(unsigned short const*);
  42. #endif
  43. template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
  44. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
  45. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
  46. template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
  47. const unsigned short *,
  48. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
  49. template BOOST_REGEX_STDLIB_DECL bool __cdecl operator==(
  50. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
  51. const unsigned short *);
  52. template BOOST_REGEX_STDLIB_DECL bool __cdecl operator<(
  53. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
  54. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
  55. template BOOST_REGEX_STDLIB_DECL bool __cdecl operator>(
  56. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&,
  57. const basic_string<unsigned short, char_traits<unsigned short>, allocator<unsigned short> >&);
  58. }
  59. #endif
  60. #include <boost/regex/config.hpp>
  61. #include <boost/detail/workaround.hpp>
  62. #if !BOOST_WORKAROUND(__BORLANDC__, < 0x560)
  63. #include <boost/regex/v4/c_regex_traits.hpp>
  64. #ifndef BOOST_NO_WREGEX
  65. #include <boost/regex/v4/primary_transform.hpp>
  66. #include <boost/regex/v4/regex_traits_defaults.hpp>
  67. #if defined(BOOST_NO_STDC_NAMESPACE)
  68. namespace std{
  69. using ::wcstol;
  70. }
  71. #endif
  72. namespace boost{
  73. c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform(const wchar_t* p1, const wchar_t* p2)
  74. {
  75. std::size_t r;
  76. std::size_t s = 10;
  77. std::wstring src(p1, p2);
  78. std::wstring result(s, L' ');
  79. while(s < (r = std::wcsxfrm(&*result.begin(), src.c_str(), s)))
  80. {
  81. result.append(r - s + 3, L' ');
  82. s = result.size();
  83. }
  84. result.erase(r);
  85. return result;
  86. }
  87. c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::transform_primary(const wchar_t* p1, const wchar_t* p2)
  88. {
  89. static wchar_t s_delim;
  90. static const int s_collate_type = ::boost::re_detail::find_sort_syntax(static_cast<const c_regex_traits<wchar_t>*>(0), &s_delim);
  91. std::wstring result;
  92. //
  93. // What we do here depends upon the format of the sort key returned by
  94. // sort key returned by this->transform:
  95. //
  96. switch(s_collate_type)
  97. {
  98. case ::boost::re_detail::sort_C:
  99. case ::boost::re_detail::sort_unknown:
  100. // the best we can do is translate to lower case, then get a regular sort key:
  101. {
  102. result.assign(p1, p2);
  103. for(std::wstring::size_type i = 0; i < result.size(); ++i)
  104. result[i] = (std::towlower)(result[i]);
  105. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  106. break;
  107. }
  108. case ::boost::re_detail::sort_fixed:
  109. {
  110. // get a regular sort key, and then truncate it:
  111. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  112. result.erase(s_delim);
  113. break;
  114. }
  115. case ::boost::re_detail::sort_delim:
  116. // get a regular sort key, and then truncate everything after the delim:
  117. result = c_regex_traits<wchar_t>::transform(&*result.begin(), &*result.begin() + result.size());
  118. if(result.size() && (result[0] == s_delim))
  119. break;
  120. std::size_t i;
  121. for(i = 0; i < result.size(); ++i)
  122. {
  123. if(result[i] == s_delim)
  124. break;
  125. }
  126. result.erase(i);
  127. break;
  128. }
  129. if(result.empty())
  130. result = std::wstring(1, char(0));
  131. return result;
  132. }
  //
  // Bit flags for the character classes tested by isctype. The alnum and
  // graph entries are combinations of other flags rather than bits of their own.
  //
  enum
  {
     char_class_space      = 0x0001,
     char_class_print      = 0x0002,
     char_class_cntrl      = 0x0004,
     char_class_upper      = 0x0008,
     char_class_lower      = 0x0010,
     char_class_alpha      = 0x0020,
     char_class_digit      = 0x0040,
     char_class_punct      = 0x0080,
     char_class_xdigit     = 0x0100,
     char_class_alnum      = char_class_alpha | char_class_digit,
     char_class_graph      = char_class_alnum | char_class_punct,
     char_class_blank      = 0x0200,
     char_class_word       = 0x0400,
     char_class_unicode    = 0x0800,
     char_class_horizontal = 0x1000,
     char_class_vertical   = 0x2000
  };
  152. c_regex_traits<wchar_t>::char_class_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_classname(const wchar_t* p1, const wchar_t* p2)
  153. {
  154. static const char_class_type masks[] =
  155. {
  156. 0,
  157. char_class_alnum,
  158. char_class_alpha,
  159. char_class_blank,
  160. char_class_cntrl,
  161. char_class_digit,
  162. char_class_digit,
  163. char_class_graph,
  164. char_class_horizontal,
  165. char_class_lower,
  166. char_class_lower,
  167. char_class_print,
  168. char_class_punct,
  169. char_class_space,
  170. char_class_space,
  171. char_class_upper,
  172. char_class_unicode,
  173. char_class_upper,
  174. char_class_vertical,
  175. char_class_alnum | char_class_word,
  176. char_class_alnum | char_class_word,
  177. char_class_xdigit,
  178. };
  179. int idx = ::boost::re_detail::get_default_class_id(p1, p2);
  180. if(idx < 0)
  181. {
  182. std::wstring s(p1, p2);
  183. for(std::wstring::size_type i = 0; i < s.size(); ++i)
  184. s[i] = (std::towlower)(s[i]);
  185. idx = ::boost::re_detail::get_default_class_id(&*s.begin(), &*s.begin() + s.size());
  186. }
  187. BOOST_ASSERT(idx+1 < static_cast<int>(sizeof(masks) / sizeof(masks[0])));
  188. return masks[idx+1];
  189. }
  190. bool BOOST_REGEX_CALL c_regex_traits<wchar_t>::isctype(wchar_t c, char_class_type mask)
  191. {
  192. return
  193. ((mask & char_class_space) && (std::iswspace)(c))
  194. || ((mask & char_class_print) && (std::iswprint)(c))
  195. || ((mask & char_class_cntrl) && (std::iswcntrl)(c))
  196. || ((mask & char_class_upper) && (std::iswupper)(c))
  197. || ((mask & char_class_lower) && (std::iswlower)(c))
  198. || ((mask & char_class_alpha) && (std::iswalpha)(c))
  199. || ((mask & char_class_digit) && (std::iswdigit)(c))
  200. || ((mask & char_class_punct) && (std::iswpunct)(c))
  201. || ((mask & char_class_xdigit) && (std::iswxdigit)(c))
  202. || ((mask & char_class_blank) && (std::iswspace)(c) && !::boost::re_detail::is_separator(c))
  203. || ((mask & char_class_word) && (c == '_'))
  204. || ((mask & char_class_unicode) && (c & ~static_cast<wchar_t>(0xff)))
  205. || ((mask & char_class_vertical) && (::boost::re_detail::is_separator(c) || (c == L'\v')))
  206. || ((mask & char_class_horizontal) && (std::iswspace)(c) && !::boost::re_detail::is_separator(c) && (c != L'\v'));
  207. }
  208. c_regex_traits<wchar_t>::string_type BOOST_REGEX_CALL c_regex_traits<wchar_t>::lookup_collatename(const wchar_t* p1, const wchar_t* p2)
  209. {
  210. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  211. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  212. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  213. std::string name(p1, p2);
  214. #else
  215. std::string name;
  216. const wchar_t* p0 = p1;
  217. while(p0 != p2)
  218. name.append(1, char(*p0++));
  219. #endif
  220. name = ::boost::re_detail::lookup_default_collate_name(name);
  221. #if !defined(BOOST_NO_TEMPLATED_ITERATOR_CONSTRUCTORS)\
  222. && !BOOST_WORKAROUND(BOOST_MSVC, < 1300)\
  223. && !BOOST_WORKAROUND(__BORLANDC__, <= 0x0551)
  224. if(name.size())
  225. return string_type(name.begin(), name.end());
  226. #else
  227. if(name.size())
  228. {
  229. string_type result;
  230. typedef std::string::const_iterator iter;
  231. iter b = name.begin();
  232. iter e = name.end();
  233. while(b != e)
  234. result.append(1, wchar_t(*b++));
  235. return result;
  236. }
  237. #endif
  238. if(p2 - p1 == 1)
  239. return string_type(1, *p1);
  240. return string_type();
  241. }
  242. int BOOST_REGEX_CALL c_regex_traits<wchar_t>::value(wchar_t c, int radix)
  243. {
  244. #ifdef __BORLANDC__
  245. // workaround for broken wcstol:
  246. if((std::iswxdigit)(c) == 0)
  247. return -1;
  248. #endif
  249. wchar_t b[2] = { c, '\0', };
  250. wchar_t* ep;
  251. int result = std::wcstol(b, &ep, radix);
  252. if(ep == b)
  253. return -1;
  254. return result;
  255. }
  256. #ifdef BOOST_REGEX_HAS_OTHER_WCHAR_T
  257. c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform(const unsigned short* p1, const unsigned short* p2)
  258. {
  259. std::wstring result = c_regex_traits<wchar_t>::transform((const wchar_t*)p1, (const wchar_t*)p2);
  260. return string_type(result.begin(), result.end());
  261. }
  262. c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::transform_primary(const unsigned short* p1, const unsigned short* p2)
  263. {
  264. std::wstring result = c_regex_traits<wchar_t>::transform_primary((const wchar_t*)p1, (const wchar_t*)p2);
  265. return string_type(result.begin(), result.end());
  266. }
  267. c_regex_traits<unsigned short>::char_class_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_classname(const unsigned short* p1, const unsigned short* p2)
  268. {
  269. return c_regex_traits<wchar_t>::lookup_classname((const wchar_t*)p1, (const wchar_t*)p2);
  270. }
  271. c_regex_traits<unsigned short>::string_type BOOST_REGEX_CALL c_regex_traits<unsigned short>::lookup_collatename(const unsigned short* p1, const unsigned short* p2)
  272. {
  273. std::wstring result = c_regex_traits<wchar_t>::lookup_collatename((const wchar_t*)p1, (const wchar_t*)p2);
  274. return string_type(result.begin(), result.end());
  275. }
  276. bool BOOST_REGEX_CALL c_regex_traits<unsigned short>::isctype(unsigned short c, char_class_type m)
  277. {
  278. return c_regex_traits<wchar_t>::isctype(c, m);
  279. }
  280. int BOOST_REGEX_CALL c_regex_traits<unsigned short>::value(unsigned short c, int radix)
  281. {
  282. return c_regex_traits<wchar_t>::value(c, radix);
  283. }
  284. #endif
  285. }
  286. #endif // BOOST_NO_WREGEX
  287. #endif // __BORLANDC__