/Src/Dependencies/Boost/boost/xpressive/detail/dynamic/parser_traits.hpp

http://hadesmem.googlecode.com/ · C++ Header · 474 lines · 367 code · 50 blank · 57 comment · 85 complexity · 2f0ce60cf5f7158f3206d35ac5aec7cf MD5 · raw file

  1. ///////////////////////////////////////////////////////////////////////////////
  2. // detail/dynamic/parser_traits.hpp
  3. //
  4. // Copyright 2008 Eric Niebler. Distributed under the Boost
  5. // Software License, Version 1.0. (See accompanying file
  6. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. #ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
  8. #define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
  9. // MS compatible compilers support #pragma once
  10. #if defined(_MSC_VER) && (_MSC_VER >= 1020)
  11. # pragma once
  12. #endif
#include <climits>
#include <limits>
#include <string>
#include <boost/assert.hpp>
#include <boost/throw_exception.hpp>
#include <boost/xpressive/regex_error.hpp>
#include <boost/xpressive/regex_traits.hpp>
#include <boost/xpressive/detail/detail_fwd.hpp>
#include <boost/xpressive/detail/dynamic/matchable.hpp>
#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
#include <boost/xpressive/detail/utility/literals.hpp>
#include <boost/xpressive/detail/utility/algorithm.hpp>
  24. namespace boost { namespace xpressive
  25. {
  26. ///////////////////////////////////////////////////////////////////////////////
  27. // compiler_traits
  28. // this works for char and wchar_t. it must be specialized for anything else.
  29. //
  30. template<typename RegexTraits>
  31. struct compiler_traits
  32. {
  33. typedef RegexTraits regex_traits;
  34. typedef typename regex_traits::char_type char_type;
  35. typedef typename regex_traits::string_type string_type;
  36. typedef typename regex_traits::locale_type locale_type;
  37. ///////////////////////////////////////////////////////////////////////////////
  38. // constructor
  39. explicit compiler_traits(RegexTraits const &traits = RegexTraits())
  40. : traits_(traits)
  41. , flags_(regex_constants::ECMAScript)
  42. , space_(lookup_classname(traits_, "space"))
  43. , alnum_(lookup_classname(traits_, "alnum"))
  44. {
  45. }
  46. ///////////////////////////////////////////////////////////////////////////////
  47. // flags
  48. regex_constants::syntax_option_type flags() const
  49. {
  50. return this->flags_;
  51. }
  52. ///////////////////////////////////////////////////////////////////////////////
  53. // flags
  54. void flags(regex_constants::syntax_option_type flags)
  55. {
  56. this->flags_ = flags;
  57. }
  58. ///////////////////////////////////////////////////////////////////////////////
  59. // traits
  60. regex_traits &traits()
  61. {
  62. return this->traits_;
  63. }
  64. regex_traits const &traits() const
  65. {
  66. return this->traits_;
  67. }
  68. ///////////////////////////////////////////////////////////////////////////////
  69. // imbue
  70. locale_type imbue(locale_type const &loc)
  71. {
  72. locale_type oldloc = this->traits().imbue(loc);
  73. this->space_ = lookup_classname(this->traits(), "space");
  74. this->alnum_ = lookup_classname(this->traits(), "alnum");
  75. return oldloc;
  76. }
  77. ///////////////////////////////////////////////////////////////////////////////
  78. // getloc
  79. locale_type getloc() const
  80. {
  81. return this->traits().getloc();
  82. }
  83. ///////////////////////////////////////////////////////////////////////////////
  84. // get_token
  85. // get a token and advance the iterator
  86. template<typename FwdIter>
  87. regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
  88. {
  89. using namespace regex_constants;
  90. if(this->eat_ws_(begin, end) == end)
  91. {
  92. return regex_constants::token_end_of_pattern;
  93. }
  94. switch(*begin)
  95. {
  96. case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
  97. case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
  98. case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
  99. case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
  100. case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
  101. case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
  102. case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
  103. case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
  104. case BOOST_XPR_CHAR_(char_type, '*'):
  105. case BOOST_XPR_CHAR_(char_type, '+'):
  106. case BOOST_XPR_CHAR_(char_type, '?'):
  107. return token_invalid_quantifier;
  108. case BOOST_XPR_CHAR_(char_type, ']'):
  109. case BOOST_XPR_CHAR_(char_type, '{'):
  110. default:
  111. return token_literal;
  112. }
  113. }
  114. ///////////////////////////////////////////////////////////////////////////////
  115. // get_quant_spec
  116. template<typename FwdIter>
  117. bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
  118. {
  119. using namespace regex_constants;
  120. FwdIter old_begin;
  121. if(this->eat_ws_(begin, end) == end)
  122. {
  123. return false;
  124. }
  125. switch(*begin)
  126. {
  127. case BOOST_XPR_CHAR_(char_type, '*'):
  128. spec.min_ = 0;
  129. spec.max_ = (std::numeric_limits<unsigned int>::max)();
  130. break;
  131. case BOOST_XPR_CHAR_(char_type, '+'):
  132. spec.min_ = 1;
  133. spec.max_ = (std::numeric_limits<unsigned int>::max)();
  134. break;
  135. case BOOST_XPR_CHAR_(char_type, '?'):
  136. spec.min_ = 0;
  137. spec.max_ = 1;
  138. break;
  139. case BOOST_XPR_CHAR_(char_type, '{'):
  140. old_begin = this->eat_ws_(++begin, end);
  141. spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
  142. BOOST_XPR_ENSURE_
  143. (
  144. begin != old_begin && begin != end, error_brace, "invalid quantifier"
  145. );
  146. if(*begin == BOOST_XPR_CHAR_(char_type, ','))
  147. {
  148. old_begin = this->eat_ws_(++begin, end);
  149. spec.max_ = detail::toi(begin, end, this->traits());
  150. BOOST_XPR_ENSURE_
  151. (
  152. begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
  153. , error_brace, "invalid quantifier"
  154. );
  155. if(begin == old_begin)
  156. {
  157. spec.max_ = (std::numeric_limits<unsigned int>::max)();
  158. }
  159. else
  160. {
  161. BOOST_XPR_ENSURE_
  162. (
  163. spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
  164. );
  165. }
  166. }
  167. else
  168. {
  169. BOOST_XPR_ENSURE_
  170. (
  171. BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
  172. );
  173. }
  174. break;
  175. default:
  176. return false;
  177. }
  178. spec.greedy_ = true;
  179. if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
  180. {
  181. ++begin;
  182. spec.greedy_ = false;
  183. }
  184. return true;
  185. }
  186. ///////////////////////////////////////////////////////////////////////////
  187. // get_group_type
  188. template<typename FwdIter>
  189. regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
  190. {
  191. using namespace regex_constants;
  192. if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
  193. {
  194. this->eat_ws_(++begin, end);
  195. BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
  196. switch(*begin)
  197. {
  198. case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
  199. case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
  200. case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
  201. case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
  202. case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
  203. case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
  204. case BOOST_XPR_CHAR_(char_type, '$'):
  205. this->get_name_(++begin, end, name);
  206. BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
  207. if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
  208. {
  209. ++begin;
  210. return token_rule_assign;
  211. }
  212. return token_rule_ref;
  213. case BOOST_XPR_CHAR_(char_type, '<'):
  214. this->eat_ws_(++begin, end);
  215. BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
  216. switch(*begin)
  217. {
  218. case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
  219. case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
  220. default:
  221. BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
  222. }
  223. case BOOST_XPR_CHAR_(char_type, 'P'):
  224. this->eat_ws_(++begin, end);
  225. BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
  226. switch(*begin)
  227. {
  228. case BOOST_XPR_CHAR_(char_type, '<'):
  229. this->get_name_(++begin, end, name);
  230. BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
  231. return token_named_mark;
  232. case BOOST_XPR_CHAR_(char_type, '='):
  233. this->get_name_(++begin, end, name);
  234. BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
  235. return token_named_mark_ref;
  236. default:
  237. BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
  238. }
  239. case BOOST_XPR_CHAR_(char_type, 'i'):
  240. case BOOST_XPR_CHAR_(char_type, 'm'):
  241. case BOOST_XPR_CHAR_(char_type, 's'):
  242. case BOOST_XPR_CHAR_(char_type, 'x'):
  243. case BOOST_XPR_CHAR_(char_type, '-'):
  244. return this->parse_mods_(begin, end);
  245. default:
  246. BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
  247. }
  248. }
  249. return token_literal;
  250. }
  251. //////////////////////////////////////////////////////////////////////////
  252. // get_charset_token
  253. // NOTE: white-space is *never* ignored in a charset.
  254. template<typename FwdIter>
  255. regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
  256. {
  257. using namespace regex_constants;
  258. BOOST_ASSERT(begin != end);
  259. switch(*begin)
  260. {
  261. case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
  262. case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
  263. case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
  264. case BOOST_XPR_CHAR_(char_type, '['):
  265. {
  266. FwdIter next = begin; ++next;
  267. if(next != end)
  268. {
  269. BOOST_XPR_ENSURE_(
  270. *next != BOOST_XPR_CHAR_(char_type, '=')
  271. , error_collate
  272. , "equivalence classes are not yet supported"
  273. );
  274. BOOST_XPR_ENSURE_(
  275. *next != BOOST_XPR_CHAR_(char_type, '.')
  276. , error_collate
  277. , "collation sequences are not yet supported"
  278. );
  279. if(*next == BOOST_XPR_CHAR_(char_type, ':'))
  280. {
  281. begin = ++next;
  282. return token_posix_charset_begin;
  283. }
  284. }
  285. }
  286. break;
  287. case BOOST_XPR_CHAR_(char_type, ':'):
  288. {
  289. FwdIter next = begin; ++next;
  290. if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
  291. {
  292. begin = ++next;
  293. return token_posix_charset_end;
  294. }
  295. }
  296. break;
  297. case BOOST_XPR_CHAR_(char_type, '\\'):
  298. if(++begin != end)
  299. {
  300. switch(*begin)
  301. {
  302. case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
  303. default:;
  304. }
  305. }
  306. return token_escape;
  307. default:;
  308. }
  309. return token_literal;
  310. }
  311. //////////////////////////////////////////////////////////////////////////
  312. // get_escape_token
  313. template<typename FwdIter>
  314. regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
  315. {
  316. using namespace regex_constants;
  317. if(begin != end)
  318. {
  319. switch(*begin)
  320. {
  321. //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
  322. //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
  323. //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
  324. //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
  325. //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
  326. //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
  327. //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
  328. case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
  329. case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
  330. case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
  331. case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
  332. case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
  333. case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
  334. // Non-standard extension to ECMAScript syntax
  335. case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
  336. case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
  337. default:; // fall-through
  338. }
  339. }
  340. return token_escape;
  341. }
  342. private:
  343. //////////////////////////////////////////////////////////////////////////
  344. // parse_mods_
  345. template<typename FwdIter>
  346. regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
  347. {
  348. using namespace regex_constants;
  349. bool set = true;
  350. do switch(*begin)
  351. {
  352. case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
  353. case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
  354. case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
  355. case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
  356. case BOOST_XPR_CHAR_(char_type, ':'): ++begin; // fall-through
  357. case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
  358. case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; // else fall-through
  359. default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
  360. }
  361. while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
  362. // this return is technically unreachable, but this must
  363. // be here to work around a bug in gcc 4.0
  364. return token_no_mark;
  365. }
  366. ///////////////////////////////////////////////////////////////////////////////
  367. // flag_
  368. void flag_(bool set, regex_constants::syntax_option_type flag)
  369. {
  370. this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
  371. }
  372. ///////////////////////////////////////////////////////////////////////////
  373. // is_space_
  374. bool is_space_(char_type ch) const
  375. {
  376. return 0 != this->space_ && this->traits().isctype(ch, this->space_);
  377. }
  378. ///////////////////////////////////////////////////////////////////////////
  379. // is_alnum_
  380. bool is_alnum_(char_type ch) const
  381. {
  382. return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
  383. }
  384. ///////////////////////////////////////////////////////////////////////////
  385. // get_name_
  386. template<typename FwdIter>
  387. void get_name_(FwdIter &begin, FwdIter end, string_type &name)
  388. {
  389. this->eat_ws_(begin, end);
  390. for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
  391. {
  392. name.push_back(*begin);
  393. }
  394. this->eat_ws_(begin, end);
  395. BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
  396. }
  397. ///////////////////////////////////////////////////////////////////////////////
  398. // eat_ws_
  399. template<typename FwdIter>
  400. FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
  401. {
  402. if(0 != (regex_constants::ignore_white_space & this->flags()))
  403. {
  404. while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
  405. {
  406. if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
  407. {
  408. while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
  409. }
  410. else
  411. {
  412. for(; end != begin && this->is_space_(*begin); ++begin) {}
  413. }
  414. }
  415. }
  416. return begin;
  417. }
  418. regex_traits traits_;
  419. regex_constants::syntax_option_type flags_;
  420. typename regex_traits::char_class_type space_;
  421. typename regex_traits::char_class_type alnum_;
  422. };
  423. }} // namespace boost::xpressive
  424. #endif