/Src/Dependencies/Boost/boost/xpressive/regex_compiler.hpp

http://hadesmem.googlecode.com/ · C++ Header · 744 lines · 522 code · 93 blank · 129 comment · 80 complexity · c58e393e81e5decf3e2526df30023611 MD5 · raw file

  1. ///////////////////////////////////////////////////////////////////////////////
  2. /// \file regex_compiler.hpp
  3. /// Contains the definition of regex_compiler, a factory for building regex objects
  4. /// from strings.
  5. //
  6. // Copyright 2008 Eric Niebler. Distributed under the Boost
  7. // Software License, Version 1.0. (See accompanying file
  8. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. #ifndef BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
  10. #define BOOST_XPRESSIVE_REGEX_COMPILER_HPP_EAN_10_04_2005
  11. // MS compatible compilers support #pragma once
  12. #if defined(_MSC_VER) && (_MSC_VER >= 1020)
  13. # pragma once
  14. #endif
  15. #include <map>
  16. #include <boost/assert.hpp>
  17. #include <boost/next_prior.hpp>
  18. #include <boost/range/begin.hpp>
  19. #include <boost/range/end.hpp>
  20. #include <boost/mpl/assert.hpp>
  21. #include <boost/throw_exception.hpp>
  22. #include <boost/type_traits/is_same.hpp>
  23. #include <boost/type_traits/is_pointer.hpp>
  24. #include <boost/utility/enable_if.hpp>
  25. #include <boost/iterator/iterator_traits.hpp>
  26. #include <boost/xpressive/basic_regex.hpp>
  27. #include <boost/xpressive/detail/dynamic/parser.hpp>
  28. #include <boost/xpressive/detail/dynamic/parse_charset.hpp>
  29. #include <boost/xpressive/detail/dynamic/parser_enum.hpp>
  30. #include <boost/xpressive/detail/dynamic/parser_traits.hpp>
  31. #include <boost/xpressive/detail/core/linker.hpp>
  32. #include <boost/xpressive/detail/core/optimize.hpp>
  33. namespace boost { namespace xpressive
  34. {
  35. ///////////////////////////////////////////////////////////////////////////////
  36. // regex_compiler
  37. //
  38. /// \brief Class template regex_compiler is a factory for building basic_regex objects from a string.
  39. ///
  40. /// Class template regex_compiler is used to construct a basic_regex object from a string. The string
  41. /// should contain a valid regular expression. You can imbue a regex_compiler object with a locale,
  42. /// after which all basic_regex objects created with that regex_compiler object will use that locale.
  43. /// After creating a regex_compiler object, and optionally imbuing it with a locale, you can call the
  44. /// compile() method to construct a basic_regex object, passing it the string representing the regular
  45. /// expression. You can call compile() multiple times on the same regex_compiler object. Two basic_regex
  46. /// objects compiled from the same string will have different regex_id's.
  47. template<typename BidiIter, typename RegexTraits, typename CompilerTraits>
  48. struct regex_compiler
  49. {
  // Public member typedefs of regex_compiler.
  //
  // The BidiIter template argument; compile() returns basic_regex<BidiIter>.
  50. typedef BidiIter iterator_type;
  // Value type of BidiIter. compile_() statically asserts that the pattern
  // iterators have this same value type.
  51. typedef typename iterator_value<BidiIter>::type char_type;
  // Syntax-option bitmask type taken by the compile() overloads.
  52. typedef regex_constants::syntax_option_type flag_type;
  // The RegexTraits template argument.
  53. typedef RegexTraits traits_type;
  // String type supplied by the traits; used for rule names and literal text.
  54. typedef typename traits_type::string_type string_type;
  // Locale type supplied by the traits; exchanged through imbue() and getloc().
  55. typedef typename traits_type::locale_type locale_type;
  // Character-class type supplied by the traits; the type of the cached "upper" class.
  56. typedef typename traits_type::char_class_type char_class_type;
  57. explicit regex_compiler(RegexTraits const &traits = RegexTraits())
  58. : mark_count_(0)
  59. , hidden_mark_count_(0)
  60. , traits_(traits)
  61. , upper_(0)
  62. , self_()
  63. , rules_()
  64. {
  65. this->upper_ = lookup_classname(this->rxtraits(), "upper");
  66. }
  67. ///////////////////////////////////////////////////////////////////////////
  68. // imbue
  69. /// Specify the locale to be used by a regex_compiler.
  70. ///
  71. /// \param loc The locale that this regex_compiler should use.
  72. /// \return The previous locale.
  73. locale_type imbue(locale_type loc)
  74. {
  75. locale_type oldloc = this->traits_.imbue(loc);
  76. this->upper_ = lookup_classname(this->rxtraits(), "upper");
  77. return oldloc;
  78. }
  79. ///////////////////////////////////////////////////////////////////////////
  80. // getloc
  81. /// Get the locale used by a regex_compiler.
  82. ///
  83. /// \return The locale used by this regex_compiler.
  84. locale_type getloc() const
  85. {
  86. return this->traits_.getloc();
  87. }
  88. ///////////////////////////////////////////////////////////////////////////
  89. // compile
  90. /// Builds a basic_regex object from a range of characters.
  91. ///
  92. /// \param begin The beginning of a range of characters representing the
  93. /// regular expression to compile.
  94. /// \param end The end of a range of characters representing the
  95. /// regular expression to compile.
  96. /// \param flags Optional bitmask that determines how the pat string is
  97. /// interpreted. (See syntax_option_type.)
  98. /// \return A basic_regex object corresponding to the regular expression
  99. /// represented by the character range.
  100. /// \pre InputIter is a model of the InputIterator concept.
  101. /// \pre [begin,end) is a valid range.
  102. /// \pre The range of characters specified by [begin,end) contains a
  103. /// valid string-based representation of a regular expression.
  104. /// \throw regex_error when the range of characters has invalid regular
  105. /// expression syntax.
  106. template<typename InputIter>
  107. basic_regex<BidiIter>
  108. compile(InputIter begin, InputIter end, flag_type flags = regex_constants::ECMAScript)
  109. {
  110. typedef typename iterator_category<InputIter>::type category;
  111. return this->compile_(begin, end, flags, category());
  112. }
  113. /// \overload
  114. ///
  115. template<typename InputRange>
  116. typename disable_if<is_pointer<InputRange>, basic_regex<BidiIter> >::type
  117. compile(InputRange const &pat, flag_type flags = regex_constants::ECMAScript)
  118. {
  119. return this->compile(boost::begin(pat), boost::end(pat), flags);
  120. }
  121. /// \overload
  122. ///
  123. basic_regex<BidiIter>
  124. compile(char_type const *begin, flag_type flags = regex_constants::ECMAScript)
  125. {
  126. BOOST_ASSERT(0 != begin);
  127. char_type const *end = begin + std::char_traits<char_type>::length(begin);
  128. return this->compile(begin, end, flags);
  129. }
  130. /// \overload
  131. ///
  132. basic_regex<BidiIter> compile(char_type const *begin, std::size_t size, flag_type flags)
  133. {
  134. BOOST_ASSERT(0 != begin);
  135. char_type const *end = begin + size;
  136. return this->compile(begin, end, flags);
  137. }
  138. ///////////////////////////////////////////////////////////////////////////
  139. // operator[]
  140. /// Return a reference to the named regular expression. If no such named
  141. /// regular expression exists, create a new regular expression and return
  142. /// a reference to it.
  143. ///
  144. /// \param name A std::string containing the name of the regular expression.
  145. /// \pre The string is not empty.
  146. /// \throw bad_alloc on allocation failure.
  147. basic_regex<BidiIter> &operator [](string_type const &name)
  148. {
  149. BOOST_ASSERT(!name.empty());
  150. return this->rules_[name];
  151. }
  152. /// \overload
  153. ///
  154. basic_regex<BidiIter> const &operator [](string_type const &name) const
  155. {
  156. BOOST_ASSERT(!name.empty());
  157. return this->rules_[name];
  158. }
  159. private:
  // Result type of parse_escape(); parse_atom() and parse_literal() read its
  // type_, ch_, mark_nbr_ and class_ members.
  160. typedef detail::escape_value<char_type, char_class_type> escape_value;
  // Matcher that parse_alternates() places in front of a set of alternates.
  161. typedef detail::alternate_matcher<detail::alternates_vector<BidiIter>, RegexTraits> alternate_matcher;
  162. ///////////////////////////////////////////////////////////////////////////
  163. // compile_
  164. /// INTERNAL ONLY
  165. template<typename FwdIter>
  166. basic_regex<BidiIter> compile_(FwdIter begin, FwdIter end, flag_type flags, std::forward_iterator_tag)
  167. {
  168. BOOST_MPL_ASSERT((is_same<char_type, typename iterator_value<FwdIter>::type>));
  169. using namespace regex_constants;
  170. this->reset();
  171. this->traits_.flags(flags);
  172. basic_regex<BidiIter> rextmp, *prex = &rextmp;
  173. FwdIter tmp = begin;
  174. // Check if this regex is a named rule:
  175. string_type name;
  176. if(token_group_begin == this->traits_.get_token(tmp, end) &&
  177. BOOST_XPR_ENSURE_(tmp != end, error_paren, "mismatched parenthesis") &&
  178. token_rule_assign == this->traits_.get_group_type(tmp, end, name))
  179. {
  180. begin = tmp;
  181. BOOST_XPR_ENSURE_
  182. (
  183. begin != end && token_group_end == this->traits_.get_token(begin, end)
  184. , error_paren
  185. , "mismatched parenthesis"
  186. );
  187. prex = &this->rules_[name];
  188. }
  189. this->self_ = detail::core_access<BidiIter>::get_regex_impl(*prex);
  190. // at the top level, a regex is a sequence of alternates
  191. detail::sequence<BidiIter> seq = this->parse_alternates(begin, end);
  192. BOOST_XPR_ENSURE_(begin == end, error_paren, "mismatched parenthesis");
  193. // terminate the sequence
  194. seq += detail::make_dynamic<BidiIter>(detail::end_matcher());
  195. // bundle the regex information into a regex_impl object
  196. detail::common_compile(seq.xpr().matchable(), *this->self_, this->rxtraits());
  197. this->self_->traits_ = new detail::traits_holder<RegexTraits>(this->rxtraits());
  198. this->self_->mark_count_ = this->mark_count_;
  199. this->self_->hidden_mark_count_ = this->hidden_mark_count_;
  200. // References changed, update dependencies.
  201. this->self_->tracking_update();
  202. this->self_.reset();
  203. return *prex;
  204. }
  205. ///////////////////////////////////////////////////////////////////////////
  206. // compile_
  207. /// INTERNAL ONLY
  208. template<typename InputIter>
  209. basic_regex<BidiIter> compile_(InputIter begin, InputIter end, flag_type flags, std::input_iterator_tag)
  210. {
  211. string_type pat(begin, end);
  212. return this->compile_(boost::begin(pat), boost::end(pat), flags, std::forward_iterator_tag());
  213. }
  214. ///////////////////////////////////////////////////////////////////////////
  215. // reset
  216. /// INTERNAL ONLY
  217. void reset()
  218. {
  219. this->mark_count_ = 0;
  220. this->hidden_mark_count_ = 0;
  221. this->traits_.flags(regex_constants::ECMAScript);
  222. }
  223. ///////////////////////////////////////////////////////////////////////////
  224. // regex_traits
  225. /// INTERNAL ONLY
  226. traits_type &rxtraits()
  227. {
  228. return this->traits_.traits();
  229. }
  230. ///////////////////////////////////////////////////////////////////////////
  231. // regex_traits
  232. /// INTERNAL ONLY
  233. traits_type const &rxtraits() const
  234. {
  235. return this->traits_.traits();
  236. }
  237. ///////////////////////////////////////////////////////////////////////////
  238. // parse_alternates
  239. /// INTERNAL ONLY
  240. template<typename FwdIter>
  241. detail::sequence<BidiIter> parse_alternates(FwdIter &begin, FwdIter end)
  242. {
  243. using namespace regex_constants;
  244. int count = 0;
  245. FwdIter tmp = begin;
  246. detail::sequence<BidiIter> seq;
  247. do switch(++count)
  248. {
  249. case 1:
  250. seq = this->parse_sequence(tmp, end);
  251. break;
  252. case 2:
  253. seq = detail::make_dynamic<BidiIter>(alternate_matcher()) | seq;
  254. // fall-through
  255. default:
  256. seq |= this->parse_sequence(tmp, end);
  257. }
  258. while((begin = tmp) != end && token_alternate == this->traits_.get_token(tmp, end));
  259. return seq;
  260. }
  261. ///////////////////////////////////////////////////////////////////////////
  262. // parse_group
  263. /// INTERNAL ONLY
  264. template<typename FwdIter>
  265. detail::sequence<BidiIter> parse_group(FwdIter &begin, FwdIter end)
  266. {
  267. using namespace regex_constants;
  268. int mark_nbr = 0;
  269. bool keeper = false;
  270. bool lookahead = false;
  271. bool lookbehind = false;
  272. bool negative = false;
  273. string_type name;
  274. detail::sequence<BidiIter> seq, seq_end;
  275. FwdIter tmp = FwdIter();
  276. syntax_option_type old_flags = this->traits_.flags();
  277. switch(this->traits_.get_group_type(begin, end, name))
  278. {
  279. case token_no_mark:
  280. // Don't process empty groups like (?:) or (?i)
  281. // BUGBUG this doesn't handle the degenerate (?:)+ correctly
  282. if(token_group_end == this->traits_.get_token(tmp = begin, end))
  283. {
  284. return this->parse_atom(begin = tmp, end);
  285. }
  286. break;
  287. case token_negative_lookahead:
  288. negative = true; // fall-through
  289. case token_positive_lookahead:
  290. lookahead = true;
  291. break;
  292. case token_negative_lookbehind:
  293. negative = true; // fall-through
  294. case token_positive_lookbehind:
  295. lookbehind = true;
  296. break;
  297. case token_independent_sub_expression:
  298. keeper = true;
  299. break;
  300. case token_comment:
  301. while(BOOST_XPR_ENSURE_(begin != end, error_paren, "mismatched parenthesis"))
  302. {
  303. switch(this->traits_.get_token(begin, end))
  304. {
  305. case token_group_end: return this->parse_atom(begin, end);
  306. case token_escape: BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
  307. case token_literal: ++begin;
  308. default:;
  309. }
  310. }
  311. break;
  312. case token_recurse:
  313. BOOST_XPR_ENSURE_
  314. (
  315. begin != end && token_group_end == this->traits_.get_token(begin, end)
  316. , error_paren
  317. , "mismatched parenthesis"
  318. );
  319. return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(this->self_));
  320. case token_rule_assign:
  321. BOOST_THROW_EXCEPTION(
  322. regex_error(error_badrule, "rule assignments must be at the front of the regex")
  323. );
  324. break;
  325. case token_rule_ref:
  326. {
  327. typedef detail::core_access<BidiIter> access;
  328. BOOST_XPR_ENSURE_
  329. (
  330. begin != end && token_group_end == this->traits_.get_token(begin, end)
  331. , error_paren
  332. , "mismatched parenthesis"
  333. );
  334. basic_regex<BidiIter> &rex = this->rules_[name];
  335. shared_ptr<detail::regex_impl<BidiIter> > impl = access::get_regex_impl(rex);
  336. this->self_->track_reference(*impl);
  337. return detail::make_dynamic<BidiIter>(detail::regex_byref_matcher<BidiIter>(impl));
  338. }
  339. case token_named_mark:
  340. mark_nbr = static_cast<int>(++this->mark_count_);
  341. for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
  342. {
  343. BOOST_XPR_ENSURE_(this->self_->named_marks_[i].name_ != name, error_badmark, "named mark already exists");
  344. }
  345. this->self_->named_marks_.push_back(detail::named_mark<char_type>(name, this->mark_count_));
  346. seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
  347. seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
  348. break;
  349. case token_named_mark_ref:
  350. BOOST_XPR_ENSURE_
  351. (
  352. begin != end && token_group_end == this->traits_.get_token(begin, end)
  353. , error_paren
  354. , "mismatched parenthesis"
  355. );
  356. for(std::size_t i = 0; i < this->self_->named_marks_.size(); ++i)
  357. {
  358. if(this->self_->named_marks_[i].name_ == name)
  359. {
  360. mark_nbr = static_cast<int>(this->self_->named_marks_[i].mark_nbr_);
  361. return detail::make_backref_xpression<BidiIter>
  362. (
  363. mark_nbr, this->traits_.flags(), this->rxtraits()
  364. );
  365. }
  366. }
  367. BOOST_THROW_EXCEPTION(regex_error(error_badmark, "invalid named back-reference"));
  368. break;
  369. default:
  370. mark_nbr = static_cast<int>(++this->mark_count_);
  371. seq = detail::make_dynamic<BidiIter>(detail::mark_begin_matcher(mark_nbr));
  372. seq_end = detail::make_dynamic<BidiIter>(detail::mark_end_matcher(mark_nbr));
  373. break;
  374. }
  375. // alternates
  376. seq += this->parse_alternates(begin, end);
  377. seq += seq_end;
  378. BOOST_XPR_ENSURE_
  379. (
  380. begin != end && token_group_end == this->traits_.get_token(begin, end)
  381. , error_paren
  382. , "mismatched parenthesis"
  383. );
  384. typedef detail::shared_matchable<BidiIter> xpr_type;
  385. if(lookahead)
  386. {
  387. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  388. detail::lookahead_matcher<xpr_type> lookahead(seq.xpr(), negative, seq.pure());
  389. seq = detail::make_dynamic<BidiIter>(lookahead);
  390. }
  391. else if(lookbehind)
  392. {
  393. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  394. detail::lookbehind_matcher<xpr_type> lookbehind(seq.xpr(), seq.width().value(), negative, seq.pure());
  395. seq = detail::make_dynamic<BidiIter>(lookbehind);
  396. }
  397. else if(keeper) // independent sub-expression
  398. {
  399. seq += detail::make_independent_end_xpression<BidiIter>(seq.pure());
  400. detail::keeper_matcher<xpr_type> keeper(seq.xpr(), seq.pure());
  401. seq = detail::make_dynamic<BidiIter>(keeper);
  402. }
  403. // restore the modifiers
  404. this->traits_.flags(old_flags);
  405. return seq;
  406. }
  407. ///////////////////////////////////////////////////////////////////////////
  408. // parse_charset
  409. /// INTERNAL ONLY
  410. template<typename FwdIter>
  411. detail::sequence<BidiIter> parse_charset(FwdIter &begin, FwdIter end)
  412. {
  413. detail::compound_charset<traits_type> chset;
  414. // call out to a helper to actually parse the character set
  415. detail::parse_charset(begin, end, chset, this->traits_);
  416. return detail::make_charset_xpression<BidiIter>
  417. (
  418. chset
  419. , this->rxtraits()
  420. , this->traits_.flags()
  421. );
  422. }
  423. ///////////////////////////////////////////////////////////////////////////
  424. // parse_atom
  425. /// INTERNAL ONLY
  426. template<typename FwdIter>
  427. detail::sequence<BidiIter> parse_atom(FwdIter &begin, FwdIter end)
  428. {
  429. using namespace regex_constants;
  430. escape_value esc = { 0, 0, 0, detail::escape_char };
  431. FwdIter old_begin = begin;
  432. switch(this->traits_.get_token(begin, end))
  433. {
  434. case token_literal:
  435. return detail::make_literal_xpression<BidiIter>
  436. (
  437. this->parse_literal(begin, end), this->traits_.flags(), this->rxtraits()
  438. );
  439. case token_any:
  440. return detail::make_any_xpression<BidiIter>(this->traits_.flags(), this->rxtraits());
  441. case token_assert_begin_sequence:
  442. return detail::make_dynamic<BidiIter>(detail::assert_bos_matcher());
  443. case token_assert_end_sequence:
  444. return detail::make_dynamic<BidiIter>(detail::assert_eos_matcher());
  445. case token_assert_begin_line:
  446. return detail::make_assert_begin_line<BidiIter>(this->traits_.flags(), this->rxtraits());
  447. case token_assert_end_line:
  448. return detail::make_assert_end_line<BidiIter>(this->traits_.flags(), this->rxtraits());
  449. case token_assert_word_boundary:
  450. return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::true_>(), this->rxtraits());
  451. case token_assert_not_word_boundary:
  452. return detail::make_assert_word<BidiIter>(detail::word_boundary<mpl::false_>(), this->rxtraits());
  453. case token_assert_word_begin:
  454. return detail::make_assert_word<BidiIter>(detail::word_begin(), this->rxtraits());
  455. case token_assert_word_end:
  456. return detail::make_assert_word<BidiIter>(detail::word_end(), this->rxtraits());
  457. case token_escape:
  458. esc = this->parse_escape(begin, end);
  459. switch(esc.type_)
  460. {
  461. case detail::escape_mark:
  462. return detail::make_backref_xpression<BidiIter>
  463. (
  464. esc.mark_nbr_, this->traits_.flags(), this->rxtraits()
  465. );
  466. case detail::escape_char:
  467. return detail::make_char_xpression<BidiIter>
  468. (
  469. esc.ch_, this->traits_.flags(), this->rxtraits()
  470. );
  471. case detail::escape_class:
  472. return detail::make_posix_charset_xpression<BidiIter>
  473. (
  474. esc.class_
  475. , this->is_upper_(*begin++)
  476. , this->traits_.flags()
  477. , this->rxtraits()
  478. );
  479. }
  480. case token_group_begin:
  481. return this->parse_group(begin, end);
  482. case token_charset_begin:
  483. return this->parse_charset(begin, end);
  484. case token_invalid_quantifier:
  485. BOOST_THROW_EXCEPTION(regex_error(error_badrepeat, "quantifier not expected"));
  486. break;
  487. case token_quote_meta_begin:
  488. return detail::make_literal_xpression<BidiIter>
  489. (
  490. this->parse_quote_meta(begin, end), this->traits_.flags(), this->rxtraits()
  491. );
  492. case token_quote_meta_end:
  493. BOOST_THROW_EXCEPTION(
  494. regex_error(
  495. error_escape
  496. , "found quote-meta end without corresponding quote-meta begin"
  497. )
  498. );
  499. break;
  500. case token_end_of_pattern:
  501. break;
  502. default:
  503. begin = old_begin;
  504. break;
  505. }
  506. return detail::sequence<BidiIter>();
  507. }
  508. ///////////////////////////////////////////////////////////////////////////
  509. // parse_quant
  510. /// INTERNAL ONLY
  511. template<typename FwdIter>
  512. detail::sequence<BidiIter> parse_quant(FwdIter &begin, FwdIter end)
  513. {
  514. BOOST_ASSERT(begin != end);
  515. detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
  516. detail::sequence<BidiIter> seq = this->parse_atom(begin, end);
  517. // BUGBUG this doesn't handle the degenerate (?:)+ correctly
  518. if(!seq.empty() && begin != end && detail::quant_none != seq.quant())
  519. {
  520. if(this->traits_.get_quant_spec(begin, end, spec))
  521. {
  522. BOOST_ASSERT(spec.min_ <= spec.max_);
  523. if(0 == spec.max_) // quant {0,0} is degenerate -- matches nothing.
  524. {
  525. seq = this->parse_quant(begin, end);
  526. }
  527. else
  528. {
  529. seq.repeat(spec);
  530. }
  531. }
  532. }
  533. return seq;
  534. }
  535. ///////////////////////////////////////////////////////////////////////////
  536. // parse_sequence
  537. /// INTERNAL ONLY
  538. template<typename FwdIter>
  539. detail::sequence<BidiIter> parse_sequence(FwdIter &begin, FwdIter end)
  540. {
  541. detail::sequence<BidiIter> seq;
  542. while(begin != end)
  543. {
  544. detail::sequence<BidiIter> seq_quant = this->parse_quant(begin, end);
  545. // did we find a quantified atom?
  546. if(seq_quant.empty())
  547. break;
  548. // chain it to the end of the xpression sequence
  549. seq += seq_quant;
  550. }
  551. return seq;
  552. }
  553. ///////////////////////////////////////////////////////////////////////////
  554. // parse_literal
  555. // scan ahead looking for char literals to be globbed together into a string literal
  556. /// INTERNAL ONLY
  557. template<typename FwdIter>
  558. string_type parse_literal(FwdIter &begin, FwdIter end)
  559. {
  560. using namespace regex_constants;
  561. BOOST_ASSERT(begin != end);
  562. BOOST_ASSERT(token_literal == this->traits_.get_token(begin, end));
  563. escape_value esc = { 0, 0, 0, detail::escape_char };
  564. string_type literal(1, *begin);
  565. for(FwdIter prev = begin, tmp = ++begin; begin != end; prev = begin, begin = tmp)
  566. {
  567. detail::quant_spec spec = { 0, 0, false, &this->hidden_mark_count_ };
  568. if(this->traits_.get_quant_spec(tmp, end, spec))
  569. {
  570. if(literal.size() != 1)
  571. {
  572. begin = prev;
  573. literal.erase(boost::prior(literal.end()));
  574. }
  575. return literal;
  576. }
  577. else switch(this->traits_.get_token(tmp, end))
  578. {
  579. case token_escape:
  580. esc = this->parse_escape(tmp, end);
  581. if(detail::escape_char != esc.type_) return literal;
  582. literal.insert(literal.end(), esc.ch_);
  583. break;
  584. case token_literal:
  585. literal.insert(literal.end(), *tmp++);
  586. break;
  587. default:
  588. return literal;
  589. }
  590. }
  591. return literal;
  592. }
  593. ///////////////////////////////////////////////////////////////////////////
  594. // parse_quote_meta
  595. // scan ahead looking for char literals to be globbed together into a string literal
  596. /// INTERNAL ONLY
  597. template<typename FwdIter>
  598. string_type parse_quote_meta(FwdIter &begin, FwdIter end)
  599. {
  600. using namespace regex_constants;
  601. FwdIter old_begin = begin, old_end;
  602. while(end != (old_end = begin))
  603. {
  604. switch(this->traits_.get_token(begin, end))
  605. {
  606. case token_quote_meta_end: return string_type(old_begin, old_end);
  607. case token_escape: BOOST_XPR_ENSURE_(begin != end, error_escape, "incomplete escape sequence");
  608. case token_invalid_quantifier:
  609. case token_literal: ++begin;
  610. default:;
  611. }
  612. }
  613. return string_type(old_begin, begin);
  614. }
  615. ///////////////////////////////////////////////////////////////////////////////
  616. // parse_escape
  617. /// INTERNAL ONLY
  618. template<typename FwdIter>
  619. escape_value parse_escape(FwdIter &begin, FwdIter end)
  620. {
  621. BOOST_XPR_ENSURE_(begin != end, regex_constants::error_escape, "incomplete escape sequence");
  622. // first, check to see if this can be a backreference
  623. if(0 < this->rxtraits().value(*begin, 10))
  624. {
  625. // Parse at most 3 decimal digits.
  626. FwdIter tmp = begin;
  627. int mark_nbr = detail::toi(tmp, end, this->rxtraits(), 10, 999);
  628. // If the resulting number could conceivably be a backref, then it is.
  629. if(10 > mark_nbr || mark_nbr <= static_cast<int>(this->mark_count_))
  630. {
  631. begin = tmp;
  632. escape_value esc = {0, mark_nbr, 0, detail::escape_mark};
  633. return esc;
  634. }
  635. }
  636. // Not a backreference, defer to the parse_escape helper
  637. return detail::parse_escape(begin, end, this->traits_);
  638. }
  639. bool is_upper_(char_type ch) const
  640. {
  641. return 0 != this->upper_ && this->rxtraits().isctype(ch, this->upper_);
  642. }
  // Number of marks allocated so far by parse_group() for the pattern being
  // compiled; zeroed by reset() and copied into the regex_impl by compile_().
  643. std::size_t mark_count_;
  // Counter whose address is handed to each quant_spec; zeroed by reset() and
  // copied into the regex_impl by compile_().
  // NOTE(review): presumably incremented while quantifiers are applied -- confirm.
  644. std::size_t hidden_mark_count_;
  // Compiler traits: provide the tokenizer functions (get_token, get_group_type,
  // get_quant_spec), the current syntax flags, the locale, and access to the
  // regex traits through traits().
  645. CompilerTraits traits_;
  // Cached result of lookup_classname(rxtraits(), "upper"); refreshed by imbue().
  646. typename RegexTraits::char_class_type upper_;
  // Implementation object of the regex currently being compiled; set at the
  // start of compile_() and reset at its end.
  647. shared_ptr<detail::regex_impl<BidiIter> > self_;
  // Named regular expressions, keyed by rule name.
  648. std::map<string_type, basic_regex<BidiIter> > rules_;
  649. };
  650. }} // namespace boost::xpressive
  651. #endif