/Src/Dependencies/Boost/boost/spirit/home/lex/lexer/lexertl/iterator_tokenizer.hpp

http://hadesmem.googlecode.com/ · C++ Header · 255 lines · 211 code · 36 blank · 8 comment · 46 complexity · 5c4988e2cf625a8a8d81ef9921451295 MD5 · raw file

  1. // Copyright (c) 2001-2011 Hartmut Kaiser
  2. //
  3. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  4. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #if !defined(BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM)
  6. #define BOOST_SPIRIT_LEXERTL_ITERATOR_TOKENISER_MARCH_22_2007_0859AM
  7. #if defined(_MSC_VER)
  8. #pragma once
  9. #endif
  10. #include <boost/detail/iterator.hpp>
  11. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  12. #include <boost/spirit/home/support/detail/lexer/consts.hpp>
  13. #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
  14. #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
  15. #include <vector>
  16. namespace boost { namespace spirit { namespace lex { namespace lexertl
  17. {
  18. ///////////////////////////////////////////////////////////////////////////
  19. template<typename Iterator>
  20. class basic_iterator_tokeniser
  21. {
  22. public:
  23. typedef std::vector<std::size_t> size_t_vector;
  24. typedef typename boost::detail::iterator_traits<Iterator>::value_type
  25. char_type;
  26. static std::size_t next (
  27. boost::lexer::basic_state_machine<char_type> const& state_machine_
  28. , std::size_t &dfa_state_, bool& bol_, Iterator &start_token_
  29. , Iterator const& end_, std::size_t& unique_id_)
  30. {
  31. if (start_token_ == end_)
  32. {
  33. unique_id_ = boost::lexer::npos;
  34. return 0;
  35. }
  36. bool bol = bol_;
  37. boost::lexer::detail::internals const& internals_ =
  38. state_machine_.data();
  39. again:
  40. std::size_t const* lookup_ = &internals_._lookup[dfa_state_]->
  41. front ();
  42. std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_state_];
  43. std::size_t const* dfa_ = &internals_._dfa[dfa_state_]->front ();
  44. std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
  45. Iterator curr_ = start_token_;
  46. bool end_state_ = *ptr_ != 0;
  47. std::size_t id_ = *(ptr_ + boost::lexer::id_index);
  48. std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
  49. std::size_t end_start_state_ = dfa_state_;
  50. bool end_bol_ = bol_;
  51. Iterator end_token_ = start_token_;
  52. while (curr_ != end_)
  53. {
  54. std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
  55. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  56. if (BOL_state_ && bol)
  57. {
  58. ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
  59. }
  60. else if (EOL_state_ && *curr_ == '\n')
  61. {
  62. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  63. }
  64. else
  65. {
  66. typedef typename
  67. boost::detail::iterator_traits<Iterator>::value_type
  68. value_type;
  69. typedef typename
  70. boost::lexer::char_traits<value_type>::index_type
  71. index_type;
  72. index_type index =
  73. boost::lexer::char_traits<value_type>::call(*curr_++);
  74. bol = (index == '\n') ? true : false;
  75. std::size_t const state_ = ptr_[
  76. lookup_[static_cast<std::size_t>(index)]];
  77. if (state_ == 0)
  78. {
  79. break;
  80. }
  81. ptr_ = &dfa_[state_ * dfa_alphabet_];
  82. }
  83. if (*ptr_)
  84. {
  85. end_state_ = true;
  86. id_ = *(ptr_ + boost::lexer::id_index);
  87. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  88. end_start_state_ = *(ptr_ + boost::lexer::state_index);
  89. end_bol_ = bol;
  90. end_token_ = curr_;
  91. }
  92. }
  93. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  94. if (EOL_state_ && curr_ == end_)
  95. {
  96. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  97. if (*ptr_)
  98. {
  99. end_state_ = true;
  100. id_ = *(ptr_ + boost::lexer::id_index);
  101. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  102. end_start_state_ = *(ptr_ + boost::lexer::state_index);
  103. end_bol_ = bol;
  104. end_token_ = curr_;
  105. }
  106. }
  107. if (end_state_) {
  108. // return longest match
  109. dfa_state_ = end_start_state_;
  110. start_token_ = end_token_;
  111. if (id_ == 0)
  112. {
  113. bol = end_bol_;
  114. goto again;
  115. }
  116. else
  117. {
  118. bol_ = end_bol_;
  119. }
  120. }
  121. else {
  122. bol_ = (*start_token_ == '\n') ? true : false;
  123. id_ = boost::lexer::npos;
  124. uid_ = boost::lexer::npos;
  125. }
  126. unique_id_ = uid_;
  127. return id_;
  128. }
  129. ///////////////////////////////////////////////////////////////////////
  130. static std::size_t next (
  131. boost::lexer::basic_state_machine<char_type> const& state_machine_
  132. , bool& bol_, Iterator &start_token_, Iterator const& end_
  133. , std::size_t& unique_id_)
  134. {
  135. if (start_token_ == end_)
  136. {
  137. unique_id_ = boost::lexer::npos;
  138. return 0;
  139. }
  140. bool bol = bol_;
  141. std::size_t const* lookup_ = &state_machine_.data()._lookup[0]->front();
  142. std::size_t dfa_alphabet_ = state_machine_.data()._dfa_alphabet[0];
  143. std::size_t const* dfa_ = &state_machine_.data()._dfa[0]->front ();
  144. std::size_t const* ptr_ = dfa_ + dfa_alphabet_;
  145. Iterator curr_ = start_token_;
  146. bool end_state_ = *ptr_ != 0;
  147. std::size_t id_ = *(ptr_ + boost::lexer::id_index);
  148. std::size_t uid_ = *(ptr_ + boost::lexer::unique_id_index);
  149. bool end_bol_ = bol_;
  150. Iterator end_token_ = start_token_;
  151. while (curr_ != end_)
  152. {
  153. std::size_t const BOL_state_ = ptr_[boost::lexer::bol_index];
  154. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  155. if (BOL_state_ && bol)
  156. {
  157. ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];
  158. }
  159. else if (EOL_state_ && *curr_ == '\n')
  160. {
  161. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  162. }
  163. else
  164. {
  165. typedef typename
  166. boost::detail::iterator_traits<Iterator>::value_type
  167. value_type;
  168. typedef typename
  169. boost::lexer::char_traits<value_type>::index_type
  170. index_type;
  171. index_type index =
  172. boost::lexer::char_traits<value_type>::call(*curr_++);
  173. bol = (index == '\n') ? true : false;
  174. std::size_t const state_ = ptr_[
  175. lookup_[static_cast<std::size_t>(index)]];
  176. if (state_ == 0)
  177. {
  178. break;
  179. }
  180. ptr_ = &dfa_[state_ * dfa_alphabet_];
  181. }
  182. if (*ptr_)
  183. {
  184. end_state_ = true;
  185. id_ = *(ptr_ + boost::lexer::id_index);
  186. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  187. end_bol_ = bol;
  188. end_token_ = curr_;
  189. }
  190. }
  191. std::size_t const EOL_state_ = ptr_[boost::lexer::eol_index];
  192. if (EOL_state_ && curr_ == end_)
  193. {
  194. ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];
  195. if (*ptr_)
  196. {
  197. end_state_ = true;
  198. id_ = *(ptr_ + boost::lexer::id_index);
  199. uid_ = *(ptr_ + boost::lexer::unique_id_index);
  200. end_bol_ = bol;
  201. end_token_ = curr_;
  202. }
  203. }
  204. if (end_state_) {
  205. // return longest match
  206. bol_ = end_bol_;
  207. start_token_ = end_token_;
  208. }
  209. else {
  210. bol_ = *start_token_ == '\n';
  211. id_ = boost::lexer::npos;
  212. uid_ = boost::lexer::npos;
  213. }
  214. unique_id_ = uid_;
  215. return id_;
  216. }
  217. };
  218. }}}}
  219. #endif