/Src/Dependencies/Boost/boost/spirit/home/lex/lexer/lexertl/generate_static.hpp

http://hadesmem.googlecode.com/ · C++ Header · 961 lines · 823 code · 110 blank · 28 comment · 131 complexity · 96ab24727985ba25cb0110dbed6dae16 MD5 · raw file

  1. // Copyright (c) 2008-2009 Ben Hanson
  2. // Copyright (c) 2008-2011 Hartmut Kaiser
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. #if !defined(BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM)
  7. #define BOOST_SPIRIT_LEX_LEXERTL_GENERATE_CPP_FEB_10_2008_0855PM
  8. #if defined(_MSC_VER)
  9. #pragma once
  10. #endif
  11. #include <boost/spirit/home/support/detail/lexer/char_traits.hpp>
  12. #include <boost/spirit/home/support/detail/lexer/consts.hpp>
  13. #include <boost/spirit/home/support/detail/lexer/rules.hpp>
  14. #include <boost/spirit/home/support/detail/lexer/size_t.hpp>
  15. #include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
  16. #include <boost/spirit/home/support/detail/lexer/debug.hpp>
  17. #include <boost/spirit/home/lex/lexer/lexertl/static_version.hpp>
  18. #include <boost/algorithm/string.hpp>
  19. #include <boost/lexical_cast.hpp>
  20. ///////////////////////////////////////////////////////////////////////////////
  21. namespace boost { namespace spirit { namespace lex { namespace lexertl
  22. {
  23. namespace detail
  24. {
  // Writes a separator line made of 80 '/' characters, followed by a
  // newline, to os_. Returns true if the stream is still good afterwards.
  inline bool
  generate_delimiter(std::ostream &os_)
  {
      std::string const separator(80, '/');
      os_ << separator << "\n";
      return os_.good();
  }
  31. ///////////////////////////////////////////////////////////////////////////
  32. // Generate a table of the names of the used lexer states, which is a bit
  33. // tricky, because the table stored with the rules is sorted based on the
  34. // names, but we need it sorted using the state ids.
  35. template <typename Char>
  36. inline bool
  37. generate_cpp_state_info (boost::lexer::basic_rules<Char> const& rules_
  38. , std::ostream &os_, char const* name_suffix)
  39. {
  40. // we need to re-sort the state names in ascending order of the state
  41. // ids, filling possible gaps in between later
  42. typedef typename
  43. boost::lexer::basic_rules<Char>::string_size_t_map::const_iterator
  44. state_iterator;
  45. typedef std::map<std::size_t, char const*> reverse_state_map_type;
  46. reverse_state_map_type reverse_state_map;
  47. state_iterator send = rules_.statemap().end();
  48. for (state_iterator sit = rules_.statemap().begin(); sit != send; ++sit)
  49. {
  50. typedef typename reverse_state_map_type::value_type value_type;
  51. reverse_state_map.insert(value_type((*sit).second, (*sit).first.c_str()));
  52. }
  53. generate_delimiter(os_);
  54. os_ << "// this table defines the names of the lexer states\n";
  55. os_ << "char const* const lexer_state_names"
  56. << (name_suffix[0] ? "_" : "") << name_suffix
  57. << "[" << rules_.statemap().size() << "] = \n{\n";
  58. typedef typename reverse_state_map_type::iterator iterator;
  59. iterator rend = reverse_state_map.end();
  60. std::size_t last_id = 0;
  61. for (iterator rit = reverse_state_map.begin(); rit != rend; ++last_id)
  62. {
  63. for (/**/; last_id < (*rit).first; ++last_id)
  64. {
  65. os_ << " 0, // \"<undefined state>\"\n";
  66. }
  67. os_ << " \"" << (*rit).second << "\"";
  68. if (++rit != rend)
  69. os_ << ",\n";
  70. else
  71. os_ << "\n"; // don't generate the final comma
  72. }
  73. os_ << "};\n\n";
  74. generate_delimiter(os_);
  75. os_ << "// this variable defines the number of lexer states\n";
  76. os_ << "std::size_t const lexer_state_count"
  77. << (name_suffix[0] ? "_" : "") << name_suffix
  78. << " = " << rules_.statemap().size() << ";\n\n";
  79. return os_.good();
  80. }
  81. inline bool
  82. generate_cpp_state_table (std::ostream &os_, char const* name_suffix
  83. , bool bol, bool eol)
  84. {
  85. std::string suffix(name_suffix[0] ? "_" : "");
  86. suffix += name_suffix;
  87. generate_delimiter(os_);
  88. os_ << "// this defines a generic accessors for the information above\n";
  89. os_ << "struct lexer" << suffix << "\n{\n";
  90. os_ << " // version number and feature-set of compatible static lexer engine\n";
  91. os_ << " enum\n";
  92. os_ << " {\n static_version = "
  93. << boost::lexical_cast<std::string>(SPIRIT_STATIC_LEXER_VERSION) << ",\n";
  94. os_ << " supports_bol = " << std::boolalpha << bol << ",\n";
  95. os_ << " supports_eol = " << std::boolalpha << eol << "\n";
  96. os_ << " };\n\n";
  97. os_ << " // return the number of lexer states\n";
  98. os_ << " static std::size_t state_count()\n";
  99. os_ << " {\n return lexer_state_count" << suffix << "; \n }\n\n";
  100. os_ << " // return the name of the lexer state as given by 'idx'\n";
  101. os_ << " static char const* state_name(std::size_t idx)\n";
  102. os_ << " {\n return lexer_state_names" << suffix << "[idx]; \n }\n\n";
  103. os_ << " // return the next matched token\n";
  104. os_ << " template<typename Iterator>\n";
  105. os_ << " static std::size_t next(std::size_t &start_state_, bool& bol_\n";
  106. os_ << " , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_)\n";
  107. os_ << " {\n return next_token" << suffix
  108. << "(start_state_, bol_, start_token_, end_, unique_id_);\n }\n";
  109. os_ << "};\n\n";
  110. return os_.good();
  111. }
  112. ///////////////////////////////////////////////////////////////////////////
  113. // generate function body based on traversing the DFA tables
  114. template <typename Char>
  115. bool generate_function_body_dfa(std::ostream & os_
  116. , boost::lexer::basic_state_machine<Char> const &sm_)
  117. {
  118. std::size_t const dfas_ = sm_.data()._dfa->size();
  119. std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  120. os_ << " enum {end_state_index, id_index, unique_id_index, "
  121. "state_index, bol_index,\n";
  122. os_ << " eol_index, dead_state_index, dfa_offset};\n\n";
  123. os_ << " static std::size_t const npos = "
  124. "static_cast<std::size_t>(~0);\n";
  125. if (dfas_ > 1)
  126. {
  127. for (std::size_t state_ = 0; state_ < dfas_; ++state_)
  128. {
  129. std::size_t i_ = 0;
  130. std::size_t j_ = 1;
  131. std::size_t count_ = lookups_ / 8;
  132. std::size_t const* lookup_ = &sm_.data()._lookup[state_]->front();
  133. std::size_t const* dfa_ = &sm_.data()._dfa[state_]->front();
  134. os_ << " static std::size_t const lookup" << state_
  135. << "_[" << lookups_ << "] = {\n ";
  136. for (/**/; i_ < count_; ++i_)
  137. {
  138. std::size_t const index_ = i_ * 8;
  139. os_ << lookup_[index_];
  140. for (/**/; j_ < 8; ++j_)
  141. {
  142. os_ << ", " << lookup_[index_ + j_];
  143. }
  144. if (i_ < count_ - 1)
  145. {
  146. os_ << ",\n ";
  147. }
  148. j_ = 1;
  149. }
  150. os_ << " };\n";
  151. count_ = sm_.data()._dfa[state_]->size ();
  152. os_ << " static const std::size_t dfa" << state_ << "_["
  153. << count_ << "] = {\n ";
  154. count_ /= 8;
  155. for (i_ = 0; i_ < count_; ++i_)
  156. {
  157. std::size_t const index_ = i_ * 8;
  158. os_ << dfa_[index_];
  159. for (j_ = 1; j_ < 8; ++j_)
  160. {
  161. os_ << ", " << dfa_[index_ + j_];
  162. }
  163. if (i_ < count_ - 1)
  164. {
  165. os_ << ",\n ";
  166. }
  167. }
  168. std::size_t const mod_ = sm_.data()._dfa[state_]->size () % 8;
  169. if (mod_)
  170. {
  171. std::size_t const index_ = count_ * 8;
  172. if (count_)
  173. {
  174. os_ << ",\n ";
  175. }
  176. os_ << dfa_[index_];
  177. for (j_ = 1; j_ < mod_; ++j_)
  178. {
  179. os_ << ", " << dfa_[index_ + j_];
  180. }
  181. }
  182. os_ << " };\n";
  183. }
  184. std::size_t count_ = sm_.data()._dfa_alphabet.size();
  185. std::size_t i_ = 1;
  186. os_ << " static std::size_t const* lookup_arr_[" << count_
  187. << "] = { lookup0_";
  188. for (i_ = 1; i_ < count_; ++i_)
  189. {
  190. os_ << ", " << "lookup" << i_ << "_";
  191. }
  192. os_ << " };\n";
  193. os_ << " static std::size_t const dfa_alphabet_arr_["
  194. << count_ << "] = { ";
  195. os_ << sm_.data()._dfa_alphabet.front ();
  196. for (i_ = 1; i_ < count_; ++i_)
  197. {
  198. os_ << ", " << sm_.data()._dfa_alphabet[i_];
  199. }
  200. os_ << " };\n";
  201. os_ << " static std::size_t const* dfa_arr_[" << count_
  202. << "] = { ";
  203. os_ << "dfa0_";
  204. for (i_ = 1; i_ < count_; ++i_)
  205. {
  206. os_ << ", " << "dfa" << i_ << "_";
  207. }
  208. os_ << " };\n";
  209. }
  210. else
  211. {
  212. std::size_t const* lookup_ = &sm_.data()._lookup[0]->front();
  213. std::size_t const* dfa_ = &sm_.data()._dfa[0]->front();
  214. std::size_t i_ = 0;
  215. std::size_t j_ = 1;
  216. std::size_t count_ = lookups_ / 8;
  217. os_ << " static std::size_t const lookup_[";
  218. os_ << sm_.data()._lookup[0]->size() << "] = {\n ";
  219. for (/**/; i_ < count_; ++i_)
  220. {
  221. const std::size_t index_ = i_ * 8;
  222. os_ << lookup_[index_];
  223. for (/**/; j_ < 8; ++j_)
  224. {
  225. os_ << ", " << lookup_[index_ + j_];
  226. }
  227. if (i_ < count_ - 1)
  228. {
  229. os_ << ",\n ";
  230. }
  231. j_ = 1;
  232. }
  233. os_ << " };\n";
  234. os_ << " static std::size_t const dfa_alphabet_ = "
  235. << sm_.data()._dfa_alphabet.front () << ";\n";
  236. os_ << " static std::size_t const dfa_["
  237. << sm_.data()._dfa[0]->size () << "] = {\n ";
  238. count_ = sm_.data()._dfa[0]->size () / 8;
  239. for (i_ = 0; i_ < count_; ++i_)
  240. {
  241. const std::size_t index_ = i_ * 8;
  242. os_ << dfa_[index_];
  243. for (j_ = 1; j_ < 8; ++j_)
  244. {
  245. os_ << ", " << dfa_[index_ + j_];
  246. }
  247. if (i_ < count_ - 1)
  248. {
  249. os_ << ",\n ";
  250. }
  251. }
  252. const std::size_t mod_ = sm_.data()._dfa[0]->size () % 8;
  253. if (mod_)
  254. {
  255. const std::size_t index_ = count_ * 8;
  256. if (count_)
  257. {
  258. os_ << ",\n ";
  259. }
  260. os_ << dfa_[index_];
  261. for (j_ = 1; j_ < mod_; ++j_)
  262. {
  263. os_ << ", " << dfa_[index_ + j_];
  264. }
  265. }
  266. os_ << " };\n";
  267. }
  268. os_ << "\n if (start_token_ == end_)\n";
  269. os_ << " {\n";
  270. os_ << " unique_id_ = npos;\n";
  271. os_ << " return 0;\n";
  272. os_ << " }\n\n";
  273. if (sm_.data()._seen_BOL_assertion)
  274. {
  275. os_ << " bool bol = bol_;\n\n";
  276. }
  277. if (dfas_ > 1)
  278. {
  279. os_ << "again:\n";
  280. os_ << " std::size_t const* lookup_ = lookup_arr_[start_state_];\n";
  281. os_ << " std::size_t dfa_alphabet_ = dfa_alphabet_arr_[start_state_];\n";
  282. os_ << " std::size_t const*dfa_ = dfa_arr_[start_state_];\n";
  283. }
  284. os_ << " std::size_t const* ptr_ = dfa_ + dfa_alphabet_;\n";
  285. os_ << " Iterator curr_ = start_token_;\n";
  286. os_ << " bool end_state_ = *ptr_ != 0;\n";
  287. os_ << " std::size_t id_ = *(ptr_ + id_index);\n";
  288. os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n";
  289. if (dfas_ > 1)
  290. {
  291. os_ << " std::size_t end_start_state_ = start_state_;\n";
  292. }
  293. if (sm_.data()._seen_BOL_assertion)
  294. {
  295. os_ << " bool end_bol_ = bol_;\n";
  296. }
  297. os_ << " Iterator end_token_ = start_token_;\n\n";
  298. os_ << " while (curr_ != end_)\n";
  299. os_ << " {\n";
  300. if (sm_.data()._seen_BOL_assertion)
  301. {
  302. os_ << " std::size_t const BOL_state_ = ptr_[bol_index];\n\n";
  303. }
  304. if (sm_.data()._seen_EOL_assertion)
  305. {
  306. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  307. }
  308. if (sm_.data()._seen_BOL_assertion && sm_.data()._seen_EOL_assertion)
  309. {
  310. os_ << " if (BOL_state_ && bol)\n";
  311. os_ << " {\n";
  312. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  313. os_ << " }\n";
  314. os_ << " else if (EOL_state_ && *curr_ == '\\n')\n";
  315. os_ << " {\n";
  316. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  317. os_ << " }\n";
  318. os_ << " else\n";
  319. os_ << " {\n";
  320. if (lookups_ == 256)
  321. {
  322. os_ << " unsigned char index = \n";
  323. os_ << " static_cast<unsigned char>(*curr_++);\n";
  324. }
  325. else
  326. {
  327. os_ << " std::size_t index = *curr_++\n";
  328. }
  329. os_ << " bol = (index == '\n') ? true : false;\n";
  330. os_ << " std::size_t const state_ = ptr_[\n";
  331. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  332. os_ << '\n';
  333. os_ << " if (state_ == 0) break;\n";
  334. os_ << '\n';
  335. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  336. os_ << " }\n\n";
  337. }
  338. else if (sm_.data()._seen_BOL_assertion)
  339. {
  340. os_ << " if (BOL_state_ && bol)\n";
  341. os_ << " {\n";
  342. os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
  343. os_ << " }\n";
  344. os_ << " else\n";
  345. os_ << " {\n";
  346. if (lookups_ == 256)
  347. {
  348. os_ << " unsigned char index = \n";
  349. os_ << " static_cast<unsigned char>(*curr_++);\n";
  350. }
  351. else
  352. {
  353. os_ << " std::size_t index = *curr_++\n";
  354. }
  355. os_ << " bol = (index == '\n') ? true : false;\n";
  356. os_ << " std::size_t const state_ = ptr_[\n";
  357. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  358. os_ << '\n';
  359. os_ << " if (state_ == 0) break;\n";
  360. os_ << '\n';
  361. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  362. os_ << " }\n\n";
  363. }
  364. else if (sm_.data()._seen_EOL_assertion)
  365. {
  366. os_ << " if (EOL_state_ && *curr_ == '\\n')\n";
  367. os_ << " {\n";
  368. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
  369. os_ << " }\n";
  370. os_ << " else\n";
  371. os_ << " {\n";
  372. if (lookups_ == 256)
  373. {
  374. os_ << " unsigned char index = \n";
  375. os_ << " static_cast<unsigned char>(*curr_++);\n";
  376. }
  377. else
  378. {
  379. os_ << " std::size_t index = *curr_++\n";
  380. }
  381. os_ << " bol = (index == '\n') ? true : false;\n";
  382. os_ << " std::size_t const state_ = ptr_[\n";
  383. os_ << " lookup_[static_cast<std::size_t>(index)]];\n";
  384. os_ << '\n';
  385. os_ << " if (state_ == 0) break;\n";
  386. os_ << '\n';
  387. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n";
  388. os_ << " }\n\n";
  389. }
  390. else
  391. {
  392. os_ << " std::size_t const state_ =\n";
  393. if (lookups_ == 256)
  394. {
  395. os_ << " ptr_[lookup_["
  396. "static_cast<unsigned char>(*curr_++)]];\n";
  397. }
  398. else
  399. {
  400. os_ << " ptr_[lookup_[*curr_++]];\n";
  401. }
  402. os_ << '\n';
  403. os_ << " if (state_ == 0) break;\n";
  404. os_ << '\n';
  405. os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n\n";
  406. }
  407. os_ << " if (*ptr_)\n";
  408. os_ << " {\n";
  409. os_ << " end_state_ = true;\n";
  410. os_ << " id_ = *(ptr_ + id_index);\n";
  411. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  412. if (dfas_ > 1)
  413. {
  414. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  415. }
  416. if (sm_.data()._seen_BOL_assertion)
  417. {
  418. os_ << " end_bol_ = bol;\n";
  419. }
  420. os_ << " end_token_ = curr_;\n";
  421. os_ << " }\n";
  422. os_ << " }\n\n";
  423. if (sm_.data()._seen_EOL_assertion)
  424. {
  425. os_ << " std::size_t const EOL_state_ = ptr_[eol_index];\n\n";
  426. os_ << " if (EOL_state_ && curr_ == end_)\n";
  427. os_ << " {\n";
  428. os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n\n";
  429. os_ << " if (*ptr_)\n";
  430. os_ << " {\n";
  431. os_ << " end_state_ = true;\n";
  432. os_ << " id_ = *(ptr_ + id_index);\n";
  433. os_ << " uid_ = *(ptr_ + unique_id_index);\n";
  434. if (dfas_ > 1)
  435. {
  436. os_ << " end_start_state_ = *(ptr_ + state_index);\n";
  437. }
  438. if (sm_.data()._seen_BOL_assertion)
  439. {
  440. os_ << " end_bol_ = bol;\n";
  441. }
  442. os_ << " end_token_ = curr_;\n";
  443. os_ << " }\n";
  444. os_ << " }\n\n";
  445. }
  446. os_ << " if (end_state_)\n";
  447. os_ << " {\n";
  448. os_ << " // return longest match\n";
  449. os_ << " start_token_ = end_token_;\n";
  450. if (dfas_ > 1)
  451. {
  452. os_ << " start_state_ = end_start_state_;\n";
  453. os_ << " if (id_ == 0)\n";
  454. os_ << " {\n";
  455. if (sm_.data()._seen_BOL_assertion)
  456. {
  457. os_ << " bol = end_bol_;\n";
  458. }
  459. os_ << " goto again;\n";
  460. os_ << " }\n";
  461. if (sm_.data()._seen_BOL_assertion)
  462. {
  463. os_ << " else\n";
  464. os_ << " {\n";
  465. os_ << " bol_ = end_bol_;\n";
  466. os_ << " }\n";
  467. }
  468. }
  469. else if (sm_.data()._seen_BOL_assertion)
  470. {
  471. os_ << " bol_ = end_bol_;\n";
  472. }
  473. os_ << " }\n";
  474. os_ << " else\n";
  475. os_ << " {\n";
  476. if (sm_.data()._seen_BOL_assertion)
  477. {
  478. os_ << " bol_ = (*start_token_ == '\n') ? true : false;\n";
  479. }
  480. os_ << " id_ = npos;\n";
  481. os_ << " uid_ = npos;\n";
  482. os_ << " }\n\n";
  483. os_ << " unique_id_ = uid_;\n";
  484. os_ << " return id_;\n";
  485. return os_.good();
  486. }
  487. ///////////////////////////////////////////////////////////////////////////
  488. template <typename Char>
  489. inline std::string get_charlit(Char ch)
  490. {
  491. std::basic_string<Char> result;
  492. boost::lexer::basic_string_token<Char>::escape_char (ch, result);
  493. return result;
  494. }
  495. // check whether state0_0 is referenced from any of the other states
  496. template <typename Char>
  497. bool need_label0_0(boost::lexer::basic_state_machine<Char> const &sm_)
  498. {
  499. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  500. iterator_type;
  501. iterator_type iter_ = sm_.begin();
  502. std::size_t const states_ = iter_->states;
  503. for (std::size_t state_ = 0; state_ < states_; ++state_)
  504. {
  505. if (0 == iter_->bol_index || 0 == iter_->eol_index)
  506. {
  507. return true;
  508. }
  509. std::size_t const transitions_ = iter_->transitions;
  510. for (std::size_t t_ = 0; t_ < transitions_; ++t_)
  511. {
  512. if (0 == iter_->goto_state)
  513. {
  514. return true;
  515. }
  516. ++iter_;
  517. }
  518. if (transitions_ == 0) ++iter_;
  519. }
  520. return false;
  521. }
  522. ///////////////////////////////////////////////////////////////////////////
  523. template <typename Char>
  524. bool generate_function_body_switch(std::ostream & os_
  525. , boost::lexer::basic_state_machine<Char> const &sm_)
  526. {
  527. typedef typename boost::lexer::basic_state_machine<Char>::iterator
  528. iterator_type;
  529. std::size_t const lookups_ = sm_.data()._lookup->front ()->size ();
  530. iterator_type iter_ = sm_.begin();
  531. iterator_type labeliter_ = iter_;
  532. iterator_type end_ = sm_.end();
  533. std::size_t const dfas_ = sm_.data()._dfa->size ();
  534. os_ << " static std::size_t const npos = "
  535. "static_cast<std::size_t>(~0);\n";
  536. os_ << "\n if (start_token_ == end_)\n";
  537. os_ << " {\n";
  538. os_ << " unique_id_ = npos;\n";
  539. os_ << " return 0;\n";
  540. os_ << " }\n\n";
  541. if (sm_.data()._seen_BOL_assertion)
  542. {
  543. os_ << " bool bol = bol_;\n";
  544. }
  545. if (dfas_ > 1)
  546. {
  547. os_ << "again:\n";
  548. }
  549. os_ << " Iterator curr_ = start_token_;\n";
  550. os_ << " bool end_state_ = false;\n";
  551. os_ << " std::size_t id_ = npos;\n";
  552. os_ << " std::size_t uid_ = npos;\n";
  553. if (dfas_ > 1)
  554. {
  555. os_ << " std::size_t end_start_state_ = start_state_;\n";
  556. }
  557. if (sm_.data()._seen_BOL_assertion)
  558. {
  559. os_ << " bool end_bol_ = bol_;\n";
  560. }
  561. os_ << " Iterator end_token_ = start_token_;\n";
  562. os_ << '\n';
  563. os_ << " " << ((lookups_ == 256) ? "char" : "wchar_t")
  564. << " ch_ = 0;\n\n";
  565. if (dfas_ > 1)
  566. {
  567. os_ << " switch (start_state_)\n";
  568. os_ << " {\n";
  569. for (std::size_t i_ = 0; i_ < dfas_; ++i_)
  570. {
  571. os_ << " case " << i_ << ":\n";
  572. os_ << " goto state" << i_ << "_0;\n";
  573. os_ << " break;\n";
  574. }
  575. os_ << " default:\n";
  576. os_ << " goto end;\n";
  577. os_ << " break;\n";
  578. os_ << " }\n";
  579. }
  580. bool need_state0_0_label = need_label0_0(sm_);
  581. for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
  582. {
  583. std::size_t const states_ = iter_->states;
  584. for (std::size_t state_ = 0; state_ < states_; ++state_)
  585. {
  586. std::size_t const transitions_ = iter_->transitions;
  587. std::size_t t_ = 0;
  588. if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
  589. {
  590. os_ << "\nstate" << dfa_ << '_' << state_ << ":\n";
  591. }
  592. if (iter_->end_state)
  593. {
  594. os_ << " end_state_ = true;\n";
  595. os_ << " id_ = " << iter_->id << ";\n";
  596. os_ << " uid_ = " << iter_->unique_id << ";\n";
  597. os_ << " end_token_ = curr_;\n";
  598. if (dfas_ > 1)
  599. {
  600. os_ << " end_start_state_ = " << iter_->goto_dfa <<
  601. ";\n";
  602. }
  603. if (sm_.data()._seen_BOL_assertion)
  604. {
  605. os_ << " end_bol_ = bol;\n";
  606. }
  607. if (transitions_) os_ << '\n';
  608. }
  609. if (t_ < transitions_ ||
  610. iter_->bol_index != boost::lexer::npos ||
  611. iter_->eol_index != boost::lexer::npos)
  612. {
  613. os_ << " if (curr_ == end_) goto end;\n";
  614. os_ << " ch_ = *curr_;\n";
  615. if (iter_->bol_index != boost::lexer::npos)
  616. {
  617. os_ << "\n if (bol) goto state" << dfa_ << '_'
  618. << iter_->bol_index << ";\n";
  619. }
  620. if (iter_->eol_index != boost::lexer::npos)
  621. {
  622. os_ << "\n if (ch_ == '\n') goto state" << dfa_
  623. << '_' << iter_->eol_index << ";\n";
  624. }
  625. os_ << " ++curr_;\n";
  626. }
  627. for (/**/; t_ < transitions_; ++t_)
  628. {
  629. char const *ptr_ = iter_->token._charset.c_str();
  630. char const *end_ = ptr_ + iter_->token._charset.size();
  631. char start_char_ = 0;
  632. char curr_char_ = 0;
  633. bool range_ = false;
  634. bool first_char_ = true;
  635. os_ << "\n if (";
  636. while (ptr_ != end_)
  637. {
  638. curr_char_ = *ptr_++;
  639. if (*ptr_ == curr_char_ + 1)
  640. {
  641. if (!range_)
  642. {
  643. start_char_ = curr_char_;
  644. }
  645. range_ = true;
  646. }
  647. else
  648. {
  649. if (!first_char_)
  650. {
  651. os_ << ((iter_->token._negated) ? " && " : " || ");
  652. }
  653. else
  654. {
  655. first_char_ = false;
  656. }
  657. if (range_)
  658. {
  659. if (iter_->token._negated)
  660. {
  661. os_ << "!";
  662. }
  663. os_ << "(ch_ >= '" << get_charlit(start_char_)
  664. << "' && ch_ <= '"
  665. << get_charlit(curr_char_) << "')";
  666. range_ = false;
  667. }
  668. else
  669. {
  670. os_ << "ch_ "
  671. << ((iter_->token._negated) ? "!=" : "==")
  672. << " '" << get_charlit(curr_char_) << "'";
  673. }
  674. }
  675. }
  676. os_ << ") goto state" << dfa_ << '_' << iter_->goto_state
  677. << ";\n";
  678. ++iter_;
  679. }
  680. if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
  681. {
  682. os_ << " goto end;\n";
  683. }
  684. if (transitions_ == 0) ++iter_;
  685. }
  686. }
  687. os_ << "\nend:\n";
  688. os_ << " if (end_state_)\n";
  689. os_ << " {\n";
  690. os_ << " // return longest match\n";
  691. os_ << " start_token_ = end_token_;\n";
  692. if (dfas_ > 1)
  693. {
  694. os_ << " start_state_ = end_start_state_;\n";
  695. os_ << "\n if (id_ == 0)\n";
  696. os_ << " {\n";
  697. if (sm_.data()._seen_BOL_assertion)
  698. {
  699. os_ << " bol = end_bol_;\n";
  700. }
  701. os_ << " goto again;\n";
  702. os_ << " }\n";
  703. if (sm_.data()._seen_BOL_assertion)
  704. {
  705. os_ << " else\n";
  706. os_ << " {\n";
  707. os_ << " bol_ = end_bol_;\n";
  708. os_ << " }\n";
  709. }
  710. }
  711. else if (sm_.data()._seen_BOL_assertion)
  712. {
  713. os_ << " bol_ = end_bol_;\n";
  714. }
  715. os_ << " }\n";
  716. os_ << " else\n";
  717. os_ << " {\n";
  718. if (sm_.data()._seen_BOL_assertion)
  719. {
  720. os_ << " bol_ = (*start_token_ == '\\n') ? true : false;\n";
  721. }
  722. os_ << " id_ = npos;\n";
  723. os_ << " uid_ = npos;\n";
  724. os_ << " }\n\n";
  725. os_ << " unique_id_ = uid_;\n";
  726. os_ << " return id_;\n";
  727. return os_.good();
  728. }
  729. ///////////////////////////////////////////////////////////////////////////
  730. // Generate a tokenizer for the given state machine.
  731. template <typename Char, typename F>
  732. inline bool
  733. generate_cpp (boost::lexer::basic_state_machine<Char> const& sm_
  734. , boost::lexer::basic_rules<Char> const& rules_
  735. , std::ostream &os_, char const* name_suffix, F generate_function_body)
  736. {
  737. if (sm_.data()._lookup->empty())
  738. return false;
  739. std::size_t const dfas_ = sm_.data()._dfa->size();
  740. // std::size_t const lookups_ = sm_.data()._lookup->front()->size();
  741. os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
  742. os_ << "// Copyright (c) 2008-2011 Hartmut Kaiser\n";
  743. os_ << "//\n";
  744. os_ << "// Distributed under the Boost Software License, "
  745. "Version 1.0. (See accompanying\n";
  746. os_ << "// file licence_1_0.txt or copy at "
  747. "http://www.boost.org/LICENSE_1_0.txt)\n\n";
  748. os_ << "// Auto-generated by boost::lexer, do not edit\n\n";
  749. std::string guard(name_suffix);
  750. guard += name_suffix[0] ? "_" : "";
  751. guard += __DATE__ "_" __TIME__;
  752. std::string::size_type p = guard.find_first_of(": ");
  753. while (std::string::npos != p)
  754. {
  755. guard.replace(p, 1, "_");
  756. p = guard.find_first_of(": ", p);
  757. }
  758. boost::to_upper(guard);
  759. os_ << "#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << ")\n";
  760. os_ << "#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_" << guard << "\n\n";
  761. os_ << "#include <boost/detail/iterator.hpp>\n";
  762. os_ << "#include <boost/spirit/home/support/detail/lexer/char_traits.hpp>\n\n";
  763. generate_delimiter(os_);
  764. os_ << "// the generated table of state names and the tokenizer have to be\n"
  765. "// defined in the boost::spirit::lex::lexertl::static_ namespace\n";
  766. os_ << "namespace boost { namespace spirit { namespace lex { "
  767. "namespace lexertl { namespace static_ {\n\n";
  768. // generate the lexer state information variables
  769. if (!generate_cpp_state_info(rules_, os_, name_suffix))
  770. return false;
  771. generate_delimiter(os_);
  772. os_ << "// this function returns the next matched token\n";
  773. os_ << "template<typename Iterator>\n";
  774. os_ << "std::size_t next_token" << (name_suffix[0] ? "_" : "")
  775. << name_suffix << " (";
  776. if (dfas_ > 1)
  777. {
  778. os_ << "std::size_t& start_state_, ";
  779. }
  780. else
  781. {
  782. os_ << "std::size_t& /*start_state_*/, ";
  783. }
  784. if (sm_.data()._seen_BOL_assertion)
  785. {
  786. os_ << "bool& bol_, ";
  787. }
  788. else
  789. {
  790. os_ << "bool& /*bol_*/, ";
  791. }
  792. os_ << "\n ";
  793. os_ << "Iterator &start_token_, Iterator const& end_, ";
  794. os_ << "std::size_t& unique_id_)\n";
  795. os_ << "{\n";
  796. if (!generate_function_body(os_, sm_))
  797. return false;
  798. os_ << "}\n\n";
  799. if (!generate_cpp_state_table(os_, name_suffix
  800. , sm_.data()._seen_BOL_assertion, sm_.data()._seen_EOL_assertion))
  801. {
  802. return false;
  803. }
  804. os_ << "}}}}} // namespace boost::spirit::lex::lexertl::static_\n\n";
  805. os_ << "#endif\n";
  806. return os_.good();
  807. }
  808. } // namespace detail
  809. ///////////////////////////////////////////////////////////////////////////
  810. template <typename Lexer, typename F>
  811. inline bool
  812. generate_static(Lexer const& lexer, std::ostream& os
  813. , char const* name_suffix, F f)
  814. {
  815. if (!lexer.init_dfa(true)) // always minimize DFA for static lexers
  816. return false;
  817. return detail::generate_cpp(lexer.state_machine_, lexer.rules_, os
  818. , name_suffix, f);
  819. }
  820. ///////////////////////////////////////////////////////////////////////////
  821. // deprecated function, will be removed in the future (this has been
  822. // replaced by the function generate_static_dfa - see below).
  823. template <typename Lexer>
  824. inline bool
  825. generate_static(Lexer const& lexer, std::ostream& os
  826. , char const* name_suffix = "")
  827. {
  828. return generate_static(lexer, os, name_suffix
  829. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  830. }
  831. ///////////////////////////////////////////////////////////////////////////
  832. template <typename Lexer>
  833. inline bool
  834. generate_static_dfa(Lexer const& lexer, std::ostream& os
  835. , char const* name_suffix = "")
  836. {
  837. return generate_static(lexer, os, name_suffix
  838. , &detail::generate_function_body_dfa<typename Lexer::char_type>);
  839. }
  840. ///////////////////////////////////////////////////////////////////////////
  841. template <typename Lexer>
  842. inline bool
  843. generate_static_switch(Lexer const& lexer, std::ostream& os
  844. , char const* name_suffix = "")
  845. {
  846. return generate_static(lexer, os, name_suffix
  847. , &detail::generate_function_body_switch<typename Lexer::char_type>);
  848. }
  849. ///////////////////////////////////////////////////////////////////////////////
  850. }}}}
  851. #endif