/src/contrib/boost/spirit/home/support/detail/lexer/generator.hpp

http://pythonocc.googlecode.com/ · C++ Header · 858 lines · 702 code · 131 blank · 25 comment · 117 complexity · aedf4becb909e352dcca5faf95c3a0cd MD5 · raw file

  1. // generator.hpp
  2. // Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  5. // file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6. #ifndef BOOST_LEXER_GENERATOR_HPP
  7. #define BOOST_LEXER_GENERATOR_HPP
  8. #include "char_traits.hpp"
  9. // memcmp()
  10. #include <cstring>
  11. #include "partition/charset.hpp"
  12. #include "partition/equivset.hpp"
  13. #include <memory>
  14. #include "parser/tree/node.hpp"
  15. #include "parser/parser.hpp"
  16. #include "containers/ptr_list.hpp"
  17. #include "rules.hpp"
  18. #include "state_machine.hpp"
  19. namespace boost
  20. {
  21. namespace lexer
  22. {
  23. template<typename CharT, typename Traits = char_traits<CharT> >
  24. class basic_generator
  25. {
  26. public:
  27. typedef typename detail::internals::size_t_vector size_t_vector;
  28. typedef basic_rules<CharT> rules;
  29. static void build (const rules &rules_,
  30. basic_state_machine<CharT> &state_machine_)
  31. {
  32. std::size_t index_ = 0;
  33. std::size_t size_ = rules_.statemap ().size ();
  34. node_ptr_vector node_ptr_vector_;
  35. detail::internals &internals_ = const_cast<detail::internals &>
  36. (state_machine_.data ());
  37. bool seen_BOL_assertion_ = false;
  38. bool seen_EOL_assertion_ = false;
  39. state_machine_.clear ();
  40. for (; index_ < size_; ++index_)
  41. {
  42. internals_._lookup->push_back (static_cast<size_t_vector *>(0));
  43. internals_._lookup->back () = new size_t_vector;
  44. internals_._dfa_alphabet.push_back (0);
  45. internals_._dfa->push_back (static_cast<size_t_vector *>(0));
  46. internals_._dfa->back () = new size_t_vector;
  47. }
  48. for (index_ = 0, size_ = internals_._lookup->size ();
  49. index_ < size_; ++index_)
  50. {
  51. internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
  52. num_chars : num_wchar_ts, dead_state_index);
  53. if (!rules_.regexes ()[index_].empty ())
  54. {
  55. // vector mapping token indexes to partitioned token index sets
  56. index_set_vector set_mapping_;
  57. // syntax tree
  58. detail::node *root_ = build_tree (rules_, index_,
  59. node_ptr_vector_, internals_, set_mapping_);
  60. build_dfa (root_, set_mapping_,
  61. internals_._dfa_alphabet[index_],
  62. *internals_._dfa[index_]);
  63. if (internals_._seen_BOL_assertion)
  64. {
  65. seen_BOL_assertion_ = true;
  66. }
  67. if (internals_._seen_EOL_assertion)
  68. {
  69. seen_EOL_assertion_ = true;
  70. }
  71. internals_._seen_BOL_assertion = false;
  72. internals_._seen_EOL_assertion = false;
  73. }
  74. }
  75. internals_._seen_BOL_assertion = seen_BOL_assertion_;
  76. internals_._seen_EOL_assertion = seen_EOL_assertion_;
  77. }
  78. static void minimise (basic_state_machine<CharT> &state_machine_)
  79. {
  80. detail::internals &internals_ = const_cast<detail::internals &>
  81. (state_machine_.data ());
  82. const std::size_t machines_ = internals_._dfa->size ();
  83. for (std::size_t i_ = 0; i_ < machines_; ++i_)
  84. {
  85. const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
  86. size_t_vector *dfa_ = internals_._dfa[i_];
  87. if (dfa_alphabet_ != 0)
  88. {
  89. std::size_t size_ = 0;
  90. do
  91. {
  92. size_ = dfa_->size ();
  93. minimise_dfa (dfa_alphabet_, *dfa_, size_);
  94. } while (dfa_->size () != size_);
  95. }
  96. }
  97. }
  98. protected:
  99. typedef detail::basic_charset<CharT> charset;
  100. typedef detail::ptr_list<charset> charset_list;
  101. typedef std::auto_ptr<charset> charset_ptr;
  102. typedef detail::equivset equivset;
  103. typedef detail::ptr_list<equivset> equivset_list;
  104. typedef std::auto_ptr<equivset> equivset_ptr;
  105. typedef typename charset::index_set index_set;
  106. typedef std::vector<index_set> index_set_vector;
  107. typedef detail::basic_parser<CharT> parser;
  108. typedef typename parser::node_ptr_vector node_ptr_vector;
  109. typedef std::set<const detail::node *> node_set;
  110. typedef detail::ptr_vector<node_set> node_set_vector;
  111. typedef std::vector<const detail::node *> node_vector;
  112. typedef detail::ptr_vector<node_vector> node_vector_vector;
  113. typedef typename parser::string string;
  114. typedef std::pair<string, string> string_pair;
  115. typedef typename parser::tokeniser::string_token string_token;
  116. typedef std::deque<string_pair> macro_deque;
  117. typedef std::pair<string, const detail::node *> macro_pair;
  118. typedef typename parser::macro_map::iterator macro_iter;
  119. typedef std::pair<macro_iter, bool> macro_iter_pair;
  120. typedef typename parser::tokeniser::token_map token_map;
  121. static detail::node *build_tree (const rules &rules_,
  122. const std::size_t state_, node_ptr_vector &node_ptr_vector_,
  123. detail::internals &internals_, index_set_vector &set_mapping_)
  124. {
  125. size_t_vector *lookup_ = internals_._lookup[state_];
  126. const typename rules::string_deque_deque &regexes_ =
  127. rules_.regexes ();
  128. const typename rules::id_vector_deque &ids_ = rules_.ids ();
  129. const typename rules::id_vector_deque &unique_ids_ =
  130. rules_.unique_ids ();
  131. const typename rules::id_vector_deque &states_ = rules_.states ();
  132. typename rules::string_deque::const_iterator regex_iter_ =
  133. regexes_[state_].begin ();
  134. typename rules::string_deque::const_iterator regex_iter_end_ =
  135. regexes_[state_].end ();
  136. typename rules::id_vector::const_iterator ids_iter_ =
  137. ids_[state_].begin ();
  138. typename rules::id_vector::const_iterator unique_ids_iter_ =
  139. unique_ids_[state_].begin ();
  140. typename rules::id_vector::const_iterator states_iter_ =
  141. states_[state_].begin ();
  142. const typename rules::string &regex_ = *regex_iter_;
  143. // map of regex charset tokens (strings) to index
  144. token_map token_map_;
  145. const typename rules::string_pair_deque &macrodeque_ =
  146. rules_.macrodeque ();
  147. typename parser::macro_map macromap_;
  148. typename detail::node::node_vector tree_vector_;
  149. build_macros (token_map_, macrodeque_, macromap_,
  150. rules_.flags (), rules_.locale (), node_ptr_vector_,
  151. internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
  152. detail::node *root_ = parser::parse (regex_.c_str (),
  153. regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
  154. *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
  155. macromap_, token_map_, internals_._seen_BOL_assertion,
  156. internals_._seen_EOL_assertion);
  157. ++regex_iter_;
  158. ++ids_iter_;
  159. ++unique_ids_iter_;
  160. ++states_iter_;
  161. tree_vector_.push_back (root_);
  162. // build syntax trees
  163. while (regex_iter_ != regex_iter_end_)
  164. {
  165. // re-declare var, otherwise we perform an assignment..!
  166. const typename rules::string &regex_ = *regex_iter_;
  167. root_ = parser::parse (regex_.c_str (),
  168. regex_.c_str () + regex_.size (), *ids_iter_,
  169. *unique_ids_iter_, *states_iter_, rules_.flags (),
  170. rules_.locale (), node_ptr_vector_, macromap_, token_map_,
  171. internals_._seen_BOL_assertion,
  172. internals_._seen_EOL_assertion);
  173. tree_vector_.push_back (root_);
  174. ++regex_iter_;
  175. ++ids_iter_;
  176. ++unique_ids_iter_;
  177. ++states_iter_;
  178. }
  179. if (internals_._seen_BOL_assertion)
  180. {
  181. // Fixup BOLs
  182. typename detail::node::node_vector::iterator iter_ =
  183. tree_vector_.begin ();
  184. typename detail::node::node_vector::iterator end_ =
  185. tree_vector_.end ();
  186. for (; iter_ != end_; ++iter_)
  187. {
  188. fixup_bol (*iter_, node_ptr_vector_);
  189. }
  190. }
  191. // join trees
  192. {
  193. typename detail::node::node_vector::iterator iter_ =
  194. tree_vector_.begin ();
  195. typename detail::node::node_vector::iterator end_ =
  196. tree_vector_.end ();
  197. if (iter_ != end_)
  198. {
  199. root_ = *iter_;
  200. ++iter_;
  201. }
  202. for (; iter_ != end_; ++iter_)
  203. {
  204. node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
  205. node_ptr_vector_->back () = new detail::selection_node
  206. (root_, *iter_);
  207. root_ = node_ptr_vector_->back ();
  208. }
  209. }
  210. // partitioned token list
  211. charset_list token_list_;
  212. set_mapping_.resize (token_map_.size ());
  213. partition_tokens (token_map_, token_list_);
  214. typename charset_list::list::const_iterator iter_ =
  215. token_list_->begin ();
  216. typename charset_list::list::const_iterator end_ =
  217. token_list_->end ();
  218. std::size_t index_ = 0;
  219. for (; iter_ != end_; ++iter_, ++index_)
  220. {
  221. const charset *cs_ = *iter_;
  222. typename charset::index_set::const_iterator set_iter_ =
  223. cs_->_index_set.begin ();
  224. typename charset::index_set::const_iterator set_end_ =
  225. cs_->_index_set.end ();
  226. fill_lookup (cs_->_token, lookup_, index_);
  227. for (; set_iter_ != set_end_; ++set_iter_)
  228. {
  229. set_mapping_[*set_iter_].insert (index_);
  230. }
  231. }
  232. internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
  233. return root_;
  234. }
  235. static void build_macros (token_map &token_map_,
  236. const macro_deque &macrodeque_,
  237. typename parser::macro_map &macromap_, const regex_flags flags_,
  238. const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
  239. bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
  240. {
  241. for (typename macro_deque::const_iterator iter_ =
  242. macrodeque_.begin (), end_ = macrodeque_.end ();
  243. iter_ != end_; ++iter_)
  244. {
  245. const typename rules::string &name_ = iter_->first;
  246. const typename rules::string &regex_ = iter_->second;
  247. detail::node *node_ = parser::parse (regex_.c_str (),
  248. regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
  249. locale_, node_ptr_vector_, macromap_, token_map_,
  250. seen_BOL_assertion_, seen_EOL_assertion_);
  251. macro_iter_pair map_iter_ = macromap_.
  252. insert (macro_pair (name_, static_cast<const detail::node *>
  253. (0)));
  254. map_iter_.first->second = node_;
  255. }
  256. }
  257. static void build_dfa (detail::node *root_,
  258. const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
  259. size_t_vector &dfa_)
  260. {
  261. typename detail::node::node_vector *followpos_ =
  262. &root_->firstpos ();
  263. node_set_vector seen_sets_;
  264. node_vector_vector seen_vectors_;
  265. size_t_vector hash_vector_;
  266. // 'jam' state
  267. dfa_.resize (dfa_alphabet_, 0);
  268. closure (followpos_, seen_sets_, seen_vectors_,
  269. hash_vector_, dfa_alphabet_, dfa_);
  270. std::size_t *ptr_ = 0;
  271. for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
  272. {
  273. equivset_list equiv_list_;
  274. build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
  275. for (typename equivset_list::list::const_iterator iter_ =
  276. equiv_list_->begin (), end_ = equiv_list_->end ();
  277. iter_ != end_; ++iter_)
  278. {
  279. equivset *equivset_ = *iter_;
  280. const std::size_t transition_ = closure
  281. (&equivset_->_followpos, seen_sets_, seen_vectors_,
  282. hash_vector_, dfa_alphabet_, dfa_);
  283. if (transition_ != npos)
  284. {
  285. ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
  286. // Prune abstemious transitions from end states.
  287. if (*ptr_ && !equivset_->_greedy) continue;
  288. for (typename detail::equivset::index_vector::const_iterator
  289. equiv_iter_ = equivset_->_index_vector.begin (),
  290. equiv_end_ = equivset_->_index_vector.end ();
  291. equiv_iter_ != equiv_end_; ++equiv_iter_)
  292. {
  293. const std::size_t index_ = *equiv_iter_;
  294. if (index_ == bol_token)
  295. {
  296. if (ptr_[eol_index] == 0)
  297. {
  298. ptr_[bol_index] = transition_;
  299. }
  300. }
  301. else if (index_ == eol_token)
  302. {
  303. if (ptr_[bol_index] == 0)
  304. {
  305. ptr_[eol_index] = transition_;
  306. }
  307. }
  308. else
  309. {
  310. ptr_[index_ + dfa_offset] = transition_;
  311. }
  312. }
  313. }
  314. }
  315. }
  316. }
  317. static std::size_t closure (typename detail::node::node_vector *followpos_,
  318. node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
  319. size_t_vector &hash_vector_, const std::size_t size_,
  320. size_t_vector &dfa_)
  321. {
  322. bool end_state_ = false;
  323. std::size_t id_ = 0;
  324. std::size_t unique_id_ = npos;
  325. std::size_t state_ = 0;
  326. std::size_t hash_ = 0;
  327. if (followpos_->empty ()) return npos;
  328. std::size_t index_ = 0;
  329. std::auto_ptr<node_set> set_ptr_ (new node_set);
  330. std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
  331. for (typename detail::node::node_vector::const_iterator iter_ =
  332. followpos_->begin (), end_ = followpos_->end ();
  333. iter_ != end_; ++iter_)
  334. {
  335. closure_ex (*iter_, end_state_, id_, unique_id_, state_,
  336. set_ptr_.get (), vector_ptr_.get (), hash_);
  337. }
  338. bool found_ = false;
  339. typename size_t_vector::const_iterator hash_iter_ =
  340. hash_vector_.begin ();
  341. typename size_t_vector::const_iterator hash_end_ =
  342. hash_vector_.end ();
  343. typename node_set_vector::vector::const_iterator set_iter_ =
  344. seen_sets_->begin ();
  345. for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
  346. {
  347. found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
  348. ++index_;
  349. if (found_) break;
  350. }
  351. if (!found_)
  352. {
  353. seen_sets_->push_back (static_cast<node_set *>(0));
  354. seen_sets_->back () = set_ptr_.release ();
  355. seen_vectors_->push_back (static_cast<node_vector *>(0));
  356. seen_vectors_->back () = vector_ptr_.release ();
  357. hash_vector_.push_back (hash_);
  358. // State 0 is the jam state...
  359. index_ = seen_sets_->size ();
  360. const std::size_t old_size_ = dfa_.size ();
  361. dfa_.resize (old_size_ + size_, 0);
  362. if (end_state_)
  363. {
  364. dfa_[old_size_] |= end_state;
  365. dfa_[old_size_ + id_index] = id_;
  366. dfa_[old_size_ + unique_id_index] = unique_id_;
  367. dfa_[old_size_ + state_index] = state_;
  368. }
  369. }
  370. return index_;
  371. }
  372. static void closure_ex (detail::node *node_, bool &end_state_,
  373. std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
  374. node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
  375. {
  376. const bool temp_end_state_ = node_->end_state ();
  377. if (temp_end_state_)
  378. {
  379. if (!end_state_)
  380. {
  381. end_state_ = true;
  382. id_ = node_->id ();
  383. unique_id_ = node_->unique_id ();
  384. state_ = node_->lexer_state ();
  385. }
  386. }
  387. if (set_ptr_->insert (node_).second)
  388. {
  389. vector_ptr_->push_back (node_);
  390. hash_ += reinterpret_cast<std::size_t> (node_);
  391. }
  392. }
  393. static void partition_tokens (const token_map &map_,
  394. charset_list &lhs_)
  395. {
  396. charset_list rhs_;
  397. fill_rhs_list (map_, rhs_);
  398. if (!rhs_->empty ())
  399. {
  400. typename charset_list::list::iterator iter_;
  401. typename charset_list::list::iterator end_;
  402. charset_ptr overlap_ (new charset);
  403. lhs_->push_back (static_cast<charset *>(0));
  404. lhs_->back () = rhs_->front ();
  405. rhs_->pop_front ();
  406. while (!rhs_->empty ())
  407. {
  408. charset_ptr r_ (rhs_->front ());
  409. rhs_->pop_front ();
  410. iter_ = lhs_->begin ();
  411. end_ = lhs_->end ();
  412. while (!r_->empty () && iter_ != end_)
  413. {
  414. typename charset_list::list::iterator l_iter_ = iter_;
  415. (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
  416. if (overlap_->empty ())
  417. {
  418. ++iter_;
  419. }
  420. else if ((*l_iter_)->empty ())
  421. {
  422. delete *l_iter_;
  423. *l_iter_ = overlap_.release ();
  424. // VC++ 6 Hack:
  425. charset_ptr temp_overlap_ (new charset);
  426. overlap_ = temp_overlap_;
  427. ++iter_;
  428. }
  429. else if (r_->empty ())
  430. {
  431. delete r_.release ();
  432. r_ = overlap_;
  433. // VC++ 6 Hack:
  434. charset_ptr temp_overlap_ (new charset);
  435. overlap_ = temp_overlap_;
  436. break;
  437. }
  438. else
  439. {
  440. iter_ = lhs_->insert (++iter_,
  441. static_cast<charset *>(0));
  442. *iter_ = overlap_.release ();
  443. // VC++ 6 Hack:
  444. charset_ptr temp_overlap_ (new charset);
  445. overlap_ = temp_overlap_;
  446. ++iter_;
  447. end_ = lhs_->end ();
  448. }
  449. }
  450. if (!r_->empty ())
  451. {
  452. lhs_->push_back (static_cast<charset *>(0));
  453. lhs_->back () = r_.release ();
  454. }
  455. }
  456. }
  457. }
  458. static void fill_rhs_list (const token_map &map_,
  459. charset_list &list_)
  460. {
  461. typename parser::tokeniser::token_map::const_iterator iter_ =
  462. map_.begin ();
  463. typename parser::tokeniser::token_map::const_iterator end_ =
  464. map_.end ();
  465. for (; iter_ != end_; ++iter_)
  466. {
  467. list_->push_back (static_cast<charset *>(0));
  468. list_->back () = new charset (iter_->first, iter_->second);
  469. }
  470. }
  471. static void fill_lookup (const string_token &token_,
  472. size_t_vector *lookup_, const std::size_t index_)
  473. {
  474. const CharT *curr_ = token_._charset.c_str ();
  475. const CharT *chars_end_ = curr_ + token_._charset.size ();
  476. std::size_t *ptr_ = &lookup_->front ();
  477. const std::size_t max_ = sizeof (CharT) == 1 ?
  478. num_chars : num_wchar_ts;
  479. if (token_._negated)
  480. {
  481. CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0;
  482. std::size_t i_ = 0;
  483. while (curr_ < chars_end_)
  484. {
  485. while (*curr_ > curr_char_)
  486. {
  487. ptr_[static_cast<typename Traits::index_type>
  488. (curr_char_)] = index_ + dfa_offset;
  489. ++curr_char_;
  490. ++i_;
  491. }
  492. ++curr_char_;
  493. ++curr_;
  494. ++i_;
  495. }
  496. for (; i_ < max_; ++i_)
  497. {
  498. ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
  499. index_ + dfa_offset;
  500. ++curr_char_;
  501. }
  502. }
  503. else
  504. {
  505. while (curr_ < chars_end_)
  506. {
  507. ptr_[static_cast<typename Traits::index_type>(*curr_)] =
  508. index_ + dfa_offset;
  509. ++curr_;
  510. }
  511. }
  512. }
  513. static void build_equiv_list (const node_vector *vector_,
  514. const index_set_vector &set_mapping_, equivset_list &lhs_)
  515. {
  516. equivset_list rhs_;
  517. fill_rhs_list (vector_, set_mapping_, rhs_);
  518. if (!rhs_->empty ())
  519. {
  520. typename equivset_list::list::iterator iter_;
  521. typename equivset_list::list::iterator end_;
  522. equivset_ptr overlap_ (new equivset);
  523. lhs_->push_back (static_cast<equivset *>(0));
  524. lhs_->back () = rhs_->front ();
  525. rhs_->pop_front ();
  526. while (!rhs_->empty ())
  527. {
  528. equivset_ptr r_ (rhs_->front ());
  529. rhs_->pop_front ();
  530. iter_ = lhs_->begin ();
  531. end_ = lhs_->end ();
  532. while (!r_->empty () && iter_ != end_)
  533. {
  534. typename equivset_list::list::iterator l_iter_ = iter_;
  535. (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
  536. if (overlap_->empty ())
  537. {
  538. ++iter_;
  539. }
  540. else if ((*l_iter_)->empty ())
  541. {
  542. delete *l_iter_;
  543. *l_iter_ = overlap_.release ();
  544. // VC++ 6 Hack:
  545. equivset_ptr temp_overlap_ (new equivset);
  546. overlap_ = temp_overlap_;
  547. ++iter_;
  548. }
  549. else if (r_->empty ())
  550. {
  551. delete r_.release ();
  552. r_ = overlap_;
  553. // VC++ 6 Hack:
  554. equivset_ptr temp_overlap_ (new equivset);
  555. overlap_ = temp_overlap_;
  556. break;
  557. }
  558. else
  559. {
  560. iter_ = lhs_->insert (++iter_,
  561. static_cast<equivset *>(0));
  562. *iter_ = overlap_.release ();
  563. // VC++ 6 Hack:
  564. equivset_ptr temp_overlap_ (new equivset);
  565. overlap_ = temp_overlap_;
  566. ++iter_;
  567. end_ = lhs_->end ();
  568. }
  569. }
  570. if (!r_->empty ())
  571. {
  572. lhs_->push_back (static_cast<equivset *>(0));
  573. lhs_->back () = r_.release ();
  574. }
  575. }
  576. }
  577. }
  578. static void fill_rhs_list (const node_vector *vector_,
  579. const index_set_vector &set_mapping_, equivset_list &list_)
  580. {
  581. typename node_vector::const_iterator iter_ =
  582. vector_->begin ();
  583. typename node_vector::const_iterator end_ =
  584. vector_->end ();
  585. for (; iter_ != end_; ++iter_)
  586. {
  587. const detail::node *node_ = *iter_;
  588. if (!node_->end_state ())
  589. {
  590. const std::size_t token_ = node_->token ();
  591. if (token_ != null_token)
  592. {
  593. list_->push_back (static_cast<equivset *>(0));
  594. if (token_ == bol_token || token_ == eol_token)
  595. {
  596. std::set<std::size_t> index_set_;
  597. index_set_.insert (token_);
  598. list_->back () = new equivset (index_set_,
  599. node_->greedy (), token_, node_->followpos ());
  600. }
  601. else
  602. {
  603. list_->back () = new equivset (set_mapping_[token_],
  604. node_->greedy (), token_, node_->followpos ());
  605. }
  606. }
  607. }
  608. }
  609. }
  610. static void fixup_bol (detail::node * &root_,
  611. node_ptr_vector &node_ptr_vector_)
  612. {
  613. typename detail::node::node_vector *first_ = &root_->firstpos ();
  614. bool found_ = false;
  615. typename detail::node::node_vector::const_iterator iter_ =
  616. first_->begin ();
  617. typename detail::node::node_vector::const_iterator end_ =
  618. first_->end ();
  619. for (; iter_ != end_; ++iter_)
  620. {
  621. const detail::node *node_ = *iter_;
  622. found_ = !node_->end_state () && node_->token () == bol_token;
  623. if (found_) break;
  624. }
  625. if (!found_)
  626. {
  627. node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
  628. node_ptr_vector_->back () = new detail::leaf_node
  629. (bol_token, true);
  630. detail::node *lhs_ = node_ptr_vector_->back ();
  631. node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
  632. node_ptr_vector_->back () = new detail::leaf_node
  633. (null_token, true);
  634. detail::node *rhs_ = node_ptr_vector_->back ();
  635. node_ptr_vector_->push_back
  636. (static_cast<detail::selection_node *>(0));
  637. node_ptr_vector_->back () =
  638. new detail::selection_node (lhs_, rhs_);
  639. lhs_ = node_ptr_vector_->back ();
  640. node_ptr_vector_->push_back
  641. (static_cast<detail::sequence_node *>(0));
  642. node_ptr_vector_->back () =
  643. new detail::sequence_node (lhs_, root_);
  644. root_ = node_ptr_vector_->back ();
  645. }
  646. }
  647. static void minimise_dfa (const std::size_t dfa_alphabet_,
  648. size_t_vector &dfa_, std::size_t size_)
  649. {
  650. const std::size_t *first_ = &dfa_.front ();
  651. const std::size_t *second_ = 0;
  652. const std::size_t *end_ = first_ + size_;
  653. std::size_t index_ = 1;
  654. std::size_t new_index_ = 1;
  655. std::size_t curr_index_ = 0;
  656. index_set index_set_;
  657. size_t_vector lookup_;
  658. std::size_t *lookup_ptr_ = 0;
  659. lookup_.resize (size_ / dfa_alphabet_, null_token);
  660. lookup_ptr_ = &lookup_.front ();
  661. *lookup_ptr_ = 0;
  662. // Only one 'jam' state, so skip it.
  663. first_ += dfa_alphabet_;
  664. for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
  665. {
  666. for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
  667. second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
  668. {
  669. if (index_set_.find (curr_index_) != index_set_.end ())
  670. {
  671. continue;
  672. }
  673. // Some systems have memcmp in namespace std.
  674. using namespace std;
  675. if (memcmp (first_, second_, sizeof (std::size_t) *
  676. dfa_alphabet_) == 0)
  677. {
  678. index_set_.insert (curr_index_);
  679. lookup_ptr_[curr_index_] = new_index_;
  680. }
  681. }
  682. if (lookup_ptr_[index_] == null_token)
  683. {
  684. lookup_ptr_[index_] = new_index_;
  685. ++new_index_;
  686. }
  687. }
  688. if (!index_set_.empty ())
  689. {
  690. const std::size_t *front_ = &dfa_.front ();
  691. size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
  692. typename index_set::iterator set_end_ =
  693. index_set_.end ();
  694. const std::size_t *ptr_ = front_ + dfa_alphabet_;
  695. std::size_t *new_ptr_ = 0;
  696. new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
  697. new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
  698. size_ /= dfa_alphabet_;
  699. for (index_ = 1; index_ < size_; ++index_)
  700. {
  701. if (index_set_.find (index_) != set_end_)
  702. {
  703. ptr_ += dfa_alphabet_;
  704. continue;
  705. }
  706. new_ptr_[end_state_index] = ptr_[end_state_index];
  707. new_ptr_[id_index] = ptr_[id_index];
  708. new_ptr_[unique_id_index] = ptr_[unique_id_index];
  709. new_ptr_[state_index] = ptr_[state_index];
  710. new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
  711. new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
  712. new_ptr_ += dfa_offset;
  713. ptr_ += dfa_offset;
  714. for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
  715. {
  716. *new_ptr_++ = lookup_ptr_[*ptr_++];
  717. }
  718. }
  719. dfa_.swap (new_dfa_);
  720. }
  721. }
  722. };
  // Convenience names for the generator instantiated on char and wchar_t.
  typedef basic_generator<char> generator;
  typedef basic_generator<wchar_t> wgenerator;
  725. }
  726. }
  727. #endif