PageRenderTime 119ms CodeModel.GetById 15ms app.highlight 95ms RepoModel.GetById 2ms app.codeStats 0ms

/src/contrib/boost/spirit/home/support/detail/lexer/generator.hpp

http://pythonocc.googlecode.com/
C++ Header | 858 lines | 702 code | 131 blank | 25 comment | 117 complexity | aedf4becb909e352dcca5faf95c3a0cd MD5 | raw file
  1// generator.hpp
  2// Copyright (c) 2007-2009 Ben Hanson (http://www.benhanson.net/)
  3//
  4// Distributed under the Boost Software License, Version 1.0. (See accompanying
  5// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  6#ifndef BOOST_LEXER_GENERATOR_HPP
  7#define BOOST_LEXER_GENERATOR_HPP
  8
#include "char_traits.hpp"
// memcmp()
#include <cstring>
// std::numeric_limits
#include <limits>
#include "partition/charset.hpp"
#include "partition/equivset.hpp"
#include <memory>
#include "parser/tree/node.hpp"
#include "parser/parser.hpp"
#include "containers/ptr_list.hpp"
#include "rules.hpp"
#include "state_machine.hpp"
 20
 21namespace boost
 22{
 23namespace lexer
 24{
 25template<typename CharT, typename Traits = char_traits<CharT> >
 26class basic_generator
 27{
 28public:
 29    typedef typename detail::internals::size_t_vector size_t_vector;
 30    typedef basic_rules<CharT> rules;
 31
 32    static void build (const rules &rules_,
 33        basic_state_machine<CharT> &state_machine_)
 34    {
 35        std::size_t index_ = 0;
 36        std::size_t size_ = rules_.statemap ().size ();
 37        node_ptr_vector node_ptr_vector_;
 38        detail::internals &internals_ = const_cast<detail::internals &>
 39            (state_machine_.data ());
 40        bool seen_BOL_assertion_ = false;
 41        bool seen_EOL_assertion_ = false;
 42
 43        state_machine_.clear ();
 44
 45        for (; index_ < size_; ++index_)
 46        {
 47            internals_._lookup->push_back (static_cast<size_t_vector *>(0));
 48            internals_._lookup->back () = new size_t_vector;
 49            internals_._dfa_alphabet.push_back (0);
 50            internals_._dfa->push_back (static_cast<size_t_vector *>(0));
 51            internals_._dfa->back () = new size_t_vector;
 52        }
 53
 54        for (index_ = 0, size_ = internals_._lookup->size ();
 55            index_ < size_; ++index_)
 56        {
 57            internals_._lookup[index_]->resize (sizeof (CharT) == 1 ?
 58                num_chars : num_wchar_ts, dead_state_index);
 59
 60            if (!rules_.regexes ()[index_].empty ())
 61            {
 62                // vector mapping token indexes to partitioned token index sets
 63                index_set_vector set_mapping_;
 64                // syntax tree
 65                detail::node *root_ = build_tree (rules_, index_,
 66                    node_ptr_vector_, internals_, set_mapping_);
 67
 68                build_dfa (root_, set_mapping_,
 69                    internals_._dfa_alphabet[index_],
 70                    *internals_._dfa[index_]);
 71
 72                if (internals_._seen_BOL_assertion)
 73                {
 74                    seen_BOL_assertion_ = true;
 75                }
 76
 77                if (internals_._seen_EOL_assertion)
 78                {
 79                    seen_EOL_assertion_ = true;
 80                }
 81
 82                internals_._seen_BOL_assertion = false;
 83                internals_._seen_EOL_assertion = false;
 84            }
 85        }
 86
 87        internals_._seen_BOL_assertion = seen_BOL_assertion_;
 88        internals_._seen_EOL_assertion = seen_EOL_assertion_;
 89    }
 90
 91    static void minimise (basic_state_machine<CharT> &state_machine_)
 92    {
 93        detail::internals &internals_ = const_cast<detail::internals &>
 94            (state_machine_.data ());
 95        const std::size_t machines_ = internals_._dfa->size ();
 96
 97        for (std::size_t i_ = 0; i_ < machines_; ++i_)
 98        {
 99            const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[i_];
100            size_t_vector *dfa_ = internals_._dfa[i_];
101
102            if (dfa_alphabet_ != 0)
103            {
104                std::size_t size_ = 0;
105
106                do
107                {
108                    size_ = dfa_->size ();
109                    minimise_dfa (dfa_alphabet_, *dfa_, size_);
110                } while (dfa_->size () != size_);
111            }
112        }
113    }
114
115protected:
116    typedef detail::basic_charset<CharT> charset;
117    typedef detail::ptr_list<charset> charset_list;
118    typedef std::auto_ptr<charset> charset_ptr;
119    typedef detail::equivset equivset;
120    typedef detail::ptr_list<equivset> equivset_list;
121    typedef std::auto_ptr<equivset> equivset_ptr;
122    typedef typename charset::index_set index_set;
123    typedef std::vector<index_set> index_set_vector;
124    typedef detail::basic_parser<CharT> parser;
125    typedef typename parser::node_ptr_vector node_ptr_vector;
126    typedef std::set<const detail::node *> node_set;
127    typedef detail::ptr_vector<node_set> node_set_vector;
128    typedef std::vector<const detail::node *> node_vector;
129    typedef detail::ptr_vector<node_vector> node_vector_vector;
130    typedef typename parser::string string;
131    typedef std::pair<string, string> string_pair;
132    typedef typename parser::tokeniser::string_token string_token;
133    typedef std::deque<string_pair> macro_deque;
134    typedef std::pair<string, const detail::node *> macro_pair;
135    typedef typename parser::macro_map::iterator macro_iter;
136    typedef std::pair<macro_iter, bool> macro_iter_pair;
137    typedef typename parser::tokeniser::token_map token_map;
138
139    static detail::node *build_tree (const rules &rules_,
140        const std::size_t state_, node_ptr_vector &node_ptr_vector_,
141        detail::internals &internals_, index_set_vector &set_mapping_)
142    {
143        size_t_vector *lookup_ = internals_._lookup[state_];
144        const typename rules::string_deque_deque &regexes_ =
145            rules_.regexes ();
146        const typename rules::id_vector_deque &ids_ = rules_.ids ();
147        const typename rules::id_vector_deque &unique_ids_ =
148            rules_.unique_ids ();
149        const typename rules::id_vector_deque &states_ = rules_.states ();
150        typename rules::string_deque::const_iterator regex_iter_ =
151            regexes_[state_].begin ();
152        typename rules::string_deque::const_iterator regex_iter_end_ =
153            regexes_[state_].end ();
154        typename rules::id_vector::const_iterator ids_iter_ =
155            ids_[state_].begin ();
156        typename rules::id_vector::const_iterator unique_ids_iter_ =
157            unique_ids_[state_].begin ();
158        typename rules::id_vector::const_iterator states_iter_ =
159            states_[state_].begin ();
160        const typename rules::string &regex_ = *regex_iter_;
161        // map of regex charset tokens (strings) to index
162        token_map token_map_;
163        const typename rules::string_pair_deque &macrodeque_ =
164            rules_.macrodeque ();
165        typename parser::macro_map macromap_;
166        typename detail::node::node_vector tree_vector_;
167
168        build_macros (token_map_, macrodeque_, macromap_,
169            rules_.flags (), rules_.locale (), node_ptr_vector_,
170            internals_._seen_BOL_assertion, internals_._seen_EOL_assertion);
171
172        detail::node *root_ = parser::parse (regex_.c_str (),
173            regex_.c_str () + regex_.size (), *ids_iter_, *unique_ids_iter_,
174            *states_iter_, rules_.flags (), rules_.locale (), node_ptr_vector_,
175            macromap_, token_map_, internals_._seen_BOL_assertion,
176            internals_._seen_EOL_assertion);
177
178        ++regex_iter_;
179        ++ids_iter_;
180        ++unique_ids_iter_;
181        ++states_iter_;
182        tree_vector_.push_back (root_);
183
184        // build syntax trees
185        while (regex_iter_ != regex_iter_end_)
186        {
187            // re-declare var, otherwise we perform an assignment..!
188            const typename rules::string &regex_ = *regex_iter_;
189
190            root_ = parser::parse (regex_.c_str (),
191                regex_.c_str () + regex_.size (), *ids_iter_,
192                *unique_ids_iter_, *states_iter_, rules_.flags (),
193                rules_.locale (), node_ptr_vector_, macromap_, token_map_,
194                internals_._seen_BOL_assertion,
195                internals_._seen_EOL_assertion);
196            tree_vector_.push_back (root_);
197            ++regex_iter_;
198            ++ids_iter_;
199            ++unique_ids_iter_;
200            ++states_iter_;
201        }
202
203        if (internals_._seen_BOL_assertion)
204        {
205            // Fixup BOLs
206            typename detail::node::node_vector::iterator iter_ =
207                tree_vector_.begin ();
208            typename detail::node::node_vector::iterator end_ =
209                tree_vector_.end ();
210
211            for (; iter_ != end_; ++iter_)
212            {
213                fixup_bol (*iter_, node_ptr_vector_);
214            }
215        }
216
217        // join trees
218        {
219            typename detail::node::node_vector::iterator iter_ =
220                tree_vector_.begin ();
221            typename detail::node::node_vector::iterator end_ =
222                tree_vector_.end ();
223
224            if (iter_ != end_)
225            {
226                root_ = *iter_;
227                ++iter_;
228            }
229
230            for (; iter_ != end_; ++iter_)
231            {
232                node_ptr_vector_->push_back (static_cast<detail::selection_node *>(0));
233                node_ptr_vector_->back () = new detail::selection_node
234                    (root_, *iter_);
235                root_ = node_ptr_vector_->back ();
236            }
237        }
238
239        // partitioned token list
240        charset_list token_list_;
241
242        set_mapping_.resize (token_map_.size ());
243        partition_tokens (token_map_, token_list_);
244
245        typename charset_list::list::const_iterator iter_ =
246            token_list_->begin ();
247        typename charset_list::list::const_iterator end_ =
248            token_list_->end ();
249        std::size_t index_ = 0;
250
251        for (; iter_ != end_; ++iter_, ++index_)
252        {
253            const charset *cs_ = *iter_;
254            typename charset::index_set::const_iterator set_iter_ =
255                cs_->_index_set.begin ();
256            typename charset::index_set::const_iterator set_end_ =
257                cs_->_index_set.end ();
258
259            fill_lookup (cs_->_token, lookup_, index_);
260
261            for (; set_iter_ != set_end_; ++set_iter_)
262            {
263                set_mapping_[*set_iter_].insert (index_);
264            }
265        }
266
267        internals_._dfa_alphabet[state_] = token_list_->size () + dfa_offset;
268        return root_;
269    }
270
271    static void build_macros (token_map &token_map_,
272        const macro_deque &macrodeque_,
273        typename parser::macro_map &macromap_, const regex_flags flags_,
274        const std::locale &locale_, node_ptr_vector &node_ptr_vector_,
275        bool &seen_BOL_assertion_, bool &seen_EOL_assertion_)
276    {
277        for (typename macro_deque::const_iterator iter_ =
278            macrodeque_.begin (), end_ = macrodeque_.end ();
279            iter_ != end_; ++iter_)
280        {
281            const typename rules::string &name_ = iter_->first;
282            const typename rules::string &regex_ = iter_->second;
283            detail::node *node_ = parser::parse (regex_.c_str (),
284                regex_.c_str () + regex_.size (), 0, 0, 0, flags_,
285                locale_, node_ptr_vector_, macromap_, token_map_,
286                seen_BOL_assertion_, seen_EOL_assertion_);
287            macro_iter_pair map_iter_ = macromap_.
288                insert (macro_pair (name_, static_cast<const detail::node *>
289                (0)));
290
291            map_iter_.first->second = node_;
292        }
293    }
294
295    static void build_dfa (detail::node *root_,
296        const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,
297        size_t_vector &dfa_)
298    {
299        typename detail::node::node_vector *followpos_ =
300            &root_->firstpos ();
301        node_set_vector seen_sets_;
302        node_vector_vector seen_vectors_;
303        size_t_vector hash_vector_;
304
305        // 'jam' state
306        dfa_.resize (dfa_alphabet_, 0);
307        closure (followpos_, seen_sets_, seen_vectors_,
308            hash_vector_, dfa_alphabet_, dfa_);
309
310        std::size_t *ptr_ = 0;
311
312        for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)
313        {
314            equivset_list equiv_list_;
315
316            build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);
317
318            for (typename equivset_list::list::const_iterator iter_ =
319                equiv_list_->begin (), end_ = equiv_list_->end ();
320                iter_ != end_; ++iter_)
321            {
322                equivset *equivset_ = *iter_;
323                const std::size_t transition_ = closure
324                    (&equivset_->_followpos, seen_sets_, seen_vectors_,
325                    hash_vector_, dfa_alphabet_, dfa_);
326
327                if (transition_ != npos)
328                {
329                    ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);
330
331                    // Prune abstemious transitions from end states.
332                    if (*ptr_ && !equivset_->_greedy) continue;
333
334                    for (typename detail::equivset::index_vector::const_iterator
335                        equiv_iter_ = equivset_->_index_vector.begin (),
336                        equiv_end_ = equivset_->_index_vector.end ();
337                        equiv_iter_ != equiv_end_; ++equiv_iter_)
338                    {
339                        const std::size_t index_ = *equiv_iter_;
340
341                        if (index_ == bol_token)
342                        {
343                            if (ptr_[eol_index] == 0)
344                            {
345                                ptr_[bol_index] = transition_;
346                            }
347                        }
348                        else if (index_ == eol_token)
349                        {
350                            if (ptr_[bol_index] == 0)
351                            {
352                                ptr_[eol_index] = transition_;
353                            }
354                        }
355                        else
356                        {
357                            ptr_[index_ + dfa_offset] = transition_;
358                        }
359                    }
360                }
361            }
362        }
363    }
364
365    static std::size_t closure (typename detail::node::node_vector *followpos_,
366        node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,
367        size_t_vector &hash_vector_, const std::size_t size_,
368        size_t_vector &dfa_)
369    {
370        bool end_state_ = false;
371        std::size_t id_ = 0;
372        std::size_t unique_id_ = npos;
373        std::size_t state_ = 0;
374        std::size_t hash_ = 0;
375
376        if (followpos_->empty ()) return npos;
377
378        std::size_t index_ = 0;
379        std::auto_ptr<node_set> set_ptr_ (new node_set);
380        std::auto_ptr<node_vector> vector_ptr_ (new node_vector);
381
382        for (typename detail::node::node_vector::const_iterator iter_ =
383            followpos_->begin (), end_ = followpos_->end ();
384            iter_ != end_; ++iter_)
385        {
386            closure_ex (*iter_, end_state_, id_, unique_id_, state_,
387                set_ptr_.get (), vector_ptr_.get (), hash_);
388        }
389
390        bool found_ = false;
391        typename size_t_vector::const_iterator hash_iter_ =
392            hash_vector_.begin ();
393        typename size_t_vector::const_iterator hash_end_ =
394            hash_vector_.end ();
395        typename node_set_vector::vector::const_iterator set_iter_ =
396            seen_sets_->begin ();
397
398        for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)
399        {
400            found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;
401            ++index_;
402
403            if (found_) break;
404        }
405
406        if (!found_)
407        {
408            seen_sets_->push_back (static_cast<node_set *>(0));
409            seen_sets_->back () = set_ptr_.release ();
410            seen_vectors_->push_back (static_cast<node_vector *>(0));
411            seen_vectors_->back () = vector_ptr_.release ();
412            hash_vector_.push_back (hash_);
413            // State 0 is the jam state...
414            index_ = seen_sets_->size ();
415
416            const std::size_t old_size_ = dfa_.size ();
417
418            dfa_.resize (old_size_ + size_, 0);
419
420            if (end_state_)
421            {
422                dfa_[old_size_] |= end_state;
423                dfa_[old_size_ + id_index] = id_;
424                dfa_[old_size_ + unique_id_index] = unique_id_;
425                dfa_[old_size_ + state_index] = state_;
426            }
427        }
428
429        return index_;
430    }
431
432    static void closure_ex (detail::node *node_, bool &end_state_,
433        std::size_t &id_, std::size_t &unique_id_, std::size_t &state_,
434        node_set *set_ptr_, node_vector *vector_ptr_, std::size_t &hash_)
435    {
436        const bool temp_end_state_ = node_->end_state ();
437
438        if (temp_end_state_)
439        {
440            if (!end_state_)
441            {
442                end_state_ = true;
443                id_ = node_->id ();
444                unique_id_ = node_->unique_id ();
445                state_ = node_->lexer_state ();
446            }
447        }
448
449        if (set_ptr_->insert (node_).second)
450        {
451            vector_ptr_->push_back (node_);
452            hash_ += reinterpret_cast<std::size_t> (node_);
453        }
454    }
455
456    static void partition_tokens (const token_map &map_,
457        charset_list &lhs_)
458    {
459        charset_list rhs_;
460
461        fill_rhs_list (map_, rhs_);
462
463        if (!rhs_->empty ())
464        {
465            typename charset_list::list::iterator iter_;
466            typename charset_list::list::iterator end_;
467            charset_ptr overlap_ (new charset);
468
469            lhs_->push_back (static_cast<charset *>(0));
470            lhs_->back () = rhs_->front ();
471            rhs_->pop_front ();
472
473            while (!rhs_->empty ())
474            {
475                charset_ptr r_ (rhs_->front ());
476
477                rhs_->pop_front ();
478                iter_ = lhs_->begin ();
479                end_ = lhs_->end ();
480
481                while (!r_->empty () && iter_ != end_)
482                {
483                    typename charset_list::list::iterator l_iter_ = iter_;
484
485                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
486
487                    if (overlap_->empty ())
488                    {
489                        ++iter_;
490                    }
491                    else if ((*l_iter_)->empty ())
492                    {
493                        delete *l_iter_;
494                        *l_iter_ = overlap_.release ();
495
496                        // VC++ 6 Hack:
497                        charset_ptr temp_overlap_ (new charset);
498
499                        overlap_ = temp_overlap_;
500                        ++iter_;
501                    }
502                    else if (r_->empty ())
503                    {
504                        delete r_.release ();
505                        r_ = overlap_;
506
507                        // VC++ 6 Hack:
508                        charset_ptr temp_overlap_ (new charset);
509
510                        overlap_ = temp_overlap_;
511                        break;
512                    }
513                    else
514                    {
515                        iter_ = lhs_->insert (++iter_,
516                            static_cast<charset *>(0));
517                        *iter_ = overlap_.release ();
518
519                        // VC++ 6 Hack:
520                        charset_ptr temp_overlap_ (new charset);
521
522                        overlap_ = temp_overlap_;
523                        ++iter_;
524                        end_ = lhs_->end ();
525                    }
526                }
527
528                if (!r_->empty ())
529                {
530                    lhs_->push_back (static_cast<charset *>(0));
531                    lhs_->back () = r_.release ();
532                }
533            }
534        }
535    }
536
537    static void fill_rhs_list (const token_map &map_,
538        charset_list &list_)
539    {
540        typename parser::tokeniser::token_map::const_iterator iter_ =
541            map_.begin ();
542        typename parser::tokeniser::token_map::const_iterator end_ =
543            map_.end ();
544
545        for (; iter_ != end_; ++iter_)
546        {
547            list_->push_back (static_cast<charset *>(0));
548            list_->back () = new charset (iter_->first, iter_->second);
549        }
550    }
551
552    static void fill_lookup (const string_token &token_,
553        size_t_vector *lookup_, const std::size_t index_)
554    {
555        const CharT *curr_ = token_._charset.c_str ();
556        const CharT *chars_end_ = curr_ + token_._charset.size ();
557        std::size_t *ptr_ = &lookup_->front ();
558        const std::size_t max_ = sizeof (CharT) == 1 ?
559            num_chars : num_wchar_ts;
560
561        if (token_._negated)
562        {
563            CharT curr_char_ = sizeof (CharT) == 1 ? -128 : 0;
564            std::size_t i_ = 0;
565
566            while (curr_ < chars_end_)
567            {
568                while (*curr_ > curr_char_)
569                {
570                    ptr_[static_cast<typename Traits::index_type>
571                        (curr_char_)] = index_ + dfa_offset;
572                    ++curr_char_;
573                    ++i_;
574                }
575
576                ++curr_char_;
577                ++curr_;
578                ++i_;
579            }
580
581            for (; i_ < max_; ++i_)
582            {
583                ptr_[static_cast<typename Traits::index_type>(curr_char_)] =
584                    index_ + dfa_offset;
585                ++curr_char_;
586            }
587        }
588        else
589        {
590            while (curr_ < chars_end_)
591            {
592                ptr_[static_cast<typename Traits::index_type>(*curr_)] =
593                    index_ + dfa_offset;
594                ++curr_;
595            }
596        }
597    }
598
599    static void build_equiv_list (const node_vector *vector_,
600        const index_set_vector &set_mapping_, equivset_list &lhs_)
601    {
602        equivset_list rhs_;
603
604        fill_rhs_list (vector_, set_mapping_, rhs_);
605
606        if (!rhs_->empty ())
607        {
608            typename equivset_list::list::iterator iter_;
609            typename equivset_list::list::iterator end_;
610            equivset_ptr overlap_ (new equivset);
611
612            lhs_->push_back (static_cast<equivset *>(0));
613            lhs_->back () = rhs_->front ();
614            rhs_->pop_front ();
615
616            while (!rhs_->empty ())
617            {
618                equivset_ptr r_ (rhs_->front ());
619
620                rhs_->pop_front ();
621                iter_ = lhs_->begin ();
622                end_ = lhs_->end ();
623
624                while (!r_->empty () && iter_ != end_)
625                {
626                    typename equivset_list::list::iterator l_iter_ = iter_;
627
628                    (*l_iter_)->intersect (*r_.get (), *overlap_.get ());
629
630                    if (overlap_->empty ())
631                    {
632                        ++iter_;
633                    }
634                    else if ((*l_iter_)->empty ())
635                    {
636                        delete *l_iter_;
637                        *l_iter_ = overlap_.release ();
638
639                        // VC++ 6 Hack:
640                        equivset_ptr temp_overlap_ (new equivset);
641
642                        overlap_ = temp_overlap_;
643                        ++iter_;
644                    }
645                    else if (r_->empty ())
646                    {
647                        delete r_.release ();
648                        r_ = overlap_;
649
650                        // VC++ 6 Hack:
651                        equivset_ptr temp_overlap_ (new equivset);
652
653                        overlap_ = temp_overlap_;
654                        break;
655                    }
656                    else
657                    {
658                        iter_ = lhs_->insert (++iter_,
659                            static_cast<equivset *>(0));
660                        *iter_ = overlap_.release ();
661
662                        // VC++ 6 Hack:
663                        equivset_ptr temp_overlap_ (new equivset);
664
665                        overlap_ = temp_overlap_;
666                        ++iter_;
667                        end_ = lhs_->end ();
668                    }
669                }
670
671                if (!r_->empty ())
672                {
673                    lhs_->push_back (static_cast<equivset *>(0));
674                    lhs_->back () = r_.release ();
675                }
676            }
677        }
678    }
679
680    static void fill_rhs_list (const node_vector *vector_,
681        const index_set_vector &set_mapping_, equivset_list &list_)
682    {
683        typename node_vector::const_iterator iter_ =
684            vector_->begin ();
685        typename node_vector::const_iterator end_ =
686            vector_->end ();
687
688        for (; iter_ != end_; ++iter_)
689        {
690            const detail::node *node_ = *iter_;
691
692            if (!node_->end_state ())
693            {
694                const std::size_t token_ = node_->token ();
695
696                if (token_ != null_token)
697                {
698                    list_->push_back (static_cast<equivset *>(0));
699
700                    if (token_ == bol_token || token_ == eol_token)
701                    {
702                        std::set<std::size_t> index_set_;
703
704                        index_set_.insert (token_);
705                        list_->back () = new equivset (index_set_,
706                            node_->greedy (), token_, node_->followpos ());
707                    }
708                    else
709                    {
710                        list_->back () = new equivset (set_mapping_[token_],
711                            node_->greedy (), token_, node_->followpos ());
712                    }
713                }
714            }
715        }
716    }
717
718    static void fixup_bol (detail::node * &root_,
719        node_ptr_vector &node_ptr_vector_)
720    {
721        typename detail::node::node_vector *first_ = &root_->firstpos ();
722        bool found_ = false;
723        typename detail::node::node_vector::const_iterator iter_ =
724            first_->begin ();
725        typename detail::node::node_vector::const_iterator end_ =
726            first_->end ();
727
728        for (; iter_ != end_; ++iter_)
729        {
730            const detail::node *node_ = *iter_;
731
732            found_ = !node_->end_state () && node_->token () == bol_token;
733
734            if (found_) break;
735        }
736
737        if (!found_)
738        {
739            node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
740            node_ptr_vector_->back () = new detail::leaf_node
741                (bol_token, true);
742
743            detail::node *lhs_ = node_ptr_vector_->back ();
744
745            node_ptr_vector_->push_back (static_cast<detail::leaf_node *>(0));
746            node_ptr_vector_->back () = new detail::leaf_node
747                (null_token, true);
748
749            detail::node *rhs_ = node_ptr_vector_->back ();
750
751            node_ptr_vector_->push_back
752                (static_cast<detail::selection_node *>(0));
753            node_ptr_vector_->back () =
754                new detail::selection_node (lhs_, rhs_);
755            lhs_ = node_ptr_vector_->back ();
756
757            node_ptr_vector_->push_back
758                (static_cast<detail::sequence_node *>(0));
759            node_ptr_vector_->back () =
760                new detail::sequence_node (lhs_, root_);
761            root_ = node_ptr_vector_->back ();
762        }
763    }
764
765    static void minimise_dfa (const std::size_t dfa_alphabet_,
766        size_t_vector &dfa_, std::size_t size_)
767    {
768        const std::size_t *first_ = &dfa_.front ();
769        const std::size_t *second_ = 0;
770        const std::size_t *end_ = first_ + size_;
771        std::size_t index_ = 1;
772        std::size_t new_index_ = 1;
773        std::size_t curr_index_ = 0;
774        index_set index_set_;
775        size_t_vector lookup_;
776        std::size_t *lookup_ptr_ = 0;
777
778        lookup_.resize (size_ / dfa_alphabet_, null_token);
779        lookup_ptr_ = &lookup_.front ();
780        *lookup_ptr_ = 0;
781        // Only one 'jam' state, so skip it.
782        first_ += dfa_alphabet_;
783
784        for (; first_ < end_; first_ += dfa_alphabet_, ++index_)
785        {
786            for (second_ = first_ + dfa_alphabet_, curr_index_ = index_ + 1;
787                second_ < end_; second_ += dfa_alphabet_, ++curr_index_)
788            {
789                if (index_set_.find (curr_index_) != index_set_.end ())
790                {
791                    continue;
792                }
793
794                // Some systems have memcmp in namespace std.
795                using namespace std;
796
797                if (memcmp (first_, second_, sizeof (std::size_t) *
798                    dfa_alphabet_) == 0)
799                {
800                    index_set_.insert (curr_index_);
801                    lookup_ptr_[curr_index_] = new_index_;
802                }
803            }
804
805            if (lookup_ptr_[index_] == null_token)
806            {
807                lookup_ptr_[index_] = new_index_;
808                ++new_index_;
809            }
810        }
811
812        if (!index_set_.empty ())
813        {
814            const std::size_t *front_ = &dfa_.front ();
815            size_t_vector new_dfa_ (front_, front_ + dfa_alphabet_);
816            typename index_set::iterator set_end_ =
817                index_set_.end ();
818            const std::size_t *ptr_ = front_ + dfa_alphabet_;
819            std::size_t *new_ptr_ = 0;
820
821            new_dfa_.resize (size_ - index_set_.size () * dfa_alphabet_, 0);
822            new_ptr_ = &new_dfa_.front () + dfa_alphabet_;
823            size_ /= dfa_alphabet_;
824
825            for (index_ = 1; index_ < size_; ++index_)
826            {
827                if (index_set_.find (index_) != set_end_)
828                {
829                    ptr_ += dfa_alphabet_;
830                    continue;
831                }
832
833                new_ptr_[end_state_index] = ptr_[end_state_index];
834                new_ptr_[id_index] = ptr_[id_index];
835                new_ptr_[unique_id_index] = ptr_[unique_id_index];
836                new_ptr_[state_index] = ptr_[state_index];
837                new_ptr_[bol_index] = lookup_ptr_[ptr_[bol_index]];
838                new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]];
839                new_ptr_ += dfa_offset;
840                ptr_ += dfa_offset;
841
842                for (std::size_t i_ = dfa_offset; i_ < dfa_alphabet_; ++i_)
843                {
844                    *new_ptr_++ = lookup_ptr_[*ptr_++];
845                }
846            }
847
848            dfa_.swap (new_dfa_);
849        }
850    }
851};
852
853typedef basic_generator<char> generator;
854typedef basic_generator<wchar_t> wgenerator;
855}
856}
857
858#endif