PageRenderTime 58ms CodeModel.GetById 19ms app.highlight 35ms RepoModel.GetById 1ms app.codeStats 0ms

/Src/Dependencies/Boost/boost/xpressive/detail/dynamic/parser_traits.hpp

http://hadesmem.googlecode.com/
C++ Header | 474 lines | 367 code | 50 blank | 57 comment | 73 complexity | 2f0ce60cf5f7158f3206d35ac5aec7cf MD5 | raw file
  1///////////////////////////////////////////////////////////////////////////////
  2// detail/dynamic/parser_traits.hpp
  3//
  4//  Copyright 2008 Eric Niebler. Distributed under the Boost
  5//  Software License, Version 1.0. (See accompanying file
  6//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7
  8#ifndef BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
  9#define BOOST_XPRESSIVE_DETAIL_DYNAMIC_PARSER_TRAITS_HPP_EAN_10_04_2005
 10
 11// MS compatible compilers support #pragma once
 12#if defined(_MSC_VER) && (_MSC_VER >= 1020)
 13# pragma once
 14#endif
 15
 16#include <string>
 17#include <climits>
 18#include <boost/assert.hpp>
 19#include <boost/throw_exception.hpp>
 20#include <boost/xpressive/regex_error.hpp>
 21#include <boost/xpressive/regex_traits.hpp>
 22#include <boost/xpressive/detail/detail_fwd.hpp>
 23#include <boost/xpressive/detail/dynamic/matchable.hpp>
 24#include <boost/xpressive/detail/dynamic/parser_enum.hpp>
 25#include <boost/xpressive/detail/utility/literals.hpp>
 26#include <boost/xpressive/detail/utility/algorithm.hpp>
 27
 28namespace boost { namespace xpressive
 29{
 30
 31///////////////////////////////////////////////////////////////////////////////
 32// compiler_traits
 33//  this works for char and wchar_t. it must be specialized for anything else.
 34//
 35template<typename RegexTraits>
 36struct compiler_traits
 37{
 38    typedef RegexTraits regex_traits;
 39    typedef typename regex_traits::char_type char_type;
 40    typedef typename regex_traits::string_type string_type;
 41    typedef typename regex_traits::locale_type locale_type;
 42
 43    ///////////////////////////////////////////////////////////////////////////////
 44    // constructor
 45    explicit compiler_traits(RegexTraits const &traits = RegexTraits())
 46      : traits_(traits)
 47      , flags_(regex_constants::ECMAScript)
 48      , space_(lookup_classname(traits_, "space"))
 49      , alnum_(lookup_classname(traits_, "alnum"))
 50    {
 51    }
 52
 53    ///////////////////////////////////////////////////////////////////////////////
 54    // flags
 55    regex_constants::syntax_option_type flags() const
 56    {
 57        return this->flags_;
 58    }
 59
 60    ///////////////////////////////////////////////////////////////////////////////
 61    // flags
 62    void flags(regex_constants::syntax_option_type flags)
 63    {
 64        this->flags_ = flags;
 65    }
 66
 67    ///////////////////////////////////////////////////////////////////////////////
 68    // traits
 69    regex_traits &traits()
 70    {
 71        return this->traits_;
 72    }
 73
 74    regex_traits const &traits() const
 75    {
 76        return this->traits_;
 77    }
 78
 79    ///////////////////////////////////////////////////////////////////////////////
 80    // imbue
 81    locale_type imbue(locale_type const &loc)
 82    {
 83        locale_type oldloc = this->traits().imbue(loc);
 84        this->space_ = lookup_classname(this->traits(), "space");
 85        this->alnum_ = lookup_classname(this->traits(), "alnum");
 86        return oldloc;
 87    }
 88
 89    ///////////////////////////////////////////////////////////////////////////////
 90    // getloc
 91    locale_type getloc() const
 92    {
 93        return this->traits().getloc();
 94    }
 95
 96    ///////////////////////////////////////////////////////////////////////////////
 97    // get_token
 98    //  get a token and advance the iterator
 99    template<typename FwdIter>
100    regex_constants::compiler_token_type get_token(FwdIter &begin, FwdIter end)
101    {
102        using namespace regex_constants;
103        if(this->eat_ws_(begin, end) == end)
104        {
105            return regex_constants::token_end_of_pattern;
106        }
107
108        switch(*begin)
109        {
110        case BOOST_XPR_CHAR_(char_type, '\\'): return this->get_escape_token(++begin, end);
111        case BOOST_XPR_CHAR_(char_type, '.'): ++begin; return token_any;
112        case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_assert_begin_line;
113        case BOOST_XPR_CHAR_(char_type, '$'): ++begin; return token_assert_end_line;
114        case BOOST_XPR_CHAR_(char_type, '('): ++begin; return token_group_begin;
115        case BOOST_XPR_CHAR_(char_type, ')'): ++begin; return token_group_end;
116        case BOOST_XPR_CHAR_(char_type, '|'): ++begin; return token_alternate;
117        case BOOST_XPR_CHAR_(char_type, '['): ++begin; return token_charset_begin;
118
119        case BOOST_XPR_CHAR_(char_type, '*'):
120        case BOOST_XPR_CHAR_(char_type, '+'):
121        case BOOST_XPR_CHAR_(char_type, '?'):
122            return token_invalid_quantifier;
123
124        case BOOST_XPR_CHAR_(char_type, ']'):
125        case BOOST_XPR_CHAR_(char_type, '{'):
126        default:
127            return token_literal;
128        }
129    }
130
131    ///////////////////////////////////////////////////////////////////////////////
132    // get_quant_spec
133    template<typename FwdIter>
134    bool get_quant_spec(FwdIter &begin, FwdIter end, detail::quant_spec &spec)
135    {
136        using namespace regex_constants;
137        FwdIter old_begin;
138
139        if(this->eat_ws_(begin, end) == end)
140        {
141            return false;
142        }
143
144        switch(*begin)
145        {
146        case BOOST_XPR_CHAR_(char_type, '*'):
147            spec.min_ = 0;
148            spec.max_ = (std::numeric_limits<unsigned int>::max)();
149            break;
150
151        case BOOST_XPR_CHAR_(char_type, '+'):
152            spec.min_ = 1;
153            spec.max_ = (std::numeric_limits<unsigned int>::max)();
154            break;
155
156        case BOOST_XPR_CHAR_(char_type, '?'):
157            spec.min_ = 0;
158            spec.max_ = 1;
159            break;
160
161        case BOOST_XPR_CHAR_(char_type, '{'):
162            old_begin = this->eat_ws_(++begin, end);
163            spec.min_ = spec.max_ = detail::toi(begin, end, this->traits());
164            BOOST_XPR_ENSURE_
165            (
166                begin != old_begin && begin != end, error_brace, "invalid quantifier"
167            );
168
169            if(*begin == BOOST_XPR_CHAR_(char_type, ','))
170            {
171                old_begin = this->eat_ws_(++begin, end);
172                spec.max_ = detail::toi(begin, end, this->traits());
173                BOOST_XPR_ENSURE_
174                (
175                    begin != end && BOOST_XPR_CHAR_(char_type, '}') == *begin
176                  , error_brace, "invalid quantifier"
177                );
178
179                if(begin == old_begin)
180                {
181                    spec.max_ = (std::numeric_limits<unsigned int>::max)();
182                }
183                else
184                {
185                    BOOST_XPR_ENSURE_
186                    (
187                        spec.min_ <= spec.max_, error_badbrace, "invalid quantification range"
188                    );
189                }
190            }
191            else
192            {
193                BOOST_XPR_ENSURE_
194                (
195                    BOOST_XPR_CHAR_(char_type, '}') == *begin, error_brace, "invalid quantifier"
196                );
197            }
198            break;
199
200        default:
201            return false;
202        }
203
204        spec.greedy_ = true;
205        if(this->eat_ws_(++begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
206        {
207            ++begin;
208            spec.greedy_ = false;
209        }
210
211        return true;
212    }
213
214    ///////////////////////////////////////////////////////////////////////////
215    // get_group_type
216    template<typename FwdIter>
217    regex_constants::compiler_token_type get_group_type(FwdIter &begin, FwdIter end, string_type &name)
218    {
219        using namespace regex_constants;
220        if(this->eat_ws_(begin, end) != end && BOOST_XPR_CHAR_(char_type, '?') == *begin)
221        {
222            this->eat_ws_(++begin, end);
223            BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
224
225            switch(*begin)
226            {
227            case BOOST_XPR_CHAR_(char_type, ':'): ++begin; return token_no_mark;
228            case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_independent_sub_expression;
229            case BOOST_XPR_CHAR_(char_type, '#'): ++begin; return token_comment;
230            case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookahead;
231            case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookahead;
232            case BOOST_XPR_CHAR_(char_type, 'R'): ++begin; return token_recurse;
233            case BOOST_XPR_CHAR_(char_type, '$'):
234                this->get_name_(++begin, end, name);
235                BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
236                if(BOOST_XPR_CHAR_(char_type, '=') == *begin)
237                {
238                    ++begin;
239                    return token_rule_assign;
240                }
241                return token_rule_ref;
242
243            case BOOST_XPR_CHAR_(char_type, '<'):
244                this->eat_ws_(++begin, end);
245                BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
246                switch(*begin)
247                {
248                case BOOST_XPR_CHAR_(char_type, '='): ++begin; return token_positive_lookbehind;
249                case BOOST_XPR_CHAR_(char_type, '!'): ++begin; return token_negative_lookbehind;
250                default:
251                    BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
252                }
253
254            case BOOST_XPR_CHAR_(char_type, 'P'):
255                this->eat_ws_(++begin, end);
256                BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
257                switch(*begin)
258                {
259                case BOOST_XPR_CHAR_(char_type, '<'):
260                    this->get_name_(++begin, end, name);
261                    BOOST_XPR_ENSURE_(begin != end && BOOST_XPR_CHAR_(char_type, '>') == *begin++, error_paren, "incomplete extension");
262                    return token_named_mark;
263                case BOOST_XPR_CHAR_(char_type, '='):
264                    this->get_name_(++begin, end, name);
265                    BOOST_XPR_ENSURE_(begin != end, error_paren, "incomplete extension");
266                    return token_named_mark_ref;
267                default:
268                    BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
269                }
270
271            case BOOST_XPR_CHAR_(char_type, 'i'):
272            case BOOST_XPR_CHAR_(char_type, 'm'):
273            case BOOST_XPR_CHAR_(char_type, 's'):
274            case BOOST_XPR_CHAR_(char_type, 'x'):
275            case BOOST_XPR_CHAR_(char_type, '-'):
276                return this->parse_mods_(begin, end);
277
278            default:
279                BOOST_THROW_EXCEPTION(regex_error(error_badbrace, "unrecognized extension"));
280            }
281        }
282
283        return token_literal;
284    }
285
286    //////////////////////////////////////////////////////////////////////////
287    // get_charset_token
288    //  NOTE: white-space is *never* ignored in a charset.
289    template<typename FwdIter>
290    regex_constants::compiler_token_type get_charset_token(FwdIter &begin, FwdIter end)
291    {
292        using namespace regex_constants;
293        BOOST_ASSERT(begin != end);
294        switch(*begin)
295        {
296        case BOOST_XPR_CHAR_(char_type, '^'): ++begin; return token_charset_invert;
297        case BOOST_XPR_CHAR_(char_type, '-'): ++begin; return token_charset_hyphen;
298        case BOOST_XPR_CHAR_(char_type, ']'): ++begin; return token_charset_end;
299        case BOOST_XPR_CHAR_(char_type, '['):
300            {
301                FwdIter next = begin; ++next;
302                if(next != end)
303                {
304                    BOOST_XPR_ENSURE_(
305                        *next != BOOST_XPR_CHAR_(char_type, '=')
306                      , error_collate
307                      , "equivalence classes are not yet supported"
308                    );
309
310                    BOOST_XPR_ENSURE_(
311                        *next != BOOST_XPR_CHAR_(char_type, '.')
312                      , error_collate
313                      , "collation sequences are not yet supported"
314                    );
315
316                    if(*next == BOOST_XPR_CHAR_(char_type, ':'))
317                    {
318                        begin = ++next;
319                        return token_posix_charset_begin;
320                    }
321                }
322            }
323            break;
324        case BOOST_XPR_CHAR_(char_type, ':'):
325            {
326                FwdIter next = begin; ++next;
327                if(next != end && *next == BOOST_XPR_CHAR_(char_type, ']'))
328                {
329                    begin = ++next;
330                    return token_posix_charset_end;
331                }
332            }
333            break;
334        case BOOST_XPR_CHAR_(char_type, '\\'):
335            if(++begin != end)
336            {
337                switch(*begin)
338                {
339                case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_charset_backspace;
340                default:;
341                }
342            }
343            return token_escape;
344        default:;
345        }
346        return token_literal;
347    }
348
349    //////////////////////////////////////////////////////////////////////////
350    // get_escape_token
351    template<typename FwdIter>
352    regex_constants::compiler_token_type get_escape_token(FwdIter &begin, FwdIter end)
353    {
354        using namespace regex_constants;
355        if(begin != end)
356        {
357            switch(*begin)
358            {
359            //case BOOST_XPR_CHAR_(char_type, 'a'): ++begin; return token_escape_bell;
360            //case BOOST_XPR_CHAR_(char_type, 'c'): ++begin; return token_escape_control;
361            //case BOOST_XPR_CHAR_(char_type, 'e'): ++begin; return token_escape_escape;
362            //case BOOST_XPR_CHAR_(char_type, 'f'): ++begin; return token_escape_formfeed;
363            //case BOOST_XPR_CHAR_(char_type, 'n'): ++begin; return token_escape_newline;
364            //case BOOST_XPR_CHAR_(char_type, 't'): ++begin; return token_escape_horizontal_tab;
365            //case BOOST_XPR_CHAR_(char_type, 'v'): ++begin; return token_escape_vertical_tab;
366            case BOOST_XPR_CHAR_(char_type, 'A'): ++begin; return token_assert_begin_sequence;
367            case BOOST_XPR_CHAR_(char_type, 'b'): ++begin; return token_assert_word_boundary;
368            case BOOST_XPR_CHAR_(char_type, 'B'): ++begin; return token_assert_not_word_boundary;
369            case BOOST_XPR_CHAR_(char_type, 'E'): ++begin; return token_quote_meta_end;
370            case BOOST_XPR_CHAR_(char_type, 'Q'): ++begin; return token_quote_meta_begin;
371            case BOOST_XPR_CHAR_(char_type, 'Z'): ++begin; return token_assert_end_sequence;
372            // Non-standard extension to ECMAScript syntax
373            case BOOST_XPR_CHAR_(char_type, '<'): ++begin; return token_assert_word_begin;
374            case BOOST_XPR_CHAR_(char_type, '>'): ++begin; return token_assert_word_end;
375            default:; // fall-through
376            }
377        }
378
379        return token_escape;
380    }
381
382private:
383
384    //////////////////////////////////////////////////////////////////////////
385    // parse_mods_
386    template<typename FwdIter>
387    regex_constants::compiler_token_type parse_mods_(FwdIter &begin, FwdIter end)
388    {
389        using namespace regex_constants;
390        bool set = true;
391        do switch(*begin)
392        {
393        case BOOST_XPR_CHAR_(char_type, 'i'): this->flag_(set, icase_); break;
394        case BOOST_XPR_CHAR_(char_type, 'm'): this->flag_(!set, single_line); break;
395        case BOOST_XPR_CHAR_(char_type, 's'): this->flag_(!set, not_dot_newline); break;
396        case BOOST_XPR_CHAR_(char_type, 'x'): this->flag_(set, ignore_white_space); break;
397        case BOOST_XPR_CHAR_(char_type, ':'): ++begin; // fall-through
398        case BOOST_XPR_CHAR_(char_type, ')'): return token_no_mark;
399        case BOOST_XPR_CHAR_(char_type, '-'): if(false == (set = !set)) break; // else fall-through
400        default: BOOST_THROW_EXCEPTION(regex_error(error_paren, "unknown pattern modifier"));
401        }
402        while(BOOST_XPR_ENSURE_(++begin != end, error_paren, "incomplete extension"));
403        // this return is technically unreachable, but this must
404        // be here to work around a bug in gcc 4.0
405        return token_no_mark;
406    }
407
408    ///////////////////////////////////////////////////////////////////////////////
409    // flag_
410    void flag_(bool set, regex_constants::syntax_option_type flag)
411    {
412        this->flags_ = set ? (this->flags_ | flag) : (this->flags_ & ~flag);
413    }
414
415    ///////////////////////////////////////////////////////////////////////////
416    // is_space_
417    bool is_space_(char_type ch) const
418    {
419        return 0 != this->space_ && this->traits().isctype(ch, this->space_);
420    }
421
422    ///////////////////////////////////////////////////////////////////////////
423    // is_alnum_
424    bool is_alnum_(char_type ch) const
425    {
426        return 0 != this->alnum_ && this->traits().isctype(ch, this->alnum_);
427    }
428
429    ///////////////////////////////////////////////////////////////////////////
430    // get_name_
431    template<typename FwdIter>
432    void get_name_(FwdIter &begin, FwdIter end, string_type &name)
433    {
434        this->eat_ws_(begin, end);
435        for(name.clear(); begin != end && this->is_alnum_(*begin); ++begin)
436        {
437            name.push_back(*begin);
438        }
439        this->eat_ws_(begin, end);
440        BOOST_XPR_ENSURE_(!name.empty(), regex_constants::error_paren, "incomplete extension");
441    }
442
443    ///////////////////////////////////////////////////////////////////////////////
444    // eat_ws_
445    template<typename FwdIter>
446    FwdIter &eat_ws_(FwdIter &begin, FwdIter end)
447    {
448        if(0 != (regex_constants::ignore_white_space & this->flags()))
449        {
450            while(end != begin && (BOOST_XPR_CHAR_(char_type, '#') == *begin || this->is_space_(*begin)))
451            {
452                if(BOOST_XPR_CHAR_(char_type, '#') == *begin++)
453                {
454                    while(end != begin && BOOST_XPR_CHAR_(char_type, '\n') != *begin++) {}
455                }
456                else
457                {
458                    for(; end != begin && this->is_space_(*begin); ++begin) {}
459                }
460            }
461        }
462
463        return begin;
464    }
465
466    regex_traits traits_;
467    regex_constants::syntax_option_type flags_;
468    typename regex_traits::char_class_type space_;
469    typename regex_traits::char_class_type alnum_;
470};
471
472}} // namespace boost::xpressive
473
474#endif