PageRenderTime 53ms CodeModel.GetById 11ms app.highlight 38ms RepoModel.GetById 1ms app.codeStats 0ms

/Src/Dependencies/Boost/boost/spirit/home/lex/tokenize_and_parse.hpp

http://hadesmem.googlecode.com/
C++ Header | 325 lines | 143 code | 29 blank | 153 comment | 15 complexity | 47ca270ca9e5ed379a4f29b1f32eb91e MD5 | raw file
  1//  Copyright (c) 2001-2011 Hartmut Kaiser
  2// 
  3//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
  4//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5
  6#if !defined(BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM)
  7#define BOOST_SPIRIT_LEXER_PARSE_NOV_17_2007_0246PM
  8
  9#if defined(_MSC_VER)
 10#pragma once
 11#endif
 12
 13#include <boost/spirit/home/qi/skip_over.hpp>
 14#include <boost/spirit/home/qi/parse.hpp>
 15#include <boost/spirit/home/qi/nonterminal/grammar.hpp>
 16#include <boost/spirit/home/support/unused.hpp>
 17#include <boost/spirit/home/lex/lexer.hpp>
 18#include <boost/mpl/assert.hpp>
 19
 20namespace boost { namespace spirit { namespace lex
 21{
 22    ///////////////////////////////////////////////////////////////////////////
 23    //  Import skip_flag enumerator type from Qi namespace
 24    using qi::skip_flag;
 25
 26    ///////////////////////////////////////////////////////////////////////////
 27    //
 28    //  The tokenize_and_parse() function is one of the main Spirit API 
 29    //  functions. It simplifies using a lexer as the underlying token source
 30    //  while parsing a given input sequence.
 31    //
 32    //  The function takes a pair of iterators spanning the underlying input 
 33    //  stream to parse, the lexer object (built from the token definitions) 
 34    //  and a parser object (built from the parser grammar definition).
 35    //
 36    //  The second version of this function additionally takes an attribute to 
 37    //  be used as the top level data structure instance the parser should use 
 38    //  to store the recognized input to.
 39    //
 40    //  The function returns true if the parsing succeeded (the given input
 41    //  sequence has been successfully matched by the given grammar).
 42    //
 43    //  first, last:    The pair of iterators spanning the underlying input 
 44    //                  sequence to parse. These iterators must at least 
 45    //                  conform to the requirements of the std::input_iterator 
 46    //                  category.
 47    //                  On exit the iterator 'first' will be updated to the 
 48    //                  position right after the last successfully matched 
 49    //                  token. 
 50    //  lex:            The lexer object (encoding the token definitions) to be
 51    //                  used to convert the input sequence into a sequence of 
 52    //                  tokens. This token sequence is passed to the parsing 
 53    //                  process. The LexerExpr type must conform to the 
 54    //                  lexer interface described in the corresponding section
 55    //                  of the documentation.
 56    //  xpr:            The grammar object (encoding the parser grammar) to be
 57    //                  used to match the token sequence generated by the lex 
 58    //                  object instance. The ParserExpr type must conform to 
 59    //                  the grammar interface described in the corresponding 
 60    //                  section of the documentation.
 61    //  attr:           The top level attribute passed to the parser. It will 
 62    //                  be populated during the parsing of the input sequence.
 63    //                  On exit it will hold the 'parser result' corresponding 
 64    //                  to the matched input sequence.
 65    //
 66    ///////////////////////////////////////////////////////////////////////////
 67    template <typename Iterator, typename Lexer, typename ParserExpr>
 68    inline bool
 69    tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex,
 70        ParserExpr const& xpr)
 71    {
 72        // Report invalid expression error as early as possible.
 73        // If you got an error_invalid_expression error message here,
 74        // then the expression (expr) is not a valid spirit qi expression.
 75        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
 76
 77        typename Lexer::iterator_type iter = lex.begin(first, last);
 78        return compile<qi::domain>(xpr).parse(
 79            iter, lex.end(), unused, unused, unused);
 80    }
 81
 82    ///////////////////////////////////////////////////////////////////////////
 83    template <typename Iterator, typename Lexer, typename ParserExpr
 84      , typename Attribute>
 85    inline bool
 86    tokenize_and_parse(Iterator& first, Iterator last, Lexer const& lex
 87      , ParserExpr const& xpr, Attribute& attr)
 88    {
 89        // Report invalid expression error as early as possible.
 90        // If you got an error_invalid_expression error message here,
 91        // then the expression (expr) is not a valid spirit qi expression.
 92        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
 93
 94        typename Lexer::iterator_type iter = lex.begin(first, last);
 95        return compile<qi::domain>(xpr).parse(
 96            iter, lex.end(), unused, unused, attr);
 97    }
 98
 99    ///////////////////////////////////////////////////////////////////////////
100    //
101    //  The tokenize_and_phrase_parse() function is one of the main Spirit API 
102    //  functions. It simplifies using a lexer as the underlying token source
103    //  while phrase parsing a given input sequence.
104    //
105    //  The function takes a pair of iterators spanning the underlying input 
106    //  stream to parse, the lexer object (built from the token definitions) 
107    //  and a parser object (built from the parser grammar definition). The 
108    //  additional skipper parameter will be used as the skip parser during
109    //  the parsing process.
110    //
111    //  The second version of this function additionally takes an attribute to 
112    //  be used as the top level data structure instance the parser should use 
113    //  to store the recognized input to.
114    //
115    //  The function returns true if the parsing succeeded (the given input
116    //  sequence has been successfully matched by the given grammar).
117    //
118    //  first, last:    The pair of iterators spanning the underlying input 
119    //                  sequence to parse. These iterators must at least 
120    //                  conform to the requirements of the std::input_iterator 
121    //                  category.
122    //                  On exit the iterator 'first' will be updated to the 
123    //                  position right after the last successfully matched 
124    //                  token. 
125    //  lex:            The lexer object (encoding the token definitions) to be
126    //                  used to convert the input sequence into a sequence of 
127    //                  tokens. This token sequence is passed to the parsing 
128    //                  process. The LexerExpr type must conform to the 
129    //                  lexer interface described in the corresponding section
130    //                  of the documentation.
131    //  xpr:            The grammar object (encoding the parser grammar) to be
132    //                  used to match the token sequence generated by the lex 
133    //                  object instance. The ParserExpr type must conform to 
134    //                  the grammar interface described in the corresponding 
135    //                  section of the documentation.
136    //  skipper:        The skip parser to be used while parsing the given 
137    //                  input sequence. Note, the skip parser will have to 
138    //                  act on the same token sequence as the main parser 
139    //                  'xpr'.
140    //  post_skip:      The post_skip flag controls whether the function will
141    //                  invoke an additional post skip after the main parser
142    //                  returned.
143    //  attr:           The top level attribute passed to the parser. It will 
144    //                  be populated during the parsing of the input sequence.
145    //                  On exit it will hold the 'parser result' corresponding 
146    //                  to the matched input sequence.
147    //
148    ///////////////////////////////////////////////////////////////////////////
149    template <typename Iterator, typename Lexer, typename ParserExpr
150      , typename Skipper>
151    inline bool
152    tokenize_and_phrase_parse(Iterator& first, Iterator last
153      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
154      , BOOST_SCOPED_ENUM(skip_flag) post_skip = skip_flag::postskip)
155    {
156        // Report invalid expression error as early as possible.
157        // If you got an error_invalid_expression error message here,
158        // then the expression (expr) is not a valid spirit qi expression.
159        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
160        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
161
162        typedef
163            typename spirit::result_of::compile<qi::domain, Skipper>::type
164        skipper_type;
165        skipper_type const skipper_ = compile<qi::domain>(skipper);
166
167        typename Lexer::iterator_type iter = lex.begin(first, last);
168        typename Lexer::iterator_type end = lex.end();
169        if (!compile<qi::domain>(xpr).parse(
170                iter, end, unused, skipper_, unused))
171            return false;
172
173        // do a final post-skip
174        if (post_skip == skip_flag::postskip)
175            qi::skip_over(iter, end, skipper_);
176        return true;
177    }
178
179    template <typename Iterator, typename Lexer, typename ParserExpr
180      , typename Skipper, typename Attribute>
181    inline bool
182    tokenize_and_phrase_parse(Iterator& first, Iterator last
183      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
184      , BOOST_SCOPED_ENUM(skip_flag) post_skip, Attribute& attr)
185    {
186        // Report invalid expression error as early as possible.
187        // If you got an error_invalid_expression error message here,
188        // then the expression (expr) is not a valid spirit qi expression.
189        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, ParserExpr);
190        BOOST_SPIRIT_ASSERT_MATCH(qi::domain, Skipper);
191
192        typedef
193            typename spirit::result_of::compile<qi::domain, Skipper>::type
194        skipper_type;
195        skipper_type const skipper_ = compile<qi::domain>(skipper);
196
197        typename Lexer::iterator_type iter = lex.begin(first, last);
198        typename Lexer::iterator_type end = lex.end();
199        if (!compile<qi::domain>(xpr).parse(
200                iter, end, unused, skipper_, attr))
201            return false;
202
203        // do a final post-skip
204        if (post_skip == skip_flag::postskip)
205            qi::skip_over(iter, end, skipper_);
206        return true;
207    }
208
209    ///////////////////////////////////////////////////////////////////////////
210    template <typename Iterator, typename Lexer, typename ParserExpr
211      , typename Skipper, typename Attribute>
212    inline bool
213    tokenize_and_phrase_parse(Iterator& first, Iterator last
214      , Lexer const& lex, ParserExpr const& xpr, Skipper const& skipper
215      , Attribute& attr)
216    {
217        return tokenize_and_phrase_parse(first, last, lex, xpr, skipper
218          , skip_flag::postskip, attr);
219    }
220
221    ///////////////////////////////////////////////////////////////////////////
222    //
223    //  The tokenize() function is one of the main Spirit API functions. It 
224    //  simplifies using a lexer to tokenize a given input sequence. Its main
225    //  purpose is to use the lexer to tokenize all the input. 
226    //
227    //  The second version below discards all generated tokens afterwards. 
228    //  This is useful whenever all the needed functionality has been 
229    //  implemented directly inside the lexer semantic actions, which are being 
230    //  executed while the tokens are matched. 
231    //
232    //  The function takes a pair of iterators spanning the underlying input 
233    //  stream to scan, the lexer object (built from the token definitions),
234    //  stream to scan, the lexer object (built from the token definitions),
235    //  and an (optional) functor being called for each of the generated tokens. 
235    //
236    //  The function returns true if the scanning of the input succeeded (the 
237    //  given input sequence has been successfully matched by the given token
238    //  definitions).
239    //
240    //  first, last:    The pair of iterators spanning the underlying input 
241    //                  sequence to parse. These iterators must at least 
242    //                  conform to the requirements of the std::input_iterator 
243    //                  category.
244    //                  On exit the iterator 'first' will be updated to the 
245    //                  position right after the last successfully matched 
246    //                  token. 
247    //  lex:            The lexer object (encoding the token definitions) to be
248    //                  used to convert the input sequence into a sequence of 
249    //                  tokens. The LexerExpr type must conform to the 
250    //                  lexer interface described in the corresponding section
251    //                  of the documentation.
252    //  f:              A functor (callable object) taking a single argument of
253    //                  the token type and returning a bool, indicating whether
254    //                  the tokenization should be canceled.
255    //  initial_state:  The name of the state the lexer should start matching.
256    //                  The default value is zero, causing the lexer to start 
257    //                  in its 'INITIAL' state.
258    //
259    ///////////////////////////////////////////////////////////////////////////
260    namespace detail
261    {
262        template <typename Token, typename F>
263        bool tokenize_callback(Token const& t, F f)
264        {
265            return f(t);
266        }
267
268        template <typename Token, typename Eval>
269        bool tokenize_callback(Token const& t, phoenix::actor<Eval> const& f)
270        {
271            f(t);
272            return true;
273        }
274
275        template <typename Token>
276        bool tokenize_callback(Token const& t, void (*f)(Token const&))
277        {
278            f(t);
279            return true;
280        }
281
282        template <typename Token>
283        bool tokenize_callback(Token const& t, bool (*f)(Token const&))
284        {
285            return f(t);
286        }
287    }
288
289    template <typename Iterator, typename Lexer, typename F>
290    inline bool
291    tokenize(Iterator& first, Iterator last, Lexer const& lex, F f
292      , typename Lexer::char_type const* initial_state = 0)
293    {
294        typedef typename Lexer::iterator_type iterator_type;
295
296        iterator_type iter = lex.begin(first, last, initial_state);
297        iterator_type end = lex.end();
298        for (/**/; iter != end && token_is_valid(*iter); ++iter) 
299        {
300            if (!detail::tokenize_callback(*iter, f))
301                return false;
302        }
303        return (iter == end) ? true : false;
304    }
305
306    ///////////////////////////////////////////////////////////////////////////
307    template <typename Iterator, typename Lexer>
308    inline bool
309    tokenize(Iterator& first, Iterator last, Lexer const& lex
310      , typename Lexer::char_type const* initial_state = 0)
311    {
312        typedef typename Lexer::iterator_type iterator_type;
313
314        iterator_type iter = lex.begin(first, last, initial_state);
315        iterator_type end = lex.end();
316
317        while (iter != end && token_is_valid(*iter))
318            ++iter;
319
320        return (iter == end) ? true : false;
321    }
322
323}}}
324
325#endif