/CMakeModules/TokenList2DsnLexer.cmake
CMake | 387 lines | 252 code | 61 blank | 74 comment | 24 complexity | 9d68d3762d6185de8b89d85472d360fb MD5 | raw file
- # This program source code file is part of KICAD, a free EDA CAD application.
- #
- # Copyright (C) 2010 Wayne Stambaugh <stambaughw@verizon.net>
- # Copyright (C) 2010 Kicad Developers, see AUTHORS.txt for contributors.
- #
- # This program is free software; you can redistribute it and/or
- # modify it under the terms of the GNU General Public License
- # as published by the Free Software Foundation; either version 2
- # of the License, or (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program; if not, you may find one here:
- # http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
- # or you may search the http://www.gnu.org website for the version 2 license,
- # or you may write to the Free Software Foundation, Inc.,
- # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- #
- #
- # This script converts a plain text file with a line feed separated list
- # of token names into the appropriate source and header files required by
- # the DSN lexer. See files "<base_source_path>/common/dsnlexer.cpp" and
- # "<base_source_path>/include/dsnlexer.h" for more information about how
- # the DSN lexer works. The token list file format requires a single token
- # per line. Tokens can only contain lower case letters, numbers, and
- # underscores. The first letter of each token must be a lower case letter.
- # Tokens must be unique. If any of the above criteria are not met, the
- # source and header files will not be generated and a build error will
- # occur.
- #
- # Valid tokens: a a1 foo_1 foo_bar2
- # Invalid tokens: 1 A _foo bar_ foO
- #
- # Invocation Parameters are: enum, inputFile, outCppFile, outHeaderFile
- #
- # enum - Required, namespace in which the enum T will be placed.
- # Keep it short because from outside the class you want a short enum name
- # like enum::T. Enums are contained in their own namespace to avoid
- # collisions on enum value names, a problem with C++ unless the enum
- # itself is in a separate namespace.
- #
- # inputFile - Required, name of the token list file, or "*.keywords" file.
- # Choose the basefilename carefully, it decides the class name
- # used in the generated *_lexer.h file.
- #
- # outCppFile - Optional, full path and file name of where to save the generated
- # cpp keywords file. If not defined, the output path is the same
- # path as the token list file path, with a file name of *_keywords.cpp
- #
- # outHeaderFile - Optional, full path and file name of where to save the generated
- #                  *.h lexer file. If not defined, the output path is the same
- # path as the token list file path, with a file name of *_lexer.h
- #
- # Use the make_lexer() CMake function from functions.cmake for invocation convenience.
#message( STATUS "TokenList2DsnLexer.cmake" )  # indicate we are running

set( tokens "" )
set( lineCount 0 )
set( dsnErrorMsg "TokenList2DsnLexer.cmake failure:" )

# Validate the required invocation parameters.  The expansions are quoted so
# that a path containing spaces does not split into multiple if() arguments.
if( NOT EXISTS "${inputFile}" )
    message( FATAL_ERROR "${dsnErrorMsg} file ${inputFile} cannot be found." )
endif()

if( NOT DEFINED enum )
    message( FATAL_ERROR "${dsnErrorMsg} missing \"enum\" processing ${inputFile}." )
endif()

get_filename_component( outputPath "${inputFile}" PATH )

# the keywords filename without extension is important, it sets the classname into RESULT
get_filename_component( result "${inputFile}" NAME_WE )
string( TOUPPER "${result}" RESULT )

set( LEXERCLASS "${RESULT}_LEXER" )
set( PARSERCLASS "${RESULT}_PARSER" )

#message( "enum:'${enum}' result:'${result}' outputPath:'${outputPath}' inputFile:'${inputFile}'" )

# Default the output files next to the token list file when not given.
if( NOT DEFINED outCppFile )
    set( outCppFile "${outputPath}/${result}_keywords.cpp" )
endif()

if( NOT DEFINED outHeaderFile )
    set( outHeaderFile "${outputPath}/${result}_lexer.h" )
endif()

# Create tag for generating header file.
set( headerTag "${LEXERCLASS}_H_" )
# Everything inside the quoted argument below is emitted verbatim at the top
# of the generated *_lexer.h file; ${...} references are expanded by CMake.
# Keep comments about this script outside the string or they end up in the
# generated header.
set( includeFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 */

#ifndef ${headerTag}
#define ${headerTag}

#include <dsnlexer.h>

/**
 * C++ does not put enum _values_ in separate namespaces unless the enum itself
 * is in a separate namespace. All the token enums must be in separate namespaces
 * otherwise the C++ compiler will eventually complain if it sees more than one
 * DSNLEXER in the same compilation unit, say by multiple header file inclusion.
 * Plus this also enables re-use of the same enum name T. A typedef can always be used
 * to clarify which enum T is in play should that ever be a problem. This is
 * unlikely since Parse() functions will usually only be exposed to one header
 * file like this one. But if there is a problem, then use:
 *      typedef ${enum}::T T;
 * within that problem area.
 */
namespace ${enum}
{
    /// enum T contains all this lexer's tokens.
    enum T
    {
        // these first few are negative special ones for syntax, and are
        // inherited from DSNLEXER.
        T_NONE = DSN_NONE,
        T_COMMENT = DSN_COMMENT,
        T_STRING_QUOTE = DSN_STRING_QUOTE,
        T_QUOTE_DEF = DSN_QUOTE_DEF,
        T_DASH = DSN_DASH,
        T_SYMBOL = DSN_SYMBOL,
        T_NUMBER = DSN_NUMBER,
        T_RIGHT = DSN_RIGHT,        // right bracket: ')'
        T_LEFT = DSN_LEFT,          // left bracket: '('
        T_STRING = DSN_STRING,      // a quoted string, stripped of the quotes
        T_EOF = DSN_EOF,            // special case for end of file
"
)
# Everything inside the quoted argument below is emitted verbatim at the top
# of the generated *_keywords.cpp file; the TOKDEF() table entries for each
# token are appended after it by the loop further down.
set( sourceFileHeader
"
/* Do not modify this file it was automatically generated by the
 * TokenList2DsnLexer CMake script.
 *
 * Include this file in your lexer class to provide the keywords for
 * your DSN lexer.
 */

#include <${result}_lexer.h>

using namespace ${enum};

#define TOKDEF(x)    { #x, T_##x }

const KEYWORD ${LEXERCLASS}::keywords[] = {
"
)
# Read the token list, one token per line; "#" starts a comment anywhere on
# a line.  The path is quoted in case it contains spaces.
file( STRINGS "${inputFile}" lines NO_HEX_CONVERSION )

foreach( line ${lines} )
    math( EXPR lineCount "${lineCount} + 1" )

    # strip any comment from # to end of line
    string( REGEX REPLACE "#.*$" "" tmpToken "${line}" )
    string( STRIP "${tmpToken}" token )

    # Ignore empty lines.  The comparison is quoted: an unquoted token whose
    # value is e.g. "off" would otherwise be treated as a boolean by if().
    if( NOT "${token}" STREQUAL "" )
        # Make sure token is valid: a lower case letter first, then only lower
        # case letters, digits, or underscores.  The regex is anchored so only
        # a whole-token match counts.
        #message( "token=${token}" )
        string( REGEX MATCH "^[a-z][_0-9a-z]*$" validToken "${token}" )
        #message( "validToken=${validToken}" )

        if( "${validToken}" STREQUAL "${token}" )
            list( APPEND tokens "${validToken}" )
        else()
            message( FATAL_ERROR
                "Invalid token string \"${tmpToken}\" at line ${lineCount} in file "
                "<${inputFile}>." )
        endif()
    endif()
endforeach()

list( SORT tokens )

# Check for duplicates: if removing duplicates changes the list length, the
# input file contained the same token twice.
list( LENGTH tokens tokensBefore )
list( REMOVE_DUPLICATES tokens )
list( LENGTH tokens tokensAfter )

if( NOT ( tokensBefore EQUAL tokensAfter ) )
    message( FATAL_ERROR "Duplicate tokens found in file <${inputFile}>." )
endif()
# Write both file headers, then append one line per sorted token: the header
# gets the enum member, the cpp file gets the keyword table entry.  The first
# enum member is pinned to 0 so enum values index directly into keywords[],
# and the final line of each list is emitted without a trailing comma.
file( WRITE "${outHeaderFile}" "${includeFileHeader}" )
file( WRITE "${outCppFile}" "${sourceFileHeader}" )

set( lineCount 1 )

foreach( token ${tokens} )
    if( lineCount EQUAL 1 )
        file( APPEND "${outHeaderFile}" "        T_${token} = 0" )
    else()
        file( APPEND "${outHeaderFile}" "        T_${token}" )
    endif()

    file( APPEND "${outCppFile}" "    TOKDEF( ${token} )" )

    if( lineCount EQUAL tokensAfter )
        file( APPEND "${outHeaderFile}" "\n" )
        file( APPEND "${outCppFile}" "\n" )
    else()
        file( APPEND "${outHeaderFile}" ",\n" )
        file( APPEND "${outCppFile}" ",\n" )
    endif()

    math( EXPR lineCount "${lineCount} + 1" )
endforeach()
# Close the enum and namespace, then emit the generated lexer class wrapper.
# Everything inside the quoted argument is verbatim C++ for *_lexer.h.
# NOTE: the example-usage comment uses ${PARSERCLASS}; a previous revision
# referenced the undefined variable ${LEXCLASS}, which expanded to nothing.
file( APPEND "${outHeaderFile}"
"    };
}   // namespace ${enum}


/**
 * Class ${LEXERCLASS}
 * is an automatically generated class using the TokenList2DsnLexer.cmake
 * technology, based on keywords provided by file:
 *    ${inputFile}
 */
class ${LEXERCLASS} : public DSNLEXER
{
    /// Auto generated lexer keywords table and length:
    static const KEYWORD keywords[];
    static const unsigned keyword_count;

public:
    /**
     * Constructor ( const std::string&, const wxString& )
     * @param aSExpression is (utf8) text possibly from the clipboard that you want to parse.
     * @param aSource is a description of the origin of @a aSExpression, such as a filename.
     *   If left empty, then _(\"clipboard\") is used.
     */
    ${LEXERCLASS}( const std::string& aSExpression, const wxString& aSource = wxEmptyString ) :
        DSNLEXER( keywords, keyword_count, aSExpression, aSource )
    {
    }

    /**
     * Constructor ( FILE* )
     * takes @a aFile already opened for reading and @a aFilename as parameters.
     * The opened file is assumed to be positioned at the beginning of the file
     * for purposes of accurate line number reporting in error messages. The
     * FILE is closed by this instance when its destructor is called.
     * @param aFile is a FILE already opened for reading.
     * @param aFilename is the name of the opened file, needed for error reporting.
     */
    ${LEXERCLASS}( FILE* aFile, const wxString& aFilename ) :
        DSNLEXER( keywords, keyword_count, aFile, aFilename )
    {
    }

    /**
     * Constructor ( LINE_READER* )
     * initializes a lexer and prepares to read from @a aLineReader which
     * is assumed ready, and may be in use by other DSNLEXERs also. No ownership
     * is taken of @a aLineReader. This enables it to be used by other lexers also.
     * The transition between grammars in such a case, must happen on a text
     * line boundary, not within the same line of text.
     *
     * @param aLineReader is any subclassed instance of LINE_READER, such as
     *  STRING_LINE_READER or FILE_LINE_READER. No ownership is taken of aLineReader.
     */
    ${LEXERCLASS}( LINE_READER* aLineReader ) :
        DSNLEXER( keywords, keyword_count, aLineReader )
    {
    }

    /**
     * Function TokenName
     * returns the name of the token in ASCII form.
     */
    static const char* TokenName( ${enum}::T aTok );

    /**
     * Function NextTok
     * returns the next token found in the input file or T_EOF when reaching
     * the end of file. Users should wrap this function to return an enum
     * to aid in grammar debugging while running under a debugger, but leave
     * this lower level function returning an int (so the enum does not collide
     * with another usage).
     * @return ${enum}::T - the type of token found next.
     * @throw IO_ERROR - only if the LINE_READER throws it.
     */
    ${enum}::T NextTok() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NextTok();
    }

    /**
     * Function NeedSYMBOL
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol().
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy IsSymbol()
     */
    ${enum}::T NeedSYMBOL() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOL();
    }

    /**
     * Function NeedSYMBOLorNUMBER
     * calls NextTok() and then verifies that the token read in
     * satisfies bool IsSymbol() or tok==T_NUMBER.
     * If not, an IO_ERROR is thrown.
     * @return int - the actual token read in.
     * @throw IO_ERROR, if the next token does not satisfy the above test
     */
    ${enum}::T NeedSYMBOLorNUMBER() throw( IO_ERROR )
    {
        return (${enum}::T) DSNLEXER::NeedSYMBOLorNUMBER();
    }

    /**
     * Function CurTok
     * returns whatever NextTok() returned the last time it was called.
     */
    ${enum}::T CurTok()
    {
        return (${enum}::T) DSNLEXER::CurTok();
    }

    /**
     * Function PrevTok
     * returns whatever NextTok() returned the 2nd to last time it was called.
     */
    ${enum}::T PrevTok()
    {
        return (${enum}::T) DSNLEXER::PrevTok();
    }
};

// example usage

/**
 * Class ${PARSERCLASS}
 * holds data and functions pertinent to parsing a S-expression file.
 *
class ${PARSERCLASS} : public ${LEXERCLASS}
{
};
*/

#endif   // ${headerTag}
"
)
# Close the keywords[] table in the cpp file and append the table length plus
# the TokenName() helper, which maps a token value back to its ASCII keyword
# (negative syntax tokens are delegated to DSNLEXER::Syntax()).
file( APPEND "${outCppFile}"
"};

const unsigned ${LEXERCLASS}::keyword_count = unsigned( sizeof( ${LEXERCLASS}::keywords )/sizeof( ${LEXERCLASS}::keywords[0] ) );


const char* ${LEXERCLASS}::TokenName( T aTok )
{
    const char* ret;

    if( aTok < 0 )
        ret = DSNLEXER::Syntax( aTok );
    else if( (unsigned) aTok < keyword_count )
        ret = keywords[aTok].name;
    else
        ret = \"token too big\";

    return ret;
}
"
)