/pnwtl/third_party/scintilla/lexers/LexErlang.cxx
https://code.google.com/p/pnotepad/ · C++ · 624 lines · 512 code · 70 blank · 42 comment · 247 complexity · 263bd19e699b31a6dda2cac6c3229b25 MD5 · raw file
- // Scintilla source code edit control
- // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
- // The License.txt file describes the conditions under which this software may be distributed.
- /** @file LexErlang.cxx
- ** Lexer for Erlang.
- ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
- ** Originally written by Peter-Henry Mander,
- ** based on Matlab lexer by José Fonseca.
- **/
-
- #include <stdlib.h>
- #include <string.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <assert.h>
- #include <ctype.h>
-
- #include "ILexer.h"
- #include "Scintilla.h"
- #include "SciLexer.h"
-
- #include "PropSetSimple.h"
- #include "WordList.h"
- #include "LexAccessor.h"
- #include "Accessor.h"
- #include "StyleContext.h"
- #include "CharacterSet.h"
- #include "LexerModule.h"
-
- #ifdef SCI_NAMESPACE
- using namespace Scintilla;
- #endif
-
/**
 * Tell whether a character is a valid digit in the given numeric base.
 *
 * The digits are '0'-'9' followed by the letters 'a'-'z' / 'A'-'Z'
 * (case-insensitive), which stand for the values 10 to 35.
 *
 * @param radix  numeric base; only bases 2 through 36 are accepted
 * @param ch     character code to classify; any int value is safe
 * @return 1 when ch is a digit whose value is below radix, 0 otherwise
 *         (including when radix itself is out of range)
 */
static int is_radix(int radix, int ch) {
	int digit;

	if (radix < 2 || radix > 36)
		return 0;

	// Explicit ASCII ranges instead of <ctype.h>: the ctype functions are
	// undefined for arguments that are neither EOF nor representable as an
	// unsigned char, and the caller hands over raw int character codes.
	if (ch >= '0' && ch <= '9') {
		digit = ch - '0';
	} else if (ch >= 'a' && ch <= 'z') {
		digit = ch - 'a' + 10;
	} else if (ch >= 'A' && ch <= 'Z') {
		digit = ch - 'A' + 10;
	} else {
		return 0;
	}

	return digit < radix;
}
-
/**
 * States of the token-level state machine used while colourising.
 * STATE_NULL means "no multi-character token in progress".
 */
typedef enum {
	STATE_NULL = 0,

	/* Comments, by level, plus documentation tags found inside them */
	COMMENT = 1,
	COMMENT_FUNCTION = 2,
	COMMENT_MODULE = 3,
	COMMENT_DOC = 4,
	COMMENT_DOC_MACRO = 5,

	/* Atoms */
	ATOM_UNQUOTED = 6,
	ATOM_QUOTED = 7,

	/* Node names */
	NODE_NAME_UNQUOTED = 8,
	NODE_NAME_QUOTED = 9,

	/* Macros */
	MACRO_START = 10,
	MACRO_UNQUOTED = 11,
	MACRO_QUOTED = 12,

	/* Records */
	RECORD_START = 13,
	RECORD_UNQUOTED = 14,
	RECORD_QUOTED = 15,

	/* Numbers */
	NUMERAL_START = 16,
	NUMERAL_BASE_VALUE = 17,
	NUMERAL_FLOAT = 18,
	NUMERAL_EXPONENT = 19,

	/* Text following a leading '-' */
	PREPROCESSOR = 20
} atom_parse_state_t;
-
/**
 * Tell whether ch can be part of an identifier-like word: an ASCII letter,
 * an ASCII digit or an underscore.
 *
 * @param ch  character code to classify; any int value is safe
 * @return true for [A-Za-z0-9_], false for everything else
 */
static inline bool IsAWordChar(const int ch) {
	// Bound the value on both sides before calling isalnum(): it is only
	// defined for EOF and values representable as an unsigned char.
	return (ch >= 0) && (ch < 0x80) && (isalnum(ch) || ch == '_');
}
-
/**
 * Colourise a range of an Erlang document.
 *
 * Walks the text with a StyleContext, one character per loop iteration, and
 * assigns SCE_ERLANG_* styles. Each iteration has three steps:
 *   1. if a multi-character token is in progress (parse_state != STATE_NULL),
 *      advance the parse_state machine;
 *   2. otherwise, let the current sc.state decide whether its token ends;
 *   3. if the style is (now) SCE_ERLANG_DEFAULT, look at the current
 *      character to see whether a new token starts here.
 *
 * @param startPos      position at which styling starts
 * @param length        number of characters to style
 * @param initStyle     style handed to the StyleContext as the initial state
 * @param keywordlists  word lists, by index: 0 reserved words, 1 BIFs,
 *                      2 preprocessor, 3 module attributes,
 *                      4 documentation tags, 5 documentation macros
 * @param styler        accessor handed to the StyleContext and told where
 *                      styling starts
 */
static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
								WordList *keywordlists[], Accessor &styler) {

	StyleContext sc(startPos, length, initStyle, styler);
	WordList &reservedWords = *keywordlists[0];
	WordList &erlangBIFs = *keywordlists[1];
	WordList &erlangPreproc = *keywordlists[2];
	WordList &erlangModulesAtt = *keywordlists[3];
	WordList &erlangDoc = *keywordlists[4];
	WordList &erlangDocMacro = *keywordlists[5];
	// Decimal value of the digits read so far in NUMERAL_START; it becomes
	// the base when a '#' follows (base#value notation).
	int radix_digits = 0;
	// Number of digits counted after the 'e'/'E' of an exponent.
	int exponent_digits = 0;
	// State of the token being scanned. It is a local, so every call starts
	// in STATE_NULL whatever initStyle is.
	atom_parse_state_t parse_state = STATE_NULL;
	// Comment state to go back to once a documentation tag has been read.
	atom_parse_state_t old_parse_state = STATE_NULL;
	// Becomes true once a non-'%' character has been seen in a comment;
	// later '%' characters then no longer raise the comment level.
	bool to_late_to_comment = false;
	// Scratch buffer receiving the text of the current token.
	char cur[100];
	// Style of the enclosing comment, restored after a documentation tag.
	int old_style = SCE_ERLANG_DEFAULT;

	styler.StartAt(startPos);

	for (; sc.More(); sc.Forward()) {
		// Style chosen for the token that ends on this iteration, if any.
		int style = SCE_ERLANG_DEFAULT;
		if (STATE_NULL != parse_state) {

			switch (parse_state) {

				// Not reached: the enclosing if already excludes STATE_NULL.
				case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;

			/* COMMENTS ------------------------------------------------------*/
				// One leading '%' so far. A second '%' promotes the comment to
				// the "function" level and steps past it.
				case COMMENT : {
					if (sc.ch != '%') {
						to_late_to_comment = true;
					} else if (!to_late_to_comment && sc.ch == '%') {
						// Switch to comment level 2 (Function)
						sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
						old_style = SCE_ERLANG_COMMENT_FUNCTION;
						parse_state = COMMENT_FUNCTION;
						sc.Forward();
					}
				}
				// V--- Falling through! (deliberate: the character after the
				// second '%' is examined right away for a third '%')
				case COMMENT_FUNCTION : {
					if (sc.ch != '%') {
						to_late_to_comment = true;
					} else if (!to_late_to_comment && sc.ch == '%') {
						// Switch to comment level 3 (Module)
						sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
						old_style = SCE_ERLANG_COMMENT_MODULE;
						parse_state = COMMENT_MODULE;
						sc.Forward();
					}
				}
				// V--- Falling through! (deliberate: shared end-of-line and
				// documentation-tag handling for every comment level)
				case COMMENT_MODULE : {
					if (parse_state != COMMENT) {
						// Search for comment documentation: only level 2 and
						// level 3 comments are searched, and a tag is spotted
						// when the NEXT character is '@'. A '{' right before
						// the '@' selects the macro form.
						if (sc.chNext == '@') {
							old_parse_state = parse_state;
							parse_state = ('{' == sc.ch)
											? COMMENT_DOC_MACRO
											: COMMENT_DOC;
							// Step onto the '@' and start a new segment there,
							// keeping the current comment style for now.
							sc.ForwardSetState(sc.state);
						}
					}

					// All comments types fall here.
					if (sc.atLineEnd) {
						to_late_to_comment = false;
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case COMMENT_DOC :
				// V--- Falling through!
				case COMMENT_DOC_MACRO : {

					// The tag ends at the first non-alphanumeric character.
					if (!isalnum(sc.ch)) {
						// Try to match documentation comment
						sc.GetCurrent(cur, sizeof(cur));

						if (parse_state == COMMENT_DOC_MACRO
							&& erlangDocMacro.InList(cur)) {
								sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
								// Extend the macro style up to the closing '}'
								// or the end of the line, whichever is first.
								while (sc.ch != '}' && !sc.atLineEnd)
									sc.Forward();
						} else if (erlangDoc.InList(cur)) {
							sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
						} else {
							// Unknown tag: style it like the enclosing comment.
							sc.ChangeState(old_style);
						}

						// Switch back to old state
						sc.SetState(old_style);
						parse_state = old_parse_state;
					}

					if (sc.atLineEnd) {
						to_late_to_comment = false;
						sc.ChangeState(old_style);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Atoms ---------------------------------------------------------*/
				case ATOM_UNQUOTED : {
					if ('@' == sc.ch){
						// name@host: the token is a node name, not an atom.
						parse_state = NODE_NAME_UNQUOTED;
					} else if (sc.ch == ':') {
						// Searching for module name
						if (sc.chNext == ' ') {
							// error
							sc.ChangeState(SCE_ERLANG_UNKNOWN);
							parse_state = STATE_NULL;
						} else {
							sc.Forward();
							if (isalnum(sc.ch))  {
								// NOTE(review): the text read into cur is not
								// used on this path.
								sc.GetCurrent(cur, sizeof(cur));
								// Style "module:" as a module name; parse_state
								// stays ATOM_UNQUOTED for what follows.
								sc.ChangeState(SCE_ERLANG_MODULES);
								sc.SetState(SCE_ERLANG_MODULES);
							}
						}
					} else if (!IsAWordChar(sc.ch)) {

						// End of the word: classify it, first match wins.
						sc.GetCurrent(cur, sizeof(cur));
						if (reservedWords.InList(cur)) {
							style = SCE_ERLANG_KEYWORD;
						} else if (erlangBIFs.InList(cur)
									&& strcmp(cur,"erlang:")){
							style = SCE_ERLANG_BIFS;
						} else if (sc.ch == '(' || '/' == sc.ch){
							// Followed by '(' or '/': treated as a function name.
							style = SCE_ERLANG_FUNCTION_NAME;
						} else {
							style = SCE_ERLANG_ATOM;
						}

						sc.ChangeState(style);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}

				} break;

				case ATOM_QUOTED : {
					if ( '@' == sc.ch ){
						parse_state = NODE_NAME_QUOTED;
					} else if ('\'' == sc.ch && '\\' != sc.chPrev) {
						// Closing quote that is not preceded by a backslash.
						sc.ChangeState(SCE_ERLANG_ATOM);
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Node names ----------------------------------------------------*/
				case NODE_NAME_UNQUOTED : {
					if ('@' == sc.ch) {
						// A second '@': give up without restyling the token.
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					} else if (!IsAWordChar(sc.ch)) {
						sc.ChangeState(SCE_ERLANG_NODE_NAME);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case NODE_NAME_QUOTED : {
					if ('@' == sc.ch) {
						// A second '@': give up without restyling the token.
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					} else if ('\'' == sc.ch && '\\' != sc.chPrev) {
						sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Records -------------------------------------------------------*/
				// Character right after a '#'.
				case RECORD_START : {
					if ('\'' == sc.ch) {
						parse_state = RECORD_QUOTED;
					} else if (isalpha(sc.ch) && islower(sc.ch)) {
						parse_state = RECORD_UNQUOTED;
					} else { // error
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case RECORD_UNQUOTED : {
					if (!IsAWordChar(sc.ch)) {
						sc.ChangeState(SCE_ERLANG_RECORD);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case RECORD_QUOTED : {
					if ('\'' == sc.ch && '\\' != sc.chPrev) {
						sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Macros --------------------------------------------------------*/
				// Character right after a '?'.
				case MACRO_START : {
					if ('\'' == sc.ch) {
						parse_state = MACRO_QUOTED;
					} else if (isalpha(sc.ch)) {
						parse_state = MACRO_UNQUOTED;
					} else { // error
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case MACRO_UNQUOTED : {
					if (!IsAWordChar(sc.ch)) {
						sc.ChangeState(SCE_ERLANG_MACRO);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				case MACRO_QUOTED : {
					if ('\'' == sc.ch && '\\' != sc.chPrev) {
						sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Numerics ------------------------------------------------------*/
				/* Simple integer */
				case NUMERAL_START : {
					if (isdigit(sc.ch)) {
						// Keep the running decimal value in case it turns
						// out to be the base of a base#value literal.
						radix_digits *= 10;
						radix_digits += sc.ch - '0'; // Assuming ASCII here!
					} else if ('#' == sc.ch) {
						if (2 > radix_digits || 36 < radix_digits) {
							// Base out of range: token stays unstyled.
							sc.SetState(SCE_ERLANG_DEFAULT);
							parse_state = STATE_NULL;
						} else {
							parse_state = NUMERAL_BASE_VALUE;
						}
					} else if ('.' == sc.ch && isdigit(sc.chNext)) {
						// A '.' only continues the number when a digit follows.
						radix_digits = 0;
						parse_state = NUMERAL_FLOAT;
					} else if ('e' == sc.ch || 'E' == sc.ch) {
						exponent_digits = 0;
						parse_state = NUMERAL_EXPONENT;
					} else {
						radix_digits = 0;
						sc.ChangeState(SCE_ERLANG_NUMBER);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				/* Integer in other base than 10 (x#yyy) */
				case NUMERAL_BASE_VALUE : {
					if (!is_radix(radix_digits,sc.ch)) {
						radix_digits = 0;

						// Only a non-alphanumeric terminator makes this a
						// number; otherwise the token keeps the style it was
						// started with.
						if (!isalnum(sc.ch))
							sc.ChangeState(SCE_ERLANG_NUMBER);

						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				/* Float (x.yyy) */
				case NUMERAL_FLOAT : {
					if ('e' == sc.ch || 'E' == sc.ch) {
						exponent_digits = 0;
						parse_state = NUMERAL_EXPONENT;
					} else if (!isdigit(sc.ch)) {
						sc.ChangeState(SCE_ERLANG_NUMBER);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

				/* Exponent, either integer or float (xEyy, x.yyEzzz) */
				case NUMERAL_EXPONENT : {
					if (('-' == sc.ch || '+' == sc.ch)
							&& (isdigit(sc.chNext))) {
						// NOTE(review): this steps onto the first digit and
						// the loop's own sc.Forward() then steps past it, so
						// that digit is not counted in exponent_digits.
						sc.Forward();
					} else if (!isdigit(sc.ch)) {
						// Styled as a number only if exponent digits were
						// counted.
						if (0 < exponent_digits)
							sc.ChangeState(SCE_ERLANG_NUMBER);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					} else {
						++exponent_digits;
					}
				} break;

			/* -------------------------------------------------------------- */
			/* Preprocessor --------------------------------------------------*/
				// Word following a leading '-'.
				case PREPROCESSOR : {
					if (!IsAWordChar(sc.ch)) {

						sc.GetCurrent(cur, sizeof(cur));
						if (erlangPreproc.InList(cur)) {
							style = SCE_ERLANG_PREPROC;
						} else if (erlangModulesAtt.InList(cur)) {
							style = SCE_ERLANG_MODULES_ATT;
						}

						// style is still SCE_ERLANG_DEFAULT when the word is
						// in neither list.
						sc.ChangeState(style);
						sc.SetState(SCE_ERLANG_DEFAULT);
						parse_state = STATE_NULL;
					}
				} break;

			}

		} /* End of : STATE_NULL != parse_state */
		else
		{
			// No token state machine running: decide from the current style
			// alone whether the token ends here.
			switch (sc.state) {
				case SCE_ERLANG_VARIABLE : {
					if (!IsAWordChar(sc.ch))
						sc.SetState(SCE_ERLANG_DEFAULT);
				} break;
				case SCE_ERLANG_STRING : {
					// Closing quote that is not preceded by a backslash.
					 if (sc.ch == '\"' && sc.chPrev != '\\')
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
				} break;
				case SCE_ERLANG_COMMENT : {
					 if (sc.atLineEnd)
						sc.SetState(SCE_ERLANG_DEFAULT);
				} break;
				case SCE_ERLANG_CHARACTER : {
					// Ends after the current character, unless that character
					// is a backslash that was not itself preceded by one.
					if (sc.chPrev == '\\') {
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
					} else if (sc.ch != '\\') {
						sc.ForwardSetState(SCE_ERLANG_DEFAULT);
					}
				} break;
				case SCE_ERLANG_OPERATOR : {
					// After a '.', one of * / \ ^ ' is swallowed into the
					// operator; in every other case the operator ends here.
					if (sc.chPrev == '.') {
						if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
							|| sc.ch == '^') {
							sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						} else if (sc.ch == '\'') {
							sc.ForwardSetState(SCE_ERLANG_DEFAULT);
						} else {
							sc.SetState(SCE_ERLANG_DEFAULT);
						}
					} else {
						sc.SetState(SCE_ERLANG_DEFAULT);
					}
				} break;
			}
		}

		// In the default style: see whether the current character opens a
		// new token.
		if (sc.state == SCE_ERLANG_DEFAULT) {
			bool no_new_state = false;

			switch (sc.ch) {
				case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
				case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
				case '%' : {
					parse_state = COMMENT;
					sc.SetState(SCE_ERLANG_COMMENT);
				} break;
				case '#' : {
					parse_state = RECORD_START;
					sc.SetState(SCE_ERLANG_UNKNOWN);
				} break;
				case '?' : {
					parse_state = MACRO_START;
					sc.SetState(SCE_ERLANG_UNKNOWN);
				} break;
				case '\'' : {
					parse_state = ATOM_QUOTED;
					sc.SetState(SCE_ERLANG_UNKNOWN);
				} break;
				case '+' :
				case '-' : {
					if (IsADigit(sc.chNext)) {
						// Sign directly followed by a digit: signed number.
						parse_state = NUMERAL_START;
						radix_digits = 0;
						sc.SetState(SCE_ERLANG_UNKNOWN);
					} else if (sc.ch != '+')  {
						// A '-' not followed by a digit starts a
						// preprocessor / module-attribute word.
						parse_state = PREPROCESSOR;
						sc.SetState(SCE_ERLANG_UNKNOWN);
					}
				} break;
				default : no_new_state = true;
			}

			// None of the single-character triggers matched: classify by
			// character class instead.
			if (no_new_state) {
				if (isdigit(sc.ch)) {
					parse_state = NUMERAL_START;
					radix_digits = sc.ch - '0';
					sc.SetState(SCE_ERLANG_UNKNOWN);
				} else if (isupper(sc.ch) || '_' == sc.ch) {
					sc.SetState(SCE_ERLANG_VARIABLE);
				} else if (isalpha(sc.ch)) {
					parse_state = ATOM_UNQUOTED;
					sc.SetState(SCE_ERLANG_UNKNOWN);
				} else if (isoperator(static_cast<char>(sc.ch))
							|| sc.ch == '\\') {
					sc.SetState(SCE_ERLANG_OPERATOR);
				}
			}
		}

	}
	sc.Complete();
}
-
- static int ClassifyErlangFoldPoint(
- Accessor &styler,
- int styleNext,
- int keyword_start
- ) {
- int lev = 0;
- if (styler.Match(keyword_start,"case")
- || (
- styler.Match(keyword_start,"fun")
- && (SCE_ERLANG_FUNCTION_NAME != styleNext)
- )
- || styler.Match(keyword_start,"if")
- || styler.Match(keyword_start,"query")
- || styler.Match(keyword_start,"receive")
- ) {
- ++lev;
- } else if (styler.Match(keyword_start,"end")) {
- --lev;
- }
-
- return lev;
- }
-
- static void FoldErlangDoc(
- unsigned int startPos, int length, int initStyle,
- WordList** /*keywordlists*/, Accessor &styler
- ) {
- unsigned int endPos = startPos + length;
- int currentLine = styler.GetLine(startPos);
- int lev;
- int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
- int currentLevel = previousLevel;
- int styleNext = styler.StyleAt(startPos);
- int style = initStyle;
- int stylePrev;
- int keyword_start = 0;
- char ch;
- char chNext = styler.SafeGetCharAt(startPos);
- bool atEOL;
-
- for (unsigned int i = startPos; i < endPos; i++) {
- ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
-
- // Get styles
- stylePrev = style;
- style = styleNext;
- styleNext = styler.StyleAt(i + 1);
- atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
-
- if (stylePrev != SCE_ERLANG_KEYWORD
- && style == SCE_ERLANG_KEYWORD) {
- keyword_start = i;
- }
-
- // Fold on keywords
- if (stylePrev == SCE_ERLANG_KEYWORD
- && style != SCE_ERLANG_KEYWORD
- && style != SCE_ERLANG_ATOM
- ) {
- currentLevel += ClassifyErlangFoldPoint(styler,
- styleNext,
- keyword_start);
- }
-
- // Fold on comments
- if (style == SCE_ERLANG_COMMENT
- || style == SCE_ERLANG_COMMENT_MODULE
- || style == SCE_ERLANG_COMMENT_FUNCTION) {
-
- if (ch == '%' && chNext == '{') {
- currentLevel++;
- } else if (ch == '%' && chNext == '}') {
- currentLevel--;
- }
- }
-
- // Fold on braces
- if (style == SCE_ERLANG_OPERATOR) {
- if (ch == '{' || ch == '(' || ch == '[') {
- currentLevel++;
- } else if (ch == '}' || ch == ')' || ch == ']') {
- currentLevel--;
- }
- }
-
-
- if (atEOL) {
- lev = previousLevel;
-
- if (currentLevel > previousLevel)
- lev |= SC_FOLDLEVELHEADERFLAG;
-
- if (lev != styler.LevelAt(currentLine))
- styler.SetLevel(currentLine, lev);
-
- currentLine++;
- previousLevel = currentLevel;
- }
-
- }
-
- // Fill in the real level of the next line, keeping the current flags as they will be filled in later
- styler.SetLevel(currentLine,
- previousLevel
- | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
- }
-
/**
 * Human-readable descriptions of the keyword lists, in list-index order.
 * The array is terminated by a null pointer.
 */
static const char * const erlangWordListDesc[] = {
	/* 0 */ "Erlang Reserved words",
	/* 1 */ "Erlang BIFs",
	/* 2 */ "Erlang Preprocessor",
	/* 3 */ "Erlang Module Attributes",
	/* 4 */ "Erlang Documentation",
	/* 5 */ "Erlang Documentation Macro",
	/* end of list */ 0
};
-
- LexerModule lmErlang(
- SCLEX_ERLANG,
- ColouriseErlangDoc,
- "erlang",
- FoldErlangDoc,
- erlangWordListDesc);