// LexRuby.cxx - Scintilla source code edit control Copyright …

// /cssed-0.4.0/scintilla/src/LexRuby.cxx

// C++ · 1256 lines · 1015 code · 70 blank · 171 comment · 364 complexity · fcc18055fbdfb103385ab2e5e2394f6f MD5 · raw file

// Scintilla source code edit control

/** @file LexRuby.cxx

 ** Lexer for Ruby.

 **/

// Copyright 2001- by Clemens Wyss <wys@helbling.ch>

// The License.txt file describes the conditions under which this software may be distributed.



#include <stdlib.h>

#include <string.h>

#include <ctype.h>

#include <stdio.h>

#include <stdarg.h>



#include "Platform.h"



#include "PropSet.h"

#include "Accessor.h"

#include "KeyWords.h"

#include "Scintilla.h"

#include "SciLexer.h"



#ifdef SCI_NAMESPACE

using namespace Scintilla;

#endif



//XXX Identical to Perl, put in common area

// Reports whether ch is a line-terminator character, i.e. a carriage
// return ('\r') or a line feed ('\n').
static inline bool isEOLChar(char ch) {
    switch (ch) {
        case '\r':
        case '\n':
            return true;
        default:
            return false;
    }
}



// Reports whether ch is one of the punctuation characters this lexer
// treats as Ruby operator characters.
//
// strchr() considers the terminating NUL part of the searched string, so
// a NUL argument has to be rejected explicitly; otherwise '\0' would be
// reported as an operator character.
static inline bool isRubyOperatorChar(char ch) {
    if (ch == '\0') {
        return false;
    }
    return strchr("%^&*\\()-+=|{}[]:;<>,/?!.~", ch) != NULL;
}





// Reports whether ch is an alphabetic character within the 7-bit range.
// Any value that converts to more than 127 as an unsigned int is
// rejected before isalpha() is consulted.
static inline bool isSafeAlpha(char ch) {
    const unsigned int code = (unsigned int) ch;
    if (code > 127) {
        return false;
    }
    return isalpha(ch) != 0;
}



// Upper bound used when sizing and truncating keyword buffers.
#define MAX_KEYWORD_LENGTH 200

// Low six bits of a style byte carry the lexical style.
#define STYLE_MASK 63

// Extracts the lexical style from a style value.  The argument and the
// whole expansion are parenthesized so that compound expressions such as
// actual_style(a | b) are masked as a unit.
#define actual_style(style) ((style) & STYLE_MASK)



// Scans backwards from pos (never examining position 0) to decide whether
// the text just after pos is preceded by a '.' operator.
//
// Blanks and tabs in the default style are skipped.  The first
// operator-styled character decides the result: true only if it is '.'.
// Any other style, or any other default-styled character, yields false.
static bool followsDot(unsigned int pos, Accessor &styler) {
    styler.Flush();
    while (pos >= 1) {
        const int styleHere = actual_style(styler.StyleAt(pos));
        if (styleHere == SCE_RB_OPERATOR) {
            return styler[pos] == '.';
        }
        if (styleHere != SCE_RB_DEFAULT) {
            return false;
        }
        const char here = styler[pos];
        if (here != ' ' && here != '\t') {
            return false;
        }
        --pos;
    }
    return false;
}



// Forward declarations

// Defined further down in this file; ClassifyWordRb calls it to find
// out whether a keyword may need to be demoted.
static bool keywordIsAmbiguous(const char *prevWord);

// Called by keywordIsModifier when the word is "do".
static bool keywordDoStartsLoop(int pos,

                                Accessor &styler);

// Defined further down in this file; ClassifyWordRb calls it with the
// word text and the word's start position.
static bool keywordIsModifier(const char *word,

                              int pos,

                              Accessor &styler);



static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {

	char s[100];

    unsigned int i, j;

	unsigned int lim = end - start + 1; // num chars to copy

	if (lim >= MAX_KEYWORD_LENGTH) {

		lim = MAX_KEYWORD_LENGTH - 1;

	}

	for (i = start, j = 0; j < lim; i++, j++) {

		s[j] = styler[i];

	}

    s[j] = '\0';

	int chAttr;

	if (0 == strcmp(prevWord, "class"))

		chAttr = SCE_RB_CLASSNAME;

	else if (0 == strcmp(prevWord, "module"))

		chAttr = SCE_RB_MODULE_NAME;

	else if (0 == strcmp(prevWord, "def"))

		chAttr = SCE_RB_DEFNAME;

    else if (keywords.InList(s) && !followsDot(start - 1, styler)) {

        if (keywordIsAmbiguous(s)

            && keywordIsModifier(s, start, styler)) {

            

            // Demoted keywords are colored as keywords,

            // but do not affect changes in indentation.

            //

            // Consider the word 'if':

            // 1. <<if test ...>> : normal

            // 2. <<stmt if test>> : demoted

            // 3. <<lhs = if ...>> : normal: start a new indent level

            // 4. <<obj.if = 10>> : color as identifer, since it follows '.'

            

            chAttr = SCE_RB_WORD_DEMOTED;

        } else {

            chAttr = SCE_RB_WORD;

        }

	} else

        chAttr = SCE_RB_IDENTIFIER;

	styler.ColourTo(end, chAttr);

	if (chAttr == SCE_RB_WORD) {

		strcpy(prevWord, s);

	} else {

		prevWord[0] = 0;

	}

    return chAttr;

}





//XXX Identical to Perl, put in common area

// Reports whether the document text starting at pos equals the
// NUL-terminated string val.
//
// Returns false without reading the document when pos is negative or
// when pos + strlen(val) is not strictly less than lengthDoc.
static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
    // Callers compute pos by subtraction (e.g. "i - 7"), which can go
    // negative near the start of the document; nothing can match there.
    if (pos < 0) {
        return false;
    }
    if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
        return false;
    }
    while (*val) {
        if (*val != styler[pos++]) {
            return false;
        }
        val++;
    }
    return true;
}



// Do Ruby better -- find the end of the line, work back,

// and then check for leading white space



// Precondition: the here-doc target can be indented

// Reports whether the text at pos is the here-doc delimiter HereDocDelim
// and everything between it and the preceding end-of-line character is
// blanks or tabs.
//
// The backwards scan stops before position 0, so a delimiter that is not
// preceded by an end-of-line character at a position greater than 0 is
// not accepted.
static bool lookingAtHereDocDelim(Accessor     &styler,
                                  int           pos,
                                  int           lengthDoc,
                                  const char   *HereDocDelim)
{
    if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
        return false;
    }
    for (int scan = pos - 1; scan > 0; --scan) {
        const char before = styler[scan];
        if (isEOLChar(before)) {
            return true;
        }
        if (before != ' ' && before != '\t') {
            return false;
        }
    }
    return false;
}



//XXX Identical to Perl, put in common area

// Returns the closing bracket that pairs with an opening '(', '[', '{'
// or '<'.  Every other character is returned unchanged.
static char opposite(char ch) {
    switch (ch) {
        case '(':
            return ')';
        case '[':
            return ']';
        case '{':
            return '}';
        case '<':
            return '>';
        default:
            return ch;
    }
}



// Null transitions when we see we've reached the end

// and need to relex the curr char.



// Steps the lexer back by one character and drops into the default state
// so the current character is lexed again on the next loop iteration.
// The look-ahead characters are shifted to match: chNext2 takes the old
// chNext, and chNext takes ch.  ch itself is left untouched.
static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
                      int &state) {
    state = SCE_RB_DEFAULT;
    chNext2 = chNext;
    chNext = ch;
    --i;
}



// Moves the lexer forward by one character: ch takes the old chNext and
// chNext takes chNext2.  chNext2 is not refreshed here.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    ch = chNext;
    chNext = chNext2;
    ++i;
}



// precondition: startPos points to one after the EOL char

// Scans backwards from startPos - 1 looking for a character styled as a
// here-doc delimiter (SCE_RB_HERE_DELIM) before an end-of-line character
// is reached.
//
// On success, startPos is moved to the position of that character and
// true is returned.  If an end-of-line character is met first, or the
// scan reaches position 0, startPos is left alone and false is returned.
static bool currLineContainsHereDelims(int& startPos,
                                       Accessor &styler) {
    if (startPos <= 1) {
        return false;
    }

    int scan = startPos - 1;
    while (scan > 0) {
        if (isEOLChar(styler.SafeGetCharAt(scan))) {
            // No here-doc delimiters on this line, even if the EOL
            // itself isn't in the default style.
            return false;
        }
        styler.Flush();
        if (actual_style(styler.StyleAt(scan)) == SCE_RB_HERE_DELIM) {
            // Record where we stopped so the caller need not re-scan.
            startPos = scan;
            return true;
        }
        scan--;
    }
    return false;
}





// Reports whether the line containing pos has the SC_FOLDLEVELWHITEFLAG
// bit set in the value returned by styler.IndentAmount().
static bool isEmptyLine(int pos,
                        Accessor &styler) {
    int spaceFlags = 0;
    const int line = styler.GetLine(pos);
    const int indent = styler.IndentAmount(line, &spaceFlags, NULL);
    if (indent & SC_FOLDLEVELWHITEFLAG) {
        return true;
    }
    return false;
}



// Reports whether keyword is one of the keywords listed below.  The main
// lexer loop uses the result as its preferRE flag after a keyword, which
// decides whether a following '/' starts a regular expression.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const regexFriendly[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(regexFriendly) / sizeof(regexFriendly[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(keyword, regexFriendly[k]) == 0) {
            return true;
        }
    }
    return false;
}

    



//todo: if we aren't looking at a stdio character,

// move to the start of the first line that is not in a 

// multi-line construct



// Moves the lexing start back to the beginning of a line from which
// lexing can safely begin in the default state.
//
// If the style at startPos is SCE_RB_STDIN, SCE_RB_STDOUT or
// SCE_RB_STDERR, nothing is changed.  Otherwise lines are walked
// backwards while the end of the previous line
//   - is preceded by a backslash (continuation line),
//   - is not in the default style (inside a multi-line construct),
//   - belongs to a line holding a here-doc delimiter, or
//   - (only when skipWhiteSpace is set) belongs to an empty line.
// The walk also stops, selecting line 0, once the examined position is
// within the first 10 characters of the document.
//
// On return startPos is the start of the chosen line, length has grown
// by the distance moved, and initStyle is SCE_RB_DEFAULT.
static void synchronizeDocStart(unsigned int& startPos,
                                int &length,
                                int &initStyle,
                                Accessor &styler,
                                bool skipWhiteSpace=false) {

    styler.Flush();
    const int startStyle = actual_style(styler.StyleAt(startPos));
    if (startStyle == SCE_RB_STDIN
        || startStyle == SCE_RB_STDOUT
        || startStyle == SCE_RB_STDERR) {
        // Don't do anything else with these.
        return;
    }

    int pos = startPos;
    int lineStart = styler.GetLine(pos);
    while (lineStart > 0) {
        // Look at the last character of the previous line.
        pos = styler.LineStart(lineStart) - 1;
        if (pos <= 10) {
            lineStart = 0;
            break;
        }
        const char eolChar = styler.SafeGetCharAt(pos);
        const char beforeEol = styler.SafeGetCharAt(pos - 1);
        if (eolChar == '\n' && beforeEol == '\r') {
            // Step onto the '\r' of a "\r\n" pair.
            pos--;
        }

        // Evaluated left to right; the first condition that holds means
        // this line cannot be the restart point.
        const bool keepScanning =
            styler.SafeGetCharAt(pos - 1) == '\\'
            || actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT
            || currLineContainsHereDelims(pos, styler)
            || (skipWhiteSpace && isEmptyLine(pos, styler));
        if (!keepScanning) {
            break;
        }
        lineStart--;
    }

    pos = styler.LineStart(lineStart);
    length += (startPos - pos);
    startPos = pos;
    initStyle = SCE_RB_DEFAULT;
}



static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,

						   WordList *keywordlists[], Accessor &styler) {



	// Lexer for Ruby often has to backtrack to start of current style to determine

	// which characters are being used as quotes, how deeply nested is the

	// start position and what the termination string is for here documents

    

	WordList &keywords = *keywordlists[0];



	class HereDocCls {

	public:

		int State;

        // States

        // 0: '<<' encountered

		// 1: collect the delimiter

        // 1b: text between the end of the delimiter and the EOL

		// 2: here doc text (lines after the delimiter)

		char Quote;		// the char after '<<'

		bool Quoted;		// true if Quote in ('\'','"','`')

		int DelimiterLength;	// strlen(Delimiter)

		char Delimiter[256];	// the Delimiter, limit of 256: from Perl

        bool CanBeIndented;

		HereDocCls() {

			State = 0;

			DelimiterLength = 0;

			Delimiter[0] = '\0';

            CanBeIndented = false;

		}

	};

	HereDocCls HereDoc;	



	class QuoteCls {

		public:

		int  Count;

		char Up;

		char Down;

		QuoteCls() {

			this->New();

		}

		void New() {

			Count = 0;

			Up    = '\0';

			Down  = '\0';

		}

		void Open(char u) {

			Count++;

			Up    = u;

			Down  = opposite(Up);

		}

	};

	QuoteCls Quote;



    int numDots = 0;  // For numbers --

                      // Don't start lexing in the middle of a num



    synchronizeDocStart(startPos, length, initStyle, styler, // ref args

                        false);



	bool preferRE = true;

    int state = initStyle;

	int lengthDoc = startPos + length;



	char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero

	prevWord[0] = '\0';

	if (length == 0)

		return;



	char chPrev = styler.SafeGetCharAt(startPos - 1);

	char chNext = styler.SafeGetCharAt(startPos);

	// Ruby uses a different mask because bad indentation is marked by oring with 32

	styler.StartAt(startPos, 127);

	styler.StartSegment(startPos);



    static int q_states[] = {SCE_RB_STRING_Q,

                             SCE_RB_STRING_QQ,

                             SCE_RB_STRING_QR,

                             SCE_RB_STRING_QW,

                             SCE_RB_STRING_QW,

                             SCE_RB_STRING_QX};

    static const char* q_chars = "qQrwWx";

    

	for (int i = startPos; i < lengthDoc; i++) {

		char ch = chNext;

		chNext = styler.SafeGetCharAt(i + 1);

		char chNext2 = styler.SafeGetCharAt(i + 2);



        if (styler.IsLeadByte(ch)) {

			chNext = chNext2;

			chPrev = ' ';

			i += 1;

			continue;

		}

		

        // skip on DOS/Windows

        //No, don't, because some things will get tagged on,

        // so we won't recognize keywords, for example

#if 0

		if (ch == '\r' && chNext == '\n') {

	    	continue;

        }

#endif

            

        if (HereDoc.State == 1 && isEOLChar(ch)) {

			// Begin of here-doc (the line after the here-doc delimiter):

			HereDoc.State = 2;

			styler.ColourTo(i-1, state);

            // Don't check for a missing quote, just jump into

            // the here-doc state

            state = SCE_RB_HERE_Q;

        }



        // Regular transitions

		if (state == SCE_RB_DEFAULT) {

            if (isdigit(ch)) {

            	styler.ColourTo(i - 1, state);

				state = SCE_RB_NUMBER;

                numDots = 0;

            } else if (iswordstart(ch)) {

            	styler.ColourTo(i - 1, state);

				state = SCE_RB_WORD;

			} else if (ch == '#') {

				styler.ColourTo(i - 1, state);

				state = SCE_RB_COMMENTLINE;

			} else if (ch == '=') {

				// =begin indicates the start of a comment (doc) block

                if (i == 0 || isEOLChar(chPrev)

                    && chNext == 'b'

                    && styler.SafeGetCharAt(i + 2) == 'e'

                    && styler.SafeGetCharAt(i + 3) == 'g'

                    && styler.SafeGetCharAt(i + 4) == 'i'

                    && styler.SafeGetCharAt(i + 5) == 'n'

                    && !iswordchar(styler.SafeGetCharAt(i + 6))) {

                    styler.ColourTo(i - 1, state);

                    state = SCE_RB_POD;

				} else {

					styler.ColourTo(i - 1, state);

					styler.ColourTo(i, SCE_RB_OPERATOR);

					preferRE = true;

				}

			} else if (ch == '"') {

				styler.ColourTo(i - 1, state);

				state = SCE_RB_STRING;

				Quote.New();

				Quote.Open(ch);

			} else if (ch == '\'') {

                styler.ColourTo(i - 1, state);

                state = SCE_RB_CHARACTER;

                Quote.New();

                Quote.Open(ch);

			} else if (ch == '`') {

				styler.ColourTo(i - 1, state);

				state = SCE_RB_BACKTICKS;

				Quote.New();

				Quote.Open(ch);

			} else if (ch == '@') {

                // Instance or class var

				styler.ColourTo(i - 1, state);

                if (chNext == '@') {

                    state = SCE_RB_CLASS_VAR;

                    advance_char(i, ch, chNext, chNext2); // pass by ref

                } else {

                    state = SCE_RB_INSTANCE_VAR;

                }

			} else if (ch == '$') {

                // Check for a builtin global

				styler.ColourTo(i - 1, state);

                // Recognize it bit by bit

                state = SCE_RB_GLOBAL;

            } else if (ch == '/' && preferRE) {

                // Ambigous operator

				styler.ColourTo(i - 1, state);

				state = SCE_RB_REGEX;

                Quote.New();

                Quote.Open(ch);

			} else if (ch == '<' && chNext == '<' && chNext2 != '=') {



            // Recognise the '<<' symbol - either a here document or a binary op

                

				styler.ColourTo(i - 1, state);

                i++;

                chNext = chNext2;

				styler.ColourTo(i, SCE_RB_OPERATOR);

                

                if (preferRE) {

                    state = SCE_RB_HERE_DELIM;

				    HereDoc.State = 0;

                } else {

                    // leave state as default

                    // We don't have all the heuristics Perl has for indications

                    // of a here-doc, because '<<' is overloadable and used

                    // for so many other classes.

					preferRE = true;

                }

            } else if (ch == ':') {

				styler.ColourTo(i - 1, state);

                if (chNext == ':') {

                    // Mark "::" as an operator, not symbol start

                    styler.ColourTo(i + 1, SCE_RB_OPERATOR);

                    advance_char(i, ch, chNext, chNext2); // pass by ref

                    state = SCE_RB_DEFAULT;

					preferRE = false;

                } else if (iswordchar(chNext)) {

					state = SCE_RB_SYMBOL;

                } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {

                    // Do the operator analysis in-line, looking ahead

                    // Based on the table in pickaxe 2nd ed., page 339

                    bool doColoring = true;

                    switch (chNext) {

                    case '[':

                        if (chNext2 == ']' ) {

                            char ch_tmp = styler.SafeGetCharAt(i + 3);

                            if (ch_tmp == '=') {

                                i += 3;

                                ch = ch_tmp;

                                chNext = styler.SafeGetCharAt(i + 1);

                            } else {

                                i += 2;

                                ch = chNext2;

                                chNext = ch_tmp;

                            }

                        } else {

                            doColoring = false;

                        }

                        break;



                    case '*':

                        if (chNext2 == '*') {

                            i += 2;

                            ch = chNext2;

                            chNext = styler.SafeGetCharAt(i + 1);

                        } else {

                            advance_char(i, ch, chNext, chNext2);

                        }

                        break;



                    case '!':

                        if (chNext2 == '=' || chNext2 == '~') {

                            i += 2;

                            ch = chNext2;

                            chNext = styler.SafeGetCharAt(i + 1);

                        } else {

                            advance_char(i, ch, chNext, chNext2);

                        }

                        break;



                    case '<':

                        if (chNext2 == '<') {

                            i += 2;

                            ch = chNext2;

                            chNext = styler.SafeGetCharAt(i + 1);

                        } else if (chNext2 == '=') {

                            char ch_tmp = styler.SafeGetCharAt(i + 3);

                            if (ch_tmp == '>') {  // <=> operator

                                i += 3;

                                ch = ch_tmp;

                                chNext = styler.SafeGetCharAt(i + 1);

                            } else {

                                i += 2;

                                ch = chNext2;

                                chNext = ch_tmp;

                            }

                        } else {

                            advance_char(i, ch, chNext, chNext2);

                        }

                        break;



                    default:

                        // Simple one-character operators

                        advance_char(i, ch, chNext, chNext2);

                        break;

                    }

                    if (doColoring) {

                        styler.ColourTo(i, SCE_RB_SYMBOL);

                        state = SCE_RB_DEFAULT;

                    }

				} else if (!preferRE) {

					// Don't color symbol strings (yet)

					// Just color the ":" and color rest as string

					styler.ColourTo(i, SCE_RB_SYMBOL);

					state = SCE_RB_DEFAULT;

                } else {

                    styler.ColourTo(i, SCE_RB_OPERATOR);

                    state = SCE_RB_DEFAULT;

                    preferRE = true;

                }

            } else if (ch == '%') {

                styler.ColourTo(i - 1, state);

                bool have_string = false;

                if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {

                    Quote.New();

                    const char *hit = strchr(q_chars, chNext);

                    if (hit != NULL) {

                        state = q_states[hit - q_chars];

                        Quote.Open(chNext2);

                        i += 2;

                        ch = chNext2;

						chNext = styler.SafeGetCharAt(i + 1);

                        have_string = true;

                    }

                } else if (!iswordchar(chNext)) {

                    state = SCE_RB_STRING_QQ;

                    Quote.Open(chNext);

                    advance_char(i, ch, chNext, chNext2); // pass by ref

                    have_string = true;

                }

                if (!have_string) {

                    styler.ColourTo(i, SCE_RB_OPERATOR);

                    // stay in default

                    preferRE = true;

                }

            } else if (isoperator(ch)) {

				styler.ColourTo(i - 1, state);

				styler.ColourTo(i, SCE_RB_OPERATOR);

                // If we're ending an expression or block,

                // assume it ends an object, and the ambivalent

                // constructs are binary operators

                //

                // So if we don't have one of these chars,

                // we aren't ending an object exp'n, and ops

                // like : << / are unary operators.

                

                preferRE = (strchr(")}]", ch) == NULL);

                // Stay in default state

            } else if (isEOLChar(ch)) {

                // Make sure it's a true line-end, with no backslash

                if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))

                    && chPrev != '\\') {

                    // Assume we've hit the end of the statement.

                    preferRE = true;

                }

            }

        } else if (state == SCE_RB_WORD) {

            if (ch == '.' || !iswordchar(ch)) {

                // Words include x? in all contexts,

                // and <letters>= after either 'def' or a dot

                // Move along until a complete word is on our left



                // Default accessor treats '.' as word-chars,

                // but we don't for now.

                

                if (ch == '='

                    && iswordchar(chPrev)

                    && (chNext == '('

                        || strchr(" \t\n\r", chNext) != NULL)

                    && (!strcmp(prevWord, "def")

                        || followsDot(styler.GetStartSegment(), styler))) {

                    // <name>= is a name only when being def'd -- Get it the next time

                    // This means that <name>=<name> is always lexed as

                    // <name>, (op, =), <name>

                } else if ((ch == '?' || ch == '!')

                           && iswordchar(chPrev)

                           && !iswordchar(chNext)) {

                    // <name>? is a name -- Get it the next time

                    // But <name>?<name> is always lexed as

                    // <name>, (op, ?), <name>

                    // Same with <name>! to indicate a method that

                    // modifies its target

                } else if (isEOLChar(ch)

                           && isMatch(styler, lengthDoc, i - 7, "__END__")) {

                    styler.ColourTo(i, SCE_RB_DATASECTION);

                    state = SCE_RB_DATASECTION;

                    // No need to handle this state -- we'll just move to the end

                    preferRE = false;

                } else {

					int wordStartPos = styler.GetStartSegment();

                    int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);

                    switch (word_style) {

                        case SCE_RB_WORD:

                            preferRE = RE_CanFollowKeyword(prevWord);

							break;

                            

                        case SCE_RB_WORD_DEMOTED:

                            preferRE = true;

							break;

                            

                        case SCE_RB_IDENTIFIER:

                            if (isMatch(styler, lengthDoc, wordStartPos, "print")) {

                                preferRE = true;

                            } else if (isEOLChar(ch)) {

                                preferRE = true;

                            } else {

                                preferRE = false;

                            }

							break;

                        default:

                            preferRE = false;

                    }

                    redo_char(i, ch, chNext, chNext2, state); // pass by ref

                }

            }

        } else if (state == SCE_RB_NUMBER) {

            if (isalnum(ch) || ch == '_') {

                // Keep going

            } else if (ch == '.' && ++numDots == 1) {

                // Keep going

            } else {

                styler.ColourTo(i - 1, state);

                redo_char(i, ch, chNext, chNext2, state); // pass by ref

                preferRE = false;

            }

        } else if (state == SCE_RB_COMMENTLINE) {

			if (isEOLChar(ch)) {

                styler.ColourTo(i - 1, state);

                state = SCE_RB_DEFAULT;

                // Use whatever setting we had going into the comment

            }

        } else if (state == SCE_RB_HERE_DELIM) {

            // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx

            // Slightly different: if we find an immediate '-',

            // the target can appear indented.

            

			if (HereDoc.State == 0) { // '<<' encountered

				HereDoc.State = 1;

                HereDoc.DelimiterLength = 0;

                if (ch == '-') {

                    HereDoc.CanBeIndented = true;

                    advance_char(i, ch, chNext, chNext2); // pass by ref

                } else {

                    HereDoc.CanBeIndented = false;

                }

                if (isEOLChar(ch)) {

                    // Bail out of doing a here doc if there's no target

                    state = SCE_RB_DEFAULT;

                    preferRE = false;

                } else {

                    HereDoc.Quote = ch;

                

                    if (ch == '\'' || ch == '"' || ch == '`') {

                        HereDoc.Quoted = true;

                        HereDoc.Delimiter[0] = '\0';

                    } else {

                        HereDoc.Quoted = false;

                        HereDoc.Delimiter[0] = ch;

                        HereDoc.Delimiter[1] = '\0';

                        HereDoc.DelimiterLength = 1;

                    }

                }

			} else if (HereDoc.State == 1) { // collect the delimiter

                if (isEOLChar(ch)) {

                    // End the quote now, and go back for more

                    styler.ColourTo(i - 1, state);

                    state = SCE_RB_DEFAULT;

                    i--;

                    chNext = ch;

                    chNext2 = chNext;

                    preferRE = false;

                } else if (HereDoc.Quoted) {

					if (ch == HereDoc.Quote) { // closing quote => end of delimiter

						styler.ColourTo(i, state);

						state = SCE_RB_DEFAULT;

                        preferRE = false;

                    } else {

						if (ch == '\\' && !isEOLChar(chNext)) {

                            advance_char(i, ch, chNext, chNext2);

						}

						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;

						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';

                    }

                } else { // an unquoted here-doc delimiter

					if (isalnum(ch) || ch == '_') {

						HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;

						HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';

					} else {

						styler.ColourTo(i - 1, state);

                        redo_char(i, ch, chNext, chNext2, state);

                        preferRE = false;

					}

                }

				if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {

					styler.ColourTo(i - 1, state);

					state = SCE_RB_ERROR;

                    preferRE = false;

				}

            }

        } else if (state == SCE_RB_HERE_Q) {

            // Not needed: HereDoc.State == 2

            // Indentable here docs: look backwards

            // Non-indentable: look forwards, like in Perl

            //

            // Why: so we can quickly resolve things like <<-" abc"



            if (!HereDoc.CanBeIndented) {

                if (isEOLChar(chPrev)

                    && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {

                    styler.ColourTo(i - 1, state);

                    i += HereDoc.DelimiterLength - 1;

                    chNext = styler.SafeGetCharAt(i + 1);

                    if (isEOLChar(chNext)) {

                        styler.ColourTo(i, SCE_RB_HERE_DELIM);

                        state = SCE_RB_DEFAULT;

                        HereDoc.State = 0;

                        preferRE = false;

                    }

                    // Otherwise we skipped through the here doc faster.

                }

            } else if (isEOLChar(chNext)

                       && lookingAtHereDocDelim(styler,

                                                i - HereDoc.DelimiterLength + 1,

                                                lengthDoc,

                                                HereDoc.Delimiter)) {

                styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);

                styler.ColourTo(i, SCE_RB_HERE_DELIM);

                state = SCE_RB_DEFAULT;

                preferRE = false;

                HereDoc.State = 0;

            }

        } else if (state == SCE_RB_CLASS_VAR

                   || state == SCE_RB_INSTANCE_VAR

                   || state == SCE_RB_SYMBOL) {

            if (!iswordchar(ch)) {

                styler.ColourTo(i - 1, state);

                redo_char(i, ch, chNext, chNext2, state); // pass by ref

                preferRE = false;

            }

        } else if (state == SCE_RB_GLOBAL) {

            if (!iswordchar(ch)) {

                // handle special globals here as well

                if (chPrev == '$') {

                    if (ch == '-') {

                        // Include the next char, like $-a

                        advance_char(i, ch, chNext, chNext2);

                    }

                    styler.ColourTo(i, state);

                    state = SCE_RB_DEFAULT;

                } else {

                    styler.ColourTo(i - 1, state);

                    redo_char(i, ch, chNext, chNext2, state); // pass by ref

                }

                preferRE = false;

            }

        } else if (state == SCE_RB_POD) {

            // PODs end with ^=end\s, -- any whitespace can follow =end

            if (strchr(" \t\n\r", ch) != NULL

                && i > 5

                && isEOLChar(styler[i - 5])

                && isMatch(styler, lengthDoc, i - 4, "=end")) {

                styler.ColourTo(i - 1, state);

                state = SCE_RB_DEFAULT;

                preferRE = false;

            }

        } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {

            if (ch == '\\' && Quote.Up != '\\') {

                // Skip one

                advance_char(i, ch, chNext, chNext2);

            } else if (ch == Quote.Down) {

                Quote.Count--;

                if (Quote.Count == 0) {

                    // Include the options

                    while (isSafeAlpha(chNext)) {

                        i++;

						ch = chNext;

                        chNext = styler.SafeGetCharAt(i + 1);

                    }

                    styler.ColourTo(i, state);

                    state = SCE_RB_DEFAULT;

                    preferRE = false;

                }

            } else if (ch == Quote.Up) {

                // Only if close quoter != open quoter

                Quote.Count++;

                

            } else if (ch == '#' ) {

                //todo: distinguish comments from pound chars

                // for now, handle as comment

                styler.ColourTo(i - 1, state);

                bool inEscape = false;

                while (++i < lengthDoc) {

                    ch = styler.SafeGetCharAt(i);

                    if (ch == '\\') {

                        inEscape = true;

                    } else if (isEOLChar(ch)) {

                        // Comment inside a regex

                        styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);

                        break;

                    } else if (inEscape) {

                        inEscape = false;  // don't look at char

                    } else if (ch == Quote.Down) {

                        // Have the regular handler deal with this

                        // to get trailing modifiers.

                        i--;

                        ch = styler[i];

						break;

                    }

                }

                chNext = styler.SafeGetCharAt(i + 1);

                chNext2 = styler.SafeGetCharAt(i + 2);

            }

        // Quotes of all kinds...

        } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ || 

                   state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||

                   state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||

                   state == SCE_RB_BACKTICKS) {

            if (!Quote.Down && !isspacechar(ch)) {

                Quote.Open(ch);

            } else if (ch == '\\' && Quote.Up != '\\') {

                //Riddle me this: Is it safe to skip *every* escaped char?

                advance_char(i, ch, chNext, chNext2);

            } else if (ch == Quote.Down) {

                Quote.Count--;

                if (Quote.Count == 0) {

                    styler.ColourTo(i, state);

                    state = SCE_RB_DEFAULT;

                    preferRE = false;

                }

            } else if (ch == Quote.Up) {

                Quote.Count++;

            }

        }

            

        if (state == SCE_RB_ERROR) {

            break;

        }

        chPrev = ch;

    }

    if (state == SCE_RB_WORD) {

        // We've ended on a word, possibly at EOF, and need to

        // classify it.

        (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);

    } else {

        styler.ColourTo(lengthDoc - 1, state);

    }

}



// Helper functions for folding



/**
 * Copy the word that ends at pos into prevWord as a NUL-terminated string.
 *
 * The word is the run of characters immediately before pos whose style
 * equals word_state, followed by the character at pos itself (which is
 * copied without checking its style).
 *
 * @param pos        Position of the last character of the word.
 * @param prevWord   Destination buffer; must hold at least
 *                   MAX_KEYWORD_LENGTH + 1 bytes.
 * @param styler     Document accessor; flushed before styles are read.
 * @param word_state Style value that marks the characters of the word.
 *
 * At most MAX_KEYWORD_LENGTH characters are copied (the ones closest to
 * pos), so the text and its terminator always fit in the buffer.
 */
static void getPrevWord(int pos,
                        char *prevWord,
                        Accessor &styler,
                        int word_state)
{
    if (pos < 0) {
        // Nothing sensible to read before the start of the document.
        *prevWord = 0;
        return;
    }
    styler.Flush();
    // Walk left for as long as the preceding character still belongs to
    // the word.  Position 0 is only included when its style matches.
    int start = pos;
    while (start > 0 && actual_style(styler.StyleAt(start - 1)) == word_state) {
        start--;
    }
    // Keep the copy within MAX_KEYWORD_LENGTH characters so that the
    // terminating zero still fits in a MAX_KEYWORD_LENGTH + 1 byte buffer.
    if (pos - start >= MAX_KEYWORD_LENGTH) {
        start = pos - MAX_KEYWORD_LENGTH + 1;
    }
    char *dst = prevWord;
    for (int i = start; i <= pos; i++) {
        *dst++ = styler[i];
    }
    *dst = 0;
}



/**
 * Report whether prevWord is one of the keywords "if", "do", "while",
 * "unless" or "until".
 *
 * @param prevWord NUL-terminated word to test (compared case-sensitively).
 * @return true when prevWord equals one of the listed keywords.
 */
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Ordered from most likely used to least likely.
    static const char * const ambiguousWords[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguousWords) / sizeof(ambiguousWords[0]);
    for (size_t k = 0; k < count; k++) {
        if (strcmp(prevWord, ambiguousWords[k]) == 0) {
            return true;
        }
    }
    return false;
}



// True for the two in-line blank characters, space and tab; line-end
// characters are deliberately not counted as whitespace here.
static bool inline iswhitespace(char ch) {
    switch (ch) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}



// Decide whether a keyword should be demoted.  That is the case when
//  - if, while, unless or until modifies a statement, or
//  - do follows a while or until as a noise word (like then after if).
//
// word   - the keyword text
// pos    - document position; the text before it on the same line is
//          inspected, scanning right to left
// styler - document accessor (flushed before styles are read)
//
// Returns true when the keyword is judged to be a modifier.

static bool keywordIsModifier(const char *word,
                              int pos,
                              Accessor &styler)
{
    // "do" has its own rule: it is a noise word after while/until.
    if (strcmp(word, "do") == 0) {
        return keywordDoStartsLoop(pos, styler);
    }

    const int lineBegin = styler.LineStart(styler.GetLine(pos));
    int prevStyle = SCE_RB_DEFAULT;
    styler.Flush();

    // Step left over default-styled text until something with another
    // style shows up or the start of the line is passed.
    for (--pos; pos >= lineBegin; --pos) {
        prevStyle = actual_style(styler.StyleAt(pos));
        if (prevStyle != SCE_RB_DEFAULT) {
            break;
        }
        const char c = styler[pos];
        if (!iswhitespace(c) && (c == '\r' || c == '\n')) {
            // Scintilla's LineStart() and GetLine() routines aren't
            // platform-independent, so text prepared on a different
            // system may put a line end inside what they call one line.
            return false;
        }
    }
    if (pos < lineBegin) {
        // Nothing but default-styled text precedes the keyword.
        return false; //XXX not quite right if the prev line is a continuation
    }

    if (prevStyle == SCE_RB_OPERATOR) {
        // Assume that if the keyword follows an operator,
        // usually it's a block assignment, like
        // a << if x then y else z
        // Only a closing bracket means a complete expression came first.
        const char closer = styler[pos];
        return closer == ')' || closer == ']' || closer == '}';
    }

    if (prevStyle == SCE_RB_WORD) {
        // Watch out for uses of 'else if'
        //XXX: Make a list of other keywords where 'if' isn't a modifier
        //     and can appear legitimately
        if (strcmp(word, "if") != 0) {
            return true;
        }
        char prevWord[MAX_KEYWORD_LENGTH + 1];
        getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
        return strcmp(prevWord, "else") != 0;
    }

    // Remaining styles: the answer depends on the style alone.
    switch (prevStyle) {
        case SCE_RB_DEFAULT:
        case SCE_RB_COMMENTLINE:
        case SCE_RB_POD:
        case SCE_RB_CLASSNAME:
        case SCE_RB_DEFNAME:
        case SCE_RB_MODULE_NAME:
            return false;
        default:
            return true;
    }
}



// "while" and "until" spelled in reverse, for comparison against a word
// that keywordDoStartsLoop collects right-to-left.
#define WHILE_BACKWARDS "elihw"

#define UNTIL_BACKWARDS "litnu"



// Nothing fancy -- look to see if we follow a while/until somewhere

// on the current line



static bool keywordDoStartsLoop(int pos,

                                Accessor &styler)

{

    char ch;

    int style;

	int lineStart = styler.GetLine(pos);

    int lineStartPosn = styler.LineStart(lineStart);

    styler.Flush();

    while (--pos >= lineStartPosn) {

        style = actual_style(styler.StyleAt(pos));

		if (style == SCE_RB_DEFAULT) {

			if ((ch = styler[pos]) == '\r' || ch == '\n') {

				// Scintilla's LineStart() and GetLine() routines aren't

				// platform-independent, so if we have text prepared with

				// a different system we can't rely on it.

				return false;

			}

		} else if (style == SCE_RB_WORD) {

            // Check for while or until, but write the word in backwards

            char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero

            char *dst = prevWord;

            int wordLen = 0;

            int start_word;

            for (start_word = pos;

                 start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;

                 start_word--) {

                if (++wordLen < MAX_KEYWORD_LENGTH) {

                    *dst++ = styler[start_word];

                }

            }

            *dst = 0;

            // Did we see our keyword?

            if (!strcmp(prevWord, WHILE_BACKWARDS)

                || !strcmp(prevWord, UNTIL_BACKWARDS)) {

                return true;

            }

            // We can move pos to the beginning of the keyword, and then

            // accept another decrement, as we can never have two contiguous

            // keywords:

            // word1 word2

            //           ^

            //        <-  move to start_word

            //      ^

            //      <- loop decrement

            //     ^  # pointing to end of word1 is fine

            pos = start_word;

        }

    }

    return false;

}



/*

 *  Folding Ruby

 * 

 *  The language is quite complex to analyze without a full parse.

 *  For example, this line shouldn't affect fold level:

 * 

 *   print "hello" if feeling_friendly?

 * 

 *  Neither should this:

 * 

 *   print "hello" \

 *      if feeling_friendly?

 * 

 * 

 *  But this should:

 * 

 *   if feeling_friendly?  #++

 *     print "hello" \

 *     print "goodbye"

 *   end                   #--

 * 

 *  So we cheat, by actually looking at the existing indentation

 *  levels for each line, and just echoing it back.  Like Python.

 *  Then if we get better at it, we'll take braces into consideration,

 *  which always affect folding levels.



 *  How the keywords should work:

 *  No effect:

 *  __FILE__ __LINE__ BEGIN END alias and 

 *  defined? false in nil not or self super then

 *  true undef



 *  Always increment:

 *  begin  class def do for module when {

 * 

 *  Always decrement:

 *  end }

 * 

 *  Increment if these start a statement

 *  if unless until while -- do nothing if they're modifiers



 *  These end a block if there's no modifier, but don't bother

 *  break next redo retry return yield

 * 

 *  These temporarily de-indent, but re-indent

 *  case else elsif ensure rescue

 * 

 *  This means that the folder reflects indentation rather

 *  than setting it.  The language-service updates indentation

 *  when users type return and finish entering de-denters.

 * 

 *  Later offer to fold POD, here-docs, strings, and blocks of comments

 */



/**
 * Folder for Ruby: assigns a fold level to each line in the range
 * [startPos, startPos + length).
 *
 * The running level
 *  - rises on '[', '{' and '(' and falls on ')', '}' and ']' when those
 *    characters are styled SCE_RB_OPERATOR;
 *  - falls on the keyword "end" and rises on if, def, class, module, begin,
 *    case, do, while, unless, until and for, whenever the word is styled
 *    SCE_RB_WORD;
 *  - when the "fold.comment" property is set, rises for a comment whose
 *    second character is '{' and falls for one whose second character
 *    is '}'.
 * The level is never driven below zero.
 *
 * @param startPos  First position to fold; may be adjusted by
 *                  synchronizeDocStart().
 * @param length    Number of characters to fold; may likewise be adjusted.
 * @param initStyle Style in effect at startPos; passed on to
 *                  synchronizeDocStart().
 * @param styler    Document accessor used to read text, styles and levels
 *                  and to store the computed levels.
 *
 * The WordList array parameter is unused.
 */
static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
                      WordList *[], Accessor &styler) {
    const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
    const bool foldComment = styler.GetPropertyInt("fold.comment") != 0;

    synchronizeDocStart(startPos, length, initStyle, styler, // ref args
                        false);
    unsigned int endPos = startPos + length;
    int visibleChars = 0;
    int lineCurrent = styler.GetLine(startPos);
    int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
                                         & SC_FOLDLEVELNUMBERMASK
                                         & ~SC_FOLDLEVELBASE);
    int levelCurrent = levelPrev;
    char chNext = styler[startPos];
    int styleNext = styler.StyleAt(startPos);
    int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
    bool buffer_ends_with_eol = false;
    for (unsigned int i = startPos; i < endPos; i++) {
        char ch = chNext;
        chNext = styler.SafeGetCharAt(i + 1);
        int style = styleNext;
        styleNext = styler.StyleAt(i + 1);
        bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
        if (style == SCE_RB_COMMENTLINE) {
            // Only the first character of a comment is examined, so the
            // fold marker has to follow it immediately.
            if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
                if (chNext == '{') {
                    levelCurrent++;
                } else if (chNext == '}') {
                    // Don't decrement below 0
                    if (levelCurrent > 0)
                        levelCurrent--;
                }
            }
        } else if (style == SCE_RB_OPERATOR) {
            // Explicit comparisons rather than strchr(): strchr() also
            // matches the string terminator, which would count a NUL
            // character as an opening bracket.
            if (ch == '[' || ch == '{' || ch == '(') {
                levelCurrent++;
            } else if (ch == ')' || ch == '}' || ch == ']') {
                // Don't decrement below 0
                if (levelCurrent > 0)
                    levelCurrent--;
            }
        } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
            // i is the last character of a keyword:
            // look at the keyword on the left and decide what to do
            char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
            prevWord[0] = 0;
            getPrevWord(i, prevWord, styler, SCE_RB_WORD);
            if (!strcmp(prevWord, "end")) {
                // Don't decrement below 0
                if (levelCurrent > 0)
                    levelCurrent--;
            } else if (   !strcmp(prevWord, "if")
                       || !strcmp(prevWord, "def")
                       || !strcmp(prevWord, "class")
                       || !strcmp(prevWord, "module")
                       || !strcmp(prevWord, "begin")
                       || !strcmp(prevWord, "case")
                       || !strcmp(prevWord, "do")
                       || !strcmp(prevWord, "while")
                       || !strcmp(prevWord, "unless")
                       || !strcmp(prevWord, "until")
                       || !strcmp(prevWord, "for")
                          ) {
                levelCurrent++;
            }
        }
        if (atEOL) {
            // The line is complete: store the level it started with, plus
            // flags describing it.
            int lev = levelPrev;
            if (visibleChars == 0 && foldCompact)
                lev |= SC_FOLDLEVELWHITEFLAG;
            if ((levelCurrent > levelPrev) && (visibleChars > 0))
                lev |= SC_FOLDLEVELHEADERFLAG;
            styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
            lineCurrent++;
            levelPrev = levelCurrent;
            visibleChars = 0;
            buffer_ends_with_eol = true;
        } else if (!isspacechar(ch)) {
            visibleChars++;
            buffer_ends_with_eol = false;
        }
        // Remember this character's style so that the next iteration can
        // tell whether it sits at the start of a comment.
        stylePrev = style;
    }
    // Fill in the real level of the next line, keeping the current flags as they will be filled in later
    if (!buffer_ends_with_eol) {
        lineCurrent++;
        int new_lev = levelCurrent;
        if (visibleChars == 0 && foldCompact) {
            new_lev |= SC_FOLDLEVELWHITEFLAG;
        }
        if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
            new_lev |= SC_FOLDLEVELHEADERFLAG;
        }
        levelCurrent = new_lev;
    }
    styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
}



// Names describing the lexer's word lists; the array ends with a 0 entry.
// NOTE(review): presumably one name per WordList handed to the lexer --
// confirm against LexerModule.
static const char * const rubyWordListDesc[] = {

	"Keywords",

	0

};



// Lexer module object tying SCLEX_RUBY and the name "ruby" to the
// colouriser (ColouriseRbDoc) and folder (FoldRbDoc) defined in this file.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);
Alerts (20)

Complexity hotspot; lines 232 to 246 (total complexity: 15)
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
Complexity hotspot; lines 895 to 899 (total complexity: 15)
895 896 897 898 899