/cssed-0.4.0/scintilla/src/LexRuby.cxx
C++ | 1256 lines | 1015 code | 70 blank | 171 comment | 364 complexity | fcc18055fbdfb103385ab2e5e2394f6f MD5 | raw file
Possible License(s): GPL-2.0
- // Scintilla source code edit control
- /** @file LexRuby.cxx
- ** Lexer for Ruby.
- **/
- // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
- // The License.txt file describes the conditions under which this software may be distributed.
-
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include <stdio.h>
- #include <stdarg.h>
-
- #include "Platform.h"
-
- #include "PropSet.h"
- #include "Accessor.h"
- #include "KeyWords.h"
- #include "Scintilla.h"
- #include "SciLexer.h"
-
- #ifdef SCI_NAMESPACE
- using namespace Scintilla;
- #endif
-
//XXX Identical to Perl, put in common area
// Tells whether ch is one of the two line-terminator bytes (CR or LF).
static inline bool isEOLChar(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
-
// Tells whether ch is one of the punctuation characters listed below.
static inline bool isRubyOperatorChar(char ch) {
    // strchr() also matches the terminating NUL of the string it searches,
    // so a NUL byte has to be rejected explicitly.
    return ch != '\0' && strchr("%^&*\\()-+=|{}[]:;<>,/?!.~", ch) != NULL;
}
-
-
// Alphabetic test restricted to 7-bit characters: anything whose value,
// viewed as unsigned, exceeds 127 is rejected before isalpha() is consulted.
static inline bool isSafeAlpha(char ch) {
    const unsigned int code = (unsigned int) ch;
    if (code > 127) {
        return false;
    }
    return isalpha(ch) != 0;
}
-
// Size limit used by this file for the buffers that hold a single word.
#define MAX_KEYWORD_LENGTH 200

// Mask applied to a raw style value before it is compared with an SCE_RB_*
// state.  The macro argument is parenthesised so that an expression such as
// actual_style(a | b) masks the whole expression, not just its last operand.
#define STYLE_MASK 63
#define actual_style(style) ((style) & STYLE_MASK)
-
- static bool followsDot(unsigned int pos, Accessor &styler) {
- styler.Flush();
- for (; pos >= 1; --pos) {
- int style = actual_style(styler.StyleAt(pos));
- char ch;
- switch (style) {
- case SCE_RB_DEFAULT:
- ch = styler[pos];
- if (ch == ' ' || ch == '\t') {
- //continue
- } else {
- return false;
- }
- break;
-
- case SCE_RB_OPERATOR:
- return styler[pos] == '.';
-
- default:
- return false;
- }
- }
- return false;
- }
-
- // Forward declarations
- static bool keywordIsAmbiguous(const char *prevWord);
- static bool keywordDoStartsLoop(int pos,
- Accessor &styler);
- static bool keywordIsModifier(const char *word,
- int pos,
- Accessor &styler);
-
- static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
- char s[100];
- unsigned int i, j;
- unsigned int lim = end - start + 1; // num chars to copy
- if (lim >= MAX_KEYWORD_LENGTH) {
- lim = MAX_KEYWORD_LENGTH - 1;
- }
- for (i = start, j = 0; j < lim; i++, j++) {
- s[j] = styler[i];
- }
- s[j] = '\0';
- int chAttr;
- if (0 == strcmp(prevWord, "class"))
- chAttr = SCE_RB_CLASSNAME;
- else if (0 == strcmp(prevWord, "module"))
- chAttr = SCE_RB_MODULE_NAME;
- else if (0 == strcmp(prevWord, "def"))
- chAttr = SCE_RB_DEFNAME;
- else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
- if (keywordIsAmbiguous(s)
- && keywordIsModifier(s, start, styler)) {
-
- // Demoted keywords are colored as keywords,
- // but do not affect changes in indentation.
- //
- // Consider the word 'if':
- // 1. <<if test ...>> : normal
- // 2. <<stmt if test>> : demoted
- // 3. <<lhs = if ...>> : normal: start a new indent level
- // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
-
- chAttr = SCE_RB_WORD_DEMOTED;
- } else {
- chAttr = SCE_RB_WORD;
- }
- } else
- chAttr = SCE_RB_IDENTIFIER;
- styler.ColourTo(end, chAttr);
- if (chAttr == SCE_RB_WORD) {
- strcpy(prevWord, s);
- } else {
- prevWord[0] = 0;
- }
- return chAttr;
- }
-
-
- //XXX Identical to Perl, put in common area
- static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
- if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
- return false;
- }
- while (*val) {
- if (*val != styler[pos++]) {
- return false;
- }
- val++;
- }
- return true;
- }
-
- // Do Ruby better -- find the end of the line, work back,
- // and then check for leading white space
-
- // Precondition: the here-doc target can be indented
- static bool lookingAtHereDocDelim(Accessor &styler,
- int pos,
- int lengthDoc,
- const char *HereDocDelim)
- {
- if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
- return false;
- }
- while (--pos > 0) {
- char ch = styler[pos];
- if (isEOLChar(ch)) {
- return true;
- } else if (ch != ' ' && ch != '\t') {
- return false;
- }
- }
- return false;
- }
-
//XXX Identical to Perl, put in common area
// Maps an opening bracket to its closing partner; every other character is
// its own partner.
static char opposite(char ch) {
    switch (ch) {
    case '(':
        return ')';
    case '[':
        return ']';
    case '{':
        return '}';
    case '<':
        return '>';
    default:
        return ch;
    }
}
-
- // Null transitions when we see we've reached the end
- // and need to relex the curr char.
-
- static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
- int &state) {
- i--;
- chNext2 = chNext;
- chNext = ch;
- state = SCE_RB_DEFAULT;
- }
-
// Consumes one character: the index moves forward and the look-ahead
// characters shift one place to the left.  chNext2 is only read, so it
// keeps its old value afterwards.
static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
    const char upcoming = chNext;
    chNext = chNext2;
    ch = upcoming;
    i += 1;
}
-
- // precondition: startPos points to one after the EOL char
- static bool currLineContainsHereDelims(int& startPos,
- Accessor &styler) {
- if (startPos <= 1)
- return false;
-
- int pos;
- for (pos = startPos - 1; pos > 0; pos--) {
- char ch = styler.SafeGetCharAt(pos);
- if (isEOLChar(ch)) {
- // Leave the pointers where they are -- there are no
- // here doc delims on the current line, even if
- // the EOL isn't default style
-
- return false;
- } else {
- styler.Flush();
- if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
- break;
- }
- }
- }
- if (pos == 0) {
- return false;
- }
- // Update the pointers so we don't have to re-analyze the string
- startPos = pos;
- return true;
- }
-
-
- static bool isEmptyLine(int pos,
- Accessor &styler) {
- int spaceFlags = 0;
- int lineCurrent = styler.GetLine(pos);
- int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
- return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
- }
-
// Returns true when keyword is one of the words in the table below; the
// lexer uses the answer to decide whether a following '/' may open a
// regular expression.
static bool RE_CanFollowKeyword(const char *keyword) {
    static const char *const regexMayFollow[] = {
        "and", "begin", "break", "case", "do",
        "else", "elsif", "if", "next", "return",
        "when", "unless", "until", "not", "or"
    };
    const size_t count = sizeof(regexMayFollow) / sizeof(regexMayFollow[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(keyword, regexMayFollow[k]) == 0) {
            return true;
        }
    }
    return false;
}
-
-
- //todo: if we aren't looking at a stdio character,
- // move to the start of the first line that is not in a
- // multi-line construct
-
- static void synchronizeDocStart(unsigned int& startPos,
- int &length,
- int &initStyle,
- Accessor &styler,
- bool skipWhiteSpace=false) {
-
- styler.Flush();
- int style = actual_style(styler.StyleAt(startPos));
- switch (style) {
- case SCE_RB_STDIN:
- case SCE_RB_STDOUT:
- case SCE_RB_STDERR:
- // Don't do anything else with these.
- return;
- }
-
- int pos = startPos;
- // Quick way to characterize each line
- int lineStart;
- for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
- // Now look at the style before the previous line's EOL
- pos = styler.LineStart(lineStart) - 1;
- if (pos <= 10) {
- lineStart = 0;
- break;
- }
- char ch = styler.SafeGetCharAt(pos);
- char chPrev = styler.SafeGetCharAt(pos - 1);
- if (ch == '\n' && chPrev == '\r') {
- pos--;
- }
- if (styler.SafeGetCharAt(pos - 1) == '\\') {
- // Continuation line -- keep going
- } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
- // Part of multi-line construct -- keep going
- } else if (currLineContainsHereDelims(pos, styler)) {
- // Keep going, with pos and length now pointing
- // at the end of the here-doc delimiter
- } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
- // Keep going
- } else {
- break;
- }
- }
- pos = styler.LineStart(lineStart);
- length += (startPos - pos);
- startPos = pos;
- initStyle = SCE_RB_DEFAULT;
- }
-
- static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
- WordList *keywordlists[], Accessor &styler) {
-
- // Lexer for Ruby often has to backtrack to start of current style to determine
- // which characters are being used as quotes, how deeply nested is the
- // start position and what the termination string is for here documents
-
- WordList &keywords = *keywordlists[0];
-
- class HereDocCls {
- public:
- int State;
- // States
- // 0: '<<' encountered
- // 1: collect the delimiter
- // 1b: text between the end of the delimiter and the EOL
- // 2: here doc text (lines after the delimiter)
- char Quote; // the char after '<<'
- bool Quoted; // true if Quote in ('\'','"','`')
- int DelimiterLength; // strlen(Delimiter)
- char Delimiter[256]; // the Delimiter, limit of 256: from Perl
- bool CanBeIndented;
- HereDocCls() {
- State = 0;
- DelimiterLength = 0;
- Delimiter[0] = '\0';
- CanBeIndented = false;
- }
- };
- HereDocCls HereDoc;
-
- class QuoteCls {
- public:
- int Count;
- char Up;
- char Down;
- QuoteCls() {
- this->New();
- }
- void New() {
- Count = 0;
- Up = '\0';
- Down = '\0';
- }
- void Open(char u) {
- Count++;
- Up = u;
- Down = opposite(Up);
- }
- };
- QuoteCls Quote;
-
- int numDots = 0; // For numbers --
- // Don't start lexing in the middle of a num
-
- synchronizeDocStart(startPos, length, initStyle, styler, // ref args
- false);
-
- bool preferRE = true;
- int state = initStyle;
- int lengthDoc = startPos + length;
-
- char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
- prevWord[0] = '\0';
- if (length == 0)
- return;
-
- char chPrev = styler.SafeGetCharAt(startPos - 1);
- char chNext = styler.SafeGetCharAt(startPos);
- // Ruby uses a different mask because bad indentation is marked by oring with 32
- styler.StartAt(startPos, 127);
- styler.StartSegment(startPos);
-
- static int q_states[] = {SCE_RB_STRING_Q,
- SCE_RB_STRING_QQ,
- SCE_RB_STRING_QR,
- SCE_RB_STRING_QW,
- SCE_RB_STRING_QW,
- SCE_RB_STRING_QX};
- static const char* q_chars = "qQrwWx";
-
- for (int i = startPos; i < lengthDoc; i++) {
- char ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- char chNext2 = styler.SafeGetCharAt(i + 2);
-
- if (styler.IsLeadByte(ch)) {
- chNext = chNext2;
- chPrev = ' ';
- i += 1;
- continue;
- }
-
- // skip on DOS/Windows
- //No, don't, because some things will get tagged on,
- // so we won't recognize keywords, for example
- #if 0
- if (ch == '\r' && chNext == '\n') {
- continue;
- }
- #endif
-
- if (HereDoc.State == 1 && isEOLChar(ch)) {
- // Begin of here-doc (the line after the here-doc delimiter):
- HereDoc.State = 2;
- styler.ColourTo(i-1, state);
- // Don't check for a missing quote, just jump into
- // the here-doc state
- state = SCE_RB_HERE_Q;
- }
-
- // Regular transitions
- if (state == SCE_RB_DEFAULT) {
- if (isdigit(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_NUMBER;
- numDots = 0;
- } else if (iswordstart(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_WORD;
- } else if (ch == '#') {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_COMMENTLINE;
- } else if (ch == '=') {
- // =begin indicates the start of a comment (doc) block
- if (i == 0 || isEOLChar(chPrev)
- && chNext == 'b'
- && styler.SafeGetCharAt(i + 2) == 'e'
- && styler.SafeGetCharAt(i + 3) == 'g'
- && styler.SafeGetCharAt(i + 4) == 'i'
- && styler.SafeGetCharAt(i + 5) == 'n'
- && !iswordchar(styler.SafeGetCharAt(i + 6))) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_POD;
- } else {
- styler.ColourTo(i - 1, state);
- styler.ColourTo(i, SCE_RB_OPERATOR);
- preferRE = true;
- }
- } else if (ch == '"') {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_STRING;
- Quote.New();
- Quote.Open(ch);
- } else if (ch == '\'') {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_CHARACTER;
- Quote.New();
- Quote.Open(ch);
- } else if (ch == '`') {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_BACKTICKS;
- Quote.New();
- Quote.Open(ch);
- } else if (ch == '@') {
- // Instance or class var
- styler.ColourTo(i - 1, state);
- if (chNext == '@') {
- state = SCE_RB_CLASS_VAR;
- advance_char(i, ch, chNext, chNext2); // pass by ref
- } else {
- state = SCE_RB_INSTANCE_VAR;
- }
- } else if (ch == '$') {
- // Check for a builtin global
- styler.ColourTo(i - 1, state);
- // Recognize it bit by bit
- state = SCE_RB_GLOBAL;
- } else if (ch == '/' && preferRE) {
- // Ambigous operator
- styler.ColourTo(i - 1, state);
- state = SCE_RB_REGEX;
- Quote.New();
- Quote.Open(ch);
- } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
-
- // Recognise the '<<' symbol - either a here document or a binary op
-
- styler.ColourTo(i - 1, state);
- i++;
- chNext = chNext2;
- styler.ColourTo(i, SCE_RB_OPERATOR);
-
- if (preferRE) {
- state = SCE_RB_HERE_DELIM;
- HereDoc.State = 0;
- } else {
- // leave state as default
- // We don't have all the heuristics Perl has for indications
- // of a here-doc, because '<<' is overloadable and used
- // for so many other classes.
- preferRE = true;
- }
- } else if (ch == ':') {
- styler.ColourTo(i - 1, state);
- if (chNext == ':') {
- // Mark "::" as an operator, not symbol start
- styler.ColourTo(i + 1, SCE_RB_OPERATOR);
- advance_char(i, ch, chNext, chNext2); // pass by ref
- state = SCE_RB_DEFAULT;
- preferRE = false;
- } else if (iswordchar(chNext)) {
- state = SCE_RB_SYMBOL;
- } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
- // Do the operator analysis in-line, looking ahead
- // Based on the table in pickaxe 2nd ed., page 339
- bool doColoring = true;
- switch (chNext) {
- case '[':
- if (chNext2 == ']' ) {
- char ch_tmp = styler.SafeGetCharAt(i + 3);
- if (ch_tmp == '=') {
- i += 3;
- ch = ch_tmp;
- chNext = styler.SafeGetCharAt(i + 1);
- } else {
- i += 2;
- ch = chNext2;
- chNext = ch_tmp;
- }
- } else {
- doColoring = false;
- }
- break;
-
- case '*':
- if (chNext2 == '*') {
- i += 2;
- ch = chNext2;
- chNext = styler.SafeGetCharAt(i + 1);
- } else {
- advance_char(i, ch, chNext, chNext2);
- }
- break;
-
- case '!':
- if (chNext2 == '=' || chNext2 == '~') {
- i += 2;
- ch = chNext2;
- chNext = styler.SafeGetCharAt(i + 1);
- } else {
- advance_char(i, ch, chNext, chNext2);
- }
- break;
-
- case '<':
- if (chNext2 == '<') {
- i += 2;
- ch = chNext2;
- chNext = styler.SafeGetCharAt(i + 1);
- } else if (chNext2 == '=') {
- char ch_tmp = styler.SafeGetCharAt(i + 3);
- if (ch_tmp == '>') { // <=> operator
- i += 3;
- ch = ch_tmp;
- chNext = styler.SafeGetCharAt(i + 1);
- } else {
- i += 2;
- ch = chNext2;
- chNext = ch_tmp;
- }
- } else {
- advance_char(i, ch, chNext, chNext2);
- }
- break;
-
- default:
- // Simple one-character operators
- advance_char(i, ch, chNext, chNext2);
- break;
- }
- if (doColoring) {
- styler.ColourTo(i, SCE_RB_SYMBOL);
- state = SCE_RB_DEFAULT;
- }
- } else if (!preferRE) {
- // Don't color symbol strings (yet)
- // Just color the ":" and color rest as string
- styler.ColourTo(i, SCE_RB_SYMBOL);
- state = SCE_RB_DEFAULT;
- } else {
- styler.ColourTo(i, SCE_RB_OPERATOR);
- state = SCE_RB_DEFAULT;
- preferRE = true;
- }
- } else if (ch == '%') {
- styler.ColourTo(i - 1, state);
- bool have_string = false;
- if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
- Quote.New();
- const char *hit = strchr(q_chars, chNext);
- if (hit != NULL) {
- state = q_states[hit - q_chars];
- Quote.Open(chNext2);
- i += 2;
- ch = chNext2;
- chNext = styler.SafeGetCharAt(i + 1);
- have_string = true;
- }
- } else if (!iswordchar(chNext)) {
- state = SCE_RB_STRING_QQ;
- Quote.Open(chNext);
- advance_char(i, ch, chNext, chNext2); // pass by ref
- have_string = true;
- }
- if (!have_string) {
- styler.ColourTo(i, SCE_RB_OPERATOR);
- // stay in default
- preferRE = true;
- }
- } else if (isoperator(ch)) {
- styler.ColourTo(i - 1, state);
- styler.ColourTo(i, SCE_RB_OPERATOR);
- // If we're ending an expression or block,
- // assume it ends an object, and the ambivalent
- // constructs are binary operators
- //
- // So if we don't have one of these chars,
- // we aren't ending an object exp'n, and ops
- // like : << / are unary operators.
-
- preferRE = (strchr(")}]", ch) == NULL);
- // Stay in default state
- } else if (isEOLChar(ch)) {
- // Make sure it's a true line-end, with no backslash
- if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
- && chPrev != '\\') {
- // Assume we've hit the end of the statement.
- preferRE = true;
- }
- }
- } else if (state == SCE_RB_WORD) {
- if (ch == '.' || !iswordchar(ch)) {
- // Words include x? in all contexts,
- // and <letters>= after either 'def' or a dot
- // Move along until a complete word is on our left
-
- // Default accessor treats '.' as word-chars,
- // but we don't for now.
-
- if (ch == '='
- && iswordchar(chPrev)
- && (chNext == '('
- || strchr(" \t\n\r", chNext) != NULL)
- && (!strcmp(prevWord, "def")
- || followsDot(styler.GetStartSegment(), styler))) {
- // <name>= is a name only when being def'd -- Get it the next time
- // This means that <name>=<name> is always lexed as
- // <name>, (op, =), <name>
- } else if ((ch == '?' || ch == '!')
- && iswordchar(chPrev)
- && !iswordchar(chNext)) {
- // <name>? is a name -- Get it the next time
- // But <name>?<name> is always lexed as
- // <name>, (op, ?), <name>
- // Same with <name>! to indicate a method that
- // modifies its target
- } else if (isEOLChar(ch)
- && isMatch(styler, lengthDoc, i - 7, "__END__")) {
- styler.ColourTo(i, SCE_RB_DATASECTION);
- state = SCE_RB_DATASECTION;
- // No need to handle this state -- we'll just move to the end
- preferRE = false;
- } else {
- int wordStartPos = styler.GetStartSegment();
- int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
- switch (word_style) {
- case SCE_RB_WORD:
- preferRE = RE_CanFollowKeyword(prevWord);
- break;
-
- case SCE_RB_WORD_DEMOTED:
- preferRE = true;
- break;
-
- case SCE_RB_IDENTIFIER:
- if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
- preferRE = true;
- } else if (isEOLChar(ch)) {
- preferRE = true;
- } else {
- preferRE = false;
- }
- break;
- default:
- preferRE = false;
- }
- redo_char(i, ch, chNext, chNext2, state); // pass by ref
- }
- }
- } else if (state == SCE_RB_NUMBER) {
- if (isalnum(ch) || ch == '_') {
- // Keep going
- } else if (ch == '.' && ++numDots == 1) {
- // Keep going
- } else {
- styler.ColourTo(i - 1, state);
- redo_char(i, ch, chNext, chNext2, state); // pass by ref
- preferRE = false;
- }
- } else if (state == SCE_RB_COMMENTLINE) {
- if (isEOLChar(ch)) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_DEFAULT;
- // Use whatever setting we had going into the comment
- }
- } else if (state == SCE_RB_HERE_DELIM) {
- // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
- // Slightly different: if we find an immediate '-',
- // the target can appear indented.
-
- if (HereDoc.State == 0) { // '<<' encountered
- HereDoc.State = 1;
- HereDoc.DelimiterLength = 0;
- if (ch == '-') {
- HereDoc.CanBeIndented = true;
- advance_char(i, ch, chNext, chNext2); // pass by ref
- } else {
- HereDoc.CanBeIndented = false;
- }
- if (isEOLChar(ch)) {
- // Bail out of doing a here doc if there's no target
- state = SCE_RB_DEFAULT;
- preferRE = false;
- } else {
- HereDoc.Quote = ch;
-
- if (ch == '\'' || ch == '"' || ch == '`') {
- HereDoc.Quoted = true;
- HereDoc.Delimiter[0] = '\0';
- } else {
- HereDoc.Quoted = false;
- HereDoc.Delimiter[0] = ch;
- HereDoc.Delimiter[1] = '\0';
- HereDoc.DelimiterLength = 1;
- }
- }
- } else if (HereDoc.State == 1) { // collect the delimiter
- if (isEOLChar(ch)) {
- // End the quote now, and go back for more
- styler.ColourTo(i - 1, state);
- state = SCE_RB_DEFAULT;
- i--;
- chNext = ch;
- chNext2 = chNext;
- preferRE = false;
- } else if (HereDoc.Quoted) {
- if (ch == HereDoc.Quote) { // closing quote => end of delimiter
- styler.ColourTo(i, state);
- state = SCE_RB_DEFAULT;
- preferRE = false;
- } else {
- if (ch == '\\' && !isEOLChar(chNext)) {
- advance_char(i, ch, chNext, chNext2);
- }
- HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
- HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
- }
- } else { // an unquoted here-doc delimiter
- if (isalnum(ch) || ch == '_') {
- HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
- HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
- } else {
- styler.ColourTo(i - 1, state);
- redo_char(i, ch, chNext, chNext2, state);
- preferRE = false;
- }
- }
- if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_ERROR;
- preferRE = false;
- }
- }
- } else if (state == SCE_RB_HERE_Q) {
- // Not needed: HereDoc.State == 2
- // Indentable here docs: look backwards
- // Non-indentable: look forwards, like in Perl
- //
- // Why: so we can quickly resolve things like <<-" abc"
-
- if (!HereDoc.CanBeIndented) {
- if (isEOLChar(chPrev)
- && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
- styler.ColourTo(i - 1, state);
- i += HereDoc.DelimiterLength - 1;
- chNext = styler.SafeGetCharAt(i + 1);
- if (isEOLChar(chNext)) {
- styler.ColourTo(i, SCE_RB_HERE_DELIM);
- state = SCE_RB_DEFAULT;
- HereDoc.State = 0;
- preferRE = false;
- }
- // Otherwise we skipped through the here doc faster.
- }
- } else if (isEOLChar(chNext)
- && lookingAtHereDocDelim(styler,
- i - HereDoc.DelimiterLength + 1,
- lengthDoc,
- HereDoc.Delimiter)) {
- styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
- styler.ColourTo(i, SCE_RB_HERE_DELIM);
- state = SCE_RB_DEFAULT;
- preferRE = false;
- HereDoc.State = 0;
- }
- } else if (state == SCE_RB_CLASS_VAR
- || state == SCE_RB_INSTANCE_VAR
- || state == SCE_RB_SYMBOL) {
- if (!iswordchar(ch)) {
- styler.ColourTo(i - 1, state);
- redo_char(i, ch, chNext, chNext2, state); // pass by ref
- preferRE = false;
- }
- } else if (state == SCE_RB_GLOBAL) {
- if (!iswordchar(ch)) {
- // handle special globals here as well
- if (chPrev == '$') {
- if (ch == '-') {
- // Include the next char, like $-a
- advance_char(i, ch, chNext, chNext2);
- }
- styler.ColourTo(i, state);
- state = SCE_RB_DEFAULT;
- } else {
- styler.ColourTo(i - 1, state);
- redo_char(i, ch, chNext, chNext2, state); // pass by ref
- }
- preferRE = false;
- }
- } else if (state == SCE_RB_POD) {
- // PODs end with ^=end\s, -- any whitespace can follow =end
- if (strchr(" \t\n\r", ch) != NULL
- && i > 5
- && isEOLChar(styler[i - 5])
- && isMatch(styler, lengthDoc, i - 4, "=end")) {
- styler.ColourTo(i - 1, state);
- state = SCE_RB_DEFAULT;
- preferRE = false;
- }
- } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
- if (ch == '\\' && Quote.Up != '\\') {
- // Skip one
- advance_char(i, ch, chNext, chNext2);
- } else if (ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0) {
- // Include the options
- while (isSafeAlpha(chNext)) {
- i++;
- ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- }
- styler.ColourTo(i, state);
- state = SCE_RB_DEFAULT;
- preferRE = false;
- }
- } else if (ch == Quote.Up) {
- // Only if close quoter != open quoter
- Quote.Count++;
-
- } else if (ch == '#' ) {
- //todo: distinguish comments from pound chars
- // for now, handle as comment
- styler.ColourTo(i - 1, state);
- bool inEscape = false;
- while (++i < lengthDoc) {
- ch = styler.SafeGetCharAt(i);
- if (ch == '\\') {
- inEscape = true;
- } else if (isEOLChar(ch)) {
- // Comment inside a regex
- styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
- break;
- } else if (inEscape) {
- inEscape = false; // don't look at char
- } else if (ch == Quote.Down) {
- // Have the regular handler deal with this
- // to get trailing modifiers.
- i--;
- ch = styler[i];
- break;
- }
- }
- chNext = styler.SafeGetCharAt(i + 1);
- chNext2 = styler.SafeGetCharAt(i + 2);
- }
- // Quotes of all kinds...
- } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
- state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
- state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
- state == SCE_RB_BACKTICKS) {
- if (!Quote.Down && !isspacechar(ch)) {
- Quote.Open(ch);
- } else if (ch == '\\' && Quote.Up != '\\') {
- //Riddle me this: Is it safe to skip *every* escaped char?
- advance_char(i, ch, chNext, chNext2);
- } else if (ch == Quote.Down) {
- Quote.Count--;
- if (Quote.Count == 0) {
- styler.ColourTo(i, state);
- state = SCE_RB_DEFAULT;
- preferRE = false;
- }
- } else if (ch == Quote.Up) {
- Quote.Count++;
- }
- }
-
- if (state == SCE_RB_ERROR) {
- break;
- }
- chPrev = ch;
- }
- if (state == SCE_RB_WORD) {
- // We've ended on a word, possibly at EOF, and need to
- // classify it.
- (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
- } else {
- styler.ColourTo(lengthDoc - 1, state);
- }
- }
-
- // Helper functions for folding
-
- static void getPrevWord(int pos,
- char *prevWord,
- Accessor &styler,
- int word_state)
- {
- int i;
- styler.Flush();
- for (i = pos - 1; i > 0; i--) {
- if (actual_style(styler.StyleAt(i)) != word_state) {
- i++;
- break;
- }
- }
- if (i < pos - MAX_KEYWORD_LENGTH) // overflow
- i = pos - MAX_KEYWORD_LENGTH;
- char *dst = prevWord;
- for (; i <= pos; i++) {
- *dst++ = styler[i];
- }
- *dst = 0;
- }
-
// Returns true for the keywords that ClassifyWordRb() may demote: "if",
// "do", "while", "unless" and "until".
static bool keywordIsAmbiguous(const char *prevWord)
{
    // Order from most likely used to least likely
    // Lots of ways to do a loop in Ruby besides 'while/until'
    static const char *const ambiguous[] = {
        "if", "do", "while", "unless", "until"
    };
    const size_t count = sizeof(ambiguous) / sizeof(ambiguous[0]);
    for (size_t k = 0; k < count; ++k) {
        if (strcmp(prevWord, ambiguous[k]) == 0) {
            return true;
        }
    }
    return false;
}
-
// Horizontal white space only: blank or tab.
static bool inline iswhitespace(char ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
-
- // Demote keywords in the following conditions:
- // if, while, unless, until modify a statement
- // do after a while or until, as a noise word (like then after if)
-
- static bool keywordIsModifier(const char *word,
- int pos,
- Accessor &styler)
- {
- if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
- return keywordDoStartsLoop(pos, styler);
- }
- char ch;
- int style = SCE_RB_DEFAULT;
- int lineStart = styler.GetLine(pos);
- int lineStartPosn = styler.LineStart(lineStart);
- styler.Flush();
- while (--pos >= lineStartPosn) {
- style = actual_style(styler.StyleAt(pos));
- if (style == SCE_RB_DEFAULT) {
- if (iswhitespace(ch = styler[pos])) {
- //continue
- } else if (ch == '\r' || ch == '\n') {
- // Scintilla's LineStart() and GetLine() routines aren't
- // platform-independent, so if we have text prepared with
- // a different system we can't rely on it.
- return false;
- }
- } else {
- break;
- }
- }
- if (pos < lineStartPosn) {
- return false; //XXX not quite right if the prev line is a continuation
- }
- // First things where the action is unambiguous
- switch (style) {
- case SCE_RB_DEFAULT:
- case SCE_RB_COMMENTLINE:
- case SCE_RB_POD:
- case SCE_RB_CLASSNAME:
- case SCE_RB_DEFNAME:
- case SCE_RB_MODULE_NAME:
- return false;
- case SCE_RB_OPERATOR:
- break;
- case SCE_RB_WORD:
- // Watch out for uses of 'else if'
- //XXX: Make a list of other keywords where 'if' isn't a modifier
- // and can appear legitimately
- // Formulate this to avoid warnings from most compilers
- if (strcmp(word, "if") == 0) {
- char prevWord[MAX_KEYWORD_LENGTH + 1];
- getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
- return strcmp(prevWord, "else") != 0;
- }
- return true;
- default:
- return true;
- }
- // Assume that if the keyword follows an operator,
- // usually it's a block assignment, like
- // a << if x then y else z
-
- ch = styler[pos];
- switch (ch) {
- case ')':
- case ']':
- case '}':
- return true;
- default:
- return false;
- }
- }
-
- #define WHILE_BACKWARDS "elihw"
- #define UNTIL_BACKWARDS "litnu"
-
- // Nothing fancy -- look to see if we follow a while/until somewhere
- // on the current line
-
- static bool keywordDoStartsLoop(int pos,
- Accessor &styler)
- {
- char ch;
- int style;
- int lineStart = styler.GetLine(pos);
- int lineStartPosn = styler.LineStart(lineStart);
- styler.Flush();
- while (--pos >= lineStartPosn) {
- style = actual_style(styler.StyleAt(pos));
- if (style == SCE_RB_DEFAULT) {
- if ((ch = styler[pos]) == '\r' || ch == '\n') {
- // Scintilla's LineStart() and GetLine() routines aren't
- // platform-independent, so if we have text prepared with
- // a different system we can't rely on it.
- return false;
- }
- } else if (style == SCE_RB_WORD) {
- // Check for while or until, but write the word in backwards
- char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
- char *dst = prevWord;
- int wordLen = 0;
- int start_word;
- for (start_word = pos;
- start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
- start_word--) {
- if (++wordLen < MAX_KEYWORD_LENGTH) {
- *dst++ = styler[start_word];
- }
- }
- *dst = 0;
- // Did we see our keyword?
- if (!strcmp(prevWord, WHILE_BACKWARDS)
- || !strcmp(prevWord, UNTIL_BACKWARDS)) {
- return true;
- }
- // We can move pos to the beginning of the keyword, and then
- // accept another decrement, as we can never have two contiguous
- // keywords:
- // word1 word2
- // ^
- // <- move to start_word
- // ^
- // <- loop decrement
- // ^ # pointing to end of word1 is fine
- pos = start_word;
- }
- }
- return false;
- }
-
- /*
- * Folding Ruby
- *
- * The language is quite complex to analyze without a full parse.
- * For example, this line shouldn't affect fold level:
- *
- * print "hello" if feeling_friendly?
- *
- * Neither should this:
- *
- * print "hello" \
- * if feeling_friendly?
- *
- *
- * But this should:
- *
- * if feeling_friendly? #++
- * print "hello" \
- * print "goodbye"
- * end #--
- *
- * So we cheat, by actually looking at the existing indentation
- * levels for each line, and just echoing it back. Like Python.
- * Then if we get better at it, we'll take braces into consideration,
- * which always affect folding levels.
-
- * How the keywords should work:
- * No effect:
- * __FILE__ __LINE__ BEGIN END alias and
- * defined? false in nil not or self super then
- * true undef
-
- * Always increment:
- * begin class def do for module when {
- *
- * Always decrement:
- * end }
- *
- * Increment if these start a statement
- * if unless until while -- do nothing if they're modifiers
-
- * These end a block if there's no modifier, but don't bother
- * break next redo retry return yield
- *
- * These temporarily de-indent, but re-indent
- * case else elsif ensure rescue
- *
- * This means that the folder reflects indentation rather
- * than setting it. The language service updates indentation
- * when users press Return and finish entering de-denters.
- *
- * Later offer to fold POD, here-docs, strings, and blocks of comments
- */
-
- static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
- WordList *[], Accessor &styler) {
- const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
- bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
-
- synchronizeDocStart(startPos, length, initStyle, styler, // ref args
- false);
- unsigned int endPos = startPos + length;
- int visibleChars = 0;
- int lineCurrent = styler.GetLine(startPos);
- int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
- & SC_FOLDLEVELNUMBERMASK
- & ~SC_FOLDLEVELBASE);
- int levelCurrent = levelPrev;
- char chNext = styler[startPos];
- int styleNext = styler.StyleAt(startPos);
- int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
- bool buffer_ends_with_eol = false;
- for (unsigned int i = startPos; i < endPos; i++) {
- char ch = chNext;
- chNext = styler.SafeGetCharAt(i + 1);
- int style = styleNext;
- styleNext = styler.StyleAt(i + 1);
- bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
- if (style == SCE_RB_COMMENTLINE) {
- if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
- if (chNext == '{') {
- levelCurrent++;
- } else if (chNext == '}') {
- levelCurrent--;
- }
- }
- } else if (style == SCE_RB_OPERATOR) {
- if (strchr("[{(", ch)) {
- levelCurrent++;
- } else if (strchr(")}]", ch)) {
- // Don't decrement below 0
- if (levelCurrent > 0)
- levelCurrent--;
- }
- } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
- // Look at the keyword on the left and decide what to do
- char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
- prevWord[0] = 0;
- getPrevWord(i, prevWord, styler, SCE_RB_WORD);
- if (!strcmp(prevWord, "end")) {
- // Don't decrement below 0
- if (levelCurrent > 0)
- levelCurrent--;
- } else if ( !strcmp(prevWord, "if")
- || !strcmp(prevWord, "def")
- || !strcmp(prevWord, "class")
- || !strcmp(prevWord, "module")
- || !strcmp(prevWord, "begin")
- || !strcmp(prevWord, "case")
- || !strcmp(prevWord, "do")
- || !strcmp(prevWord, "while")
- || !strcmp(prevWord, "unless")
- || !strcmp(prevWord, "until")
- || !strcmp(prevWord, "for")
- ) {
- levelCurrent++;
- }
- }
- if (atEOL) {
- int lev = levelPrev;
- if (visibleChars == 0 && foldCompact)
- lev |= SC_FOLDLEVELWHITEFLAG;
- if ((levelCurrent > levelPrev) && (visibleChars > 0))
- lev |= SC_FOLDLEVELHEADERFLAG;
- styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
- lineCurrent++;
- levelPrev = levelCurrent;
- visibleChars = 0;
- buffer_ends_with_eol = true;
- } else if (!isspacechar(ch)) {
- visibleChars++;
- buffer_ends_with_eol = false;
- }
- }
- // Fill in the real level of the next line, keeping the current flags as they will be filled in later
- if (!buffer_ends_with_eol) {
- lineCurrent++;
- int new_lev = levelCurrent;
- if (visibleChars == 0 && foldCompact)
- new_lev |= SC_FOLDLEVELWHITEFLAG;
- if ((levelCurrent > levelPrev) && (visibleChars > 0))
- new_lev |= SC_FOLDLEVELHEADERFLAG;
- levelCurrent = new_lev;
- }
- styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
- }
-
- static const char * const rubyWordListDesc[] = {
- "Keywords",
- 0
- };
-
- LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);