PageRenderTime 45ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/cssed-0.4.0/scintilla/src/LexRuby.cxx

#
C++ | 1256 lines | 1015 code | 70 blank | 171 comment | 364 complexity | fcc18055fbdfb103385ab2e5e2394f6f MD5 | raw file
Possible License(s): GPL-2.0
  1. // Scintilla source code edit control
  2. /** @file LexRuby.cxx
  3. ** Lexer for Ruby.
  4. **/
  5. // Copyright 2001- by Clemens Wyss <wys@helbling.ch>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <ctype.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include "Platform.h"
  13. #include "PropSet.h"
  14. #include "Accessor.h"
  15. #include "KeyWords.h"
  16. #include "Scintilla.h"
  17. #include "SciLexer.h"
  18. #ifdef SCI_NAMESPACE
  19. using namespace Scintilla;
  20. #endif
  // XXX Identical to the Perl lexer's helper; should live in a common area.
  // Returns true when ch is a carriage return or a line feed.
  static inline bool isEOLChar(char ch) {
      switch (ch) {
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  // Returns true when ch is one of the punctuation characters listed below.
  // NUL is rejected explicitly: strchr() treats the terminating NUL of the
  // character set as part of the string, so it would otherwise report '\0'
  // as an operator character.
  static inline bool isRubyOperatorChar(char ch) {
      if (ch == '\0') {
          return false;
      }
      return strchr("%^&*\\()-+=|{}[]:;<>,/?!.~", ch) != NULL;
  }
  // Alphabetic test restricted to 7-bit characters: any char whose value,
  // converted to unsigned int, exceeds 127 is rejected before isalpha()
  // is consulted.
  static inline bool isSafeAlpha(char ch) {
      const unsigned int code = (unsigned int) ch;
      if (code > 127) {
          return false;
      }
      return isalpha(ch) != 0;
  }
  // Upper bound used when sizing and clamping the word buffers in this file.
  #define MAX_KEYWORD_LENGTH 200
  // Bit mask (low six bits) that actual_style() applies to a style value.
  #define STYLE_MASK 63
  // Strip everything but the STYLE_MASK bits from a style value.
  // The argument is parenthesised so that expressions such as
  // actual_style(a | b) mask the whole expression rather than only `b`.
  #define actual_style(style) ((style) & STYLE_MASK)
  34. static bool followsDot(unsigned int pos, Accessor &styler) {
  35. styler.Flush();
  36. for (; pos >= 1; --pos) {
  37. int style = actual_style(styler.StyleAt(pos));
  38. char ch;
  39. switch (style) {
  40. case SCE_RB_DEFAULT:
  41. ch = styler[pos];
  42. if (ch == ' ' || ch == '\t') {
  43. //continue
  44. } else {
  45. return false;
  46. }
  47. break;
  48. case SCE_RB_OPERATOR:
  49. return styler[pos] == '.';
  50. default:
  51. return false;
  52. }
  53. }
  54. return false;
  55. }
  56. // Forward declarations
  57. static bool keywordIsAmbiguous(const char *prevWord);
  58. static bool keywordDoStartsLoop(int pos,
  59. Accessor &styler);
  60. static bool keywordIsModifier(const char *word,
  61. int pos,
  62. Accessor &styler);
  63. static int ClassifyWordRb(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord) {
  64. char s[100];
  65. unsigned int i, j;
  66. unsigned int lim = end - start + 1; // num chars to copy
  67. if (lim >= MAX_KEYWORD_LENGTH) {
  68. lim = MAX_KEYWORD_LENGTH - 1;
  69. }
  70. for (i = start, j = 0; j < lim; i++, j++) {
  71. s[j] = styler[i];
  72. }
  73. s[j] = '\0';
  74. int chAttr;
  75. if (0 == strcmp(prevWord, "class"))
  76. chAttr = SCE_RB_CLASSNAME;
  77. else if (0 == strcmp(prevWord, "module"))
  78. chAttr = SCE_RB_MODULE_NAME;
  79. else if (0 == strcmp(prevWord, "def"))
  80. chAttr = SCE_RB_DEFNAME;
  81. else if (keywords.InList(s) && !followsDot(start - 1, styler)) {
  82. if (keywordIsAmbiguous(s)
  83. && keywordIsModifier(s, start, styler)) {
  84. // Demoted keywords are colored as keywords,
  85. // but do not affect changes in indentation.
  86. //
  87. // Consider the word 'if':
  88. // 1. <<if test ...>> : normal
  89. // 2. <<stmt if test>> : demoted
  90. // 3. <<lhs = if ...>> : normal: start a new indent level
  91. // 4. <<obj.if = 10>> : color as identifer, since it follows '.'
  92. chAttr = SCE_RB_WORD_DEMOTED;
  93. } else {
  94. chAttr = SCE_RB_WORD;
  95. }
  96. } else
  97. chAttr = SCE_RB_IDENTIFIER;
  98. styler.ColourTo(end, chAttr);
  99. if (chAttr == SCE_RB_WORD) {
  100. strcpy(prevWord, s);
  101. } else {
  102. prevWord[0] = 0;
  103. }
  104. return chAttr;
  105. }
  106. //XXX Identical to Perl, put in common area
  107. static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
  108. if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
  109. return false;
  110. }
  111. while (*val) {
  112. if (*val != styler[pos++]) {
  113. return false;
  114. }
  115. val++;
  116. }
  117. return true;
  118. }
  119. // Do Ruby better -- find the end of the line, work back,
  120. // and then check for leading white space
  121. // Precondition: the here-doc target can be indented
  122. static bool lookingAtHereDocDelim(Accessor &styler,
  123. int pos,
  124. int lengthDoc,
  125. const char *HereDocDelim)
  126. {
  127. if (!isMatch(styler, lengthDoc, pos, HereDocDelim)) {
  128. return false;
  129. }
  130. while (--pos > 0) {
  131. char ch = styler[pos];
  132. if (isEOLChar(ch)) {
  133. return true;
  134. } else if (ch != ' ' && ch != '\t') {
  135. return false;
  136. }
  137. }
  138. return false;
  139. }
  // XXX Identical to the Perl lexer's helper; should live in a common area.
  // Maps an opening bracket to its closing partner; every other character
  // is returned unchanged.
  static char opposite(char ch) {
      switch (ch) {
      case '(':
          return ')';
      case '[':
          return ']';
      case '{':
          return '}';
      case '<':
          return '>';
      default:
          return ch;
      }
  }
  152. // Null transitions when we see we've reached the end
  153. // and need to relex the curr char.
  154. static void redo_char(int &i, char &ch, char &chNext, char &chNext2,
  155. int &state) {
  156. i--;
  157. chNext2 = chNext;
  158. chNext = ch;
  159. state = SCE_RB_DEFAULT;
  160. }
  // Consumes one character: bumps the index and shifts the look-ahead
  // characters one place to the left.  chNext2 is only read here; it is
  // not refreshed.
  static void advance_char(int &i, char &ch, char &chNext, char &chNext2) {
      ch = chNext;
      chNext = chNext2;
      i += 1;
  }
  166. // precondition: startPos points to one after the EOL char
  167. static bool currLineContainsHereDelims(int& startPos,
  168. Accessor &styler) {
  169. if (startPos <= 1)
  170. return false;
  171. int pos;
  172. for (pos = startPos - 1; pos > 0; pos--) {
  173. char ch = styler.SafeGetCharAt(pos);
  174. if (isEOLChar(ch)) {
  175. // Leave the pointers where they are -- there are no
  176. // here doc delims on the current line, even if
  177. // the EOL isn't default style
  178. return false;
  179. } else {
  180. styler.Flush();
  181. if (actual_style(styler.StyleAt(pos)) == SCE_RB_HERE_DELIM) {
  182. break;
  183. }
  184. }
  185. }
  186. if (pos == 0) {
  187. return false;
  188. }
  189. // Update the pointers so we don't have to re-analyze the string
  190. startPos = pos;
  191. return true;
  192. }
  193. static bool isEmptyLine(int pos,
  194. Accessor &styler) {
  195. int spaceFlags = 0;
  196. int lineCurrent = styler.GetLine(pos);
  197. int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
  198. return (indentCurrent & SC_FOLDLEVELWHITEFLAG) != 0;
  199. }
  // Returns true when keyword is one of the words listed in the table below
  // (exact, case-sensitive match).  The lexer uses the result as its
  // "prefer a regex" hint after a keyword.
  static bool RE_CanFollowKeyword(const char *keyword) {
      static const char *const regexFriendly[] = {
          "and", "begin", "break", "case", "do",
          "else", "elsif", "if", "next", "return",
          "when", "unless", "until", "not", "or"
      };
      const unsigned int count = sizeof(regexFriendly) / sizeof(regexFriendly[0]);
      for (unsigned int k = 0; k < count; ++k) {
          if (strcmp(keyword, regexFriendly[k]) == 0) {
              return true;
          }
      }
      return false;
  }
  220. //todo: if we aren't looking at a stdio character,
  221. // move to the start of the first line that is not in a
  222. // multi-line construct
  223. static void synchronizeDocStart(unsigned int& startPos,
  224. int &length,
  225. int &initStyle,
  226. Accessor &styler,
  227. bool skipWhiteSpace=false) {
  228. styler.Flush();
  229. int style = actual_style(styler.StyleAt(startPos));
  230. switch (style) {
  231. case SCE_RB_STDIN:
  232. case SCE_RB_STDOUT:
  233. case SCE_RB_STDERR:
  234. // Don't do anything else with these.
  235. return;
  236. }
  237. int pos = startPos;
  238. // Quick way to characterize each line
  239. int lineStart;
  240. for (lineStart = styler.GetLine(pos); lineStart > 0; lineStart--) {
  241. // Now look at the style before the previous line's EOL
  242. pos = styler.LineStart(lineStart) - 1;
  243. if (pos <= 10) {
  244. lineStart = 0;
  245. break;
  246. }
  247. char ch = styler.SafeGetCharAt(pos);
  248. char chPrev = styler.SafeGetCharAt(pos - 1);
  249. if (ch == '\n' && chPrev == '\r') {
  250. pos--;
  251. }
  252. if (styler.SafeGetCharAt(pos - 1) == '\\') {
  253. // Continuation line -- keep going
  254. } else if (actual_style(styler.StyleAt(pos)) != SCE_RB_DEFAULT) {
  255. // Part of multi-line construct -- keep going
  256. } else if (currLineContainsHereDelims(pos, styler)) {
  257. // Keep going, with pos and length now pointing
  258. // at the end of the here-doc delimiter
  259. } else if (skipWhiteSpace && isEmptyLine(pos, styler)) {
  260. // Keep going
  261. } else {
  262. break;
  263. }
  264. }
  265. pos = styler.LineStart(lineStart);
  266. length += (startPos - pos);
  267. startPos = pos;
  268. initStyle = SCE_RB_DEFAULT;
  269. }
  270. static void ColouriseRbDoc(unsigned int startPos, int length, int initStyle,
  271. WordList *keywordlists[], Accessor &styler) {
  272. // Lexer for Ruby often has to backtrack to start of current style to determine
  273. // which characters are being used as quotes, how deeply nested is the
  274. // start position and what the termination string is for here documents
  275. WordList &keywords = *keywordlists[0];
  276. class HereDocCls {
  277. public:
  278. int State;
  279. // States
  280. // 0: '<<' encountered
  281. // 1: collect the delimiter
  282. // 1b: text between the end of the delimiter and the EOL
  283. // 2: here doc text (lines after the delimiter)
  284. char Quote; // the char after '<<'
  285. bool Quoted; // true if Quote in ('\'','"','`')
  286. int DelimiterLength; // strlen(Delimiter)
  287. char Delimiter[256]; // the Delimiter, limit of 256: from Perl
  288. bool CanBeIndented;
  289. HereDocCls() {
  290. State = 0;
  291. DelimiterLength = 0;
  292. Delimiter[0] = '\0';
  293. CanBeIndented = false;
  294. }
  295. };
  296. HereDocCls HereDoc;
  297. class QuoteCls {
  298. public:
  299. int Count;
  300. char Up;
  301. char Down;
  302. QuoteCls() {
  303. this->New();
  304. }
  305. void New() {
  306. Count = 0;
  307. Up = '\0';
  308. Down = '\0';
  309. }
  310. void Open(char u) {
  311. Count++;
  312. Up = u;
  313. Down = opposite(Up);
  314. }
  315. };
  316. QuoteCls Quote;
  317. int numDots = 0; // For numbers --
  318. // Don't start lexing in the middle of a num
  319. synchronizeDocStart(startPos, length, initStyle, styler, // ref args
  320. false);
  321. bool preferRE = true;
  322. int state = initStyle;
  323. int lengthDoc = startPos + length;
  324. char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
  325. prevWord[0] = '\0';
  326. if (length == 0)
  327. return;
  328. char chPrev = styler.SafeGetCharAt(startPos - 1);
  329. char chNext = styler.SafeGetCharAt(startPos);
  330. // Ruby uses a different mask because bad indentation is marked by oring with 32
  331. styler.StartAt(startPos, 127);
  332. styler.StartSegment(startPos);
  333. static int q_states[] = {SCE_RB_STRING_Q,
  334. SCE_RB_STRING_QQ,
  335. SCE_RB_STRING_QR,
  336. SCE_RB_STRING_QW,
  337. SCE_RB_STRING_QW,
  338. SCE_RB_STRING_QX};
  339. static const char* q_chars = "qQrwWx";
  340. for (int i = startPos; i < lengthDoc; i++) {
  341. char ch = chNext;
  342. chNext = styler.SafeGetCharAt(i + 1);
  343. char chNext2 = styler.SafeGetCharAt(i + 2);
  344. if (styler.IsLeadByte(ch)) {
  345. chNext = chNext2;
  346. chPrev = ' ';
  347. i += 1;
  348. continue;
  349. }
  350. // skip on DOS/Windows
  351. //No, don't, because some things will get tagged on,
  352. // so we won't recognize keywords, for example
  353. #if 0
  354. if (ch == '\r' && chNext == '\n') {
  355. continue;
  356. }
  357. #endif
  358. if (HereDoc.State == 1 && isEOLChar(ch)) {
  359. // Begin of here-doc (the line after the here-doc delimiter):
  360. HereDoc.State = 2;
  361. styler.ColourTo(i-1, state);
  362. // Don't check for a missing quote, just jump into
  363. // the here-doc state
  364. state = SCE_RB_HERE_Q;
  365. }
  366. // Regular transitions
  367. if (state == SCE_RB_DEFAULT) {
  368. if (isdigit(ch)) {
  369. styler.ColourTo(i - 1, state);
  370. state = SCE_RB_NUMBER;
  371. numDots = 0;
  372. } else if (iswordstart(ch)) {
  373. styler.ColourTo(i - 1, state);
  374. state = SCE_RB_WORD;
  375. } else if (ch == '#') {
  376. styler.ColourTo(i - 1, state);
  377. state = SCE_RB_COMMENTLINE;
  378. } else if (ch == '=') {
  379. // =begin indicates the start of a comment (doc) block
  380. if (i == 0 || isEOLChar(chPrev)
  381. && chNext == 'b'
  382. && styler.SafeGetCharAt(i + 2) == 'e'
  383. && styler.SafeGetCharAt(i + 3) == 'g'
  384. && styler.SafeGetCharAt(i + 4) == 'i'
  385. && styler.SafeGetCharAt(i + 5) == 'n'
  386. && !iswordchar(styler.SafeGetCharAt(i + 6))) {
  387. styler.ColourTo(i - 1, state);
  388. state = SCE_RB_POD;
  389. } else {
  390. styler.ColourTo(i - 1, state);
  391. styler.ColourTo(i, SCE_RB_OPERATOR);
  392. preferRE = true;
  393. }
  394. } else if (ch == '"') {
  395. styler.ColourTo(i - 1, state);
  396. state = SCE_RB_STRING;
  397. Quote.New();
  398. Quote.Open(ch);
  399. } else if (ch == '\'') {
  400. styler.ColourTo(i - 1, state);
  401. state = SCE_RB_CHARACTER;
  402. Quote.New();
  403. Quote.Open(ch);
  404. } else if (ch == '`') {
  405. styler.ColourTo(i - 1, state);
  406. state = SCE_RB_BACKTICKS;
  407. Quote.New();
  408. Quote.Open(ch);
  409. } else if (ch == '@') {
  410. // Instance or class var
  411. styler.ColourTo(i - 1, state);
  412. if (chNext == '@') {
  413. state = SCE_RB_CLASS_VAR;
  414. advance_char(i, ch, chNext, chNext2); // pass by ref
  415. } else {
  416. state = SCE_RB_INSTANCE_VAR;
  417. }
  418. } else if (ch == '$') {
  419. // Check for a builtin global
  420. styler.ColourTo(i - 1, state);
  421. // Recognize it bit by bit
  422. state = SCE_RB_GLOBAL;
  423. } else if (ch == '/' && preferRE) {
  424. // Ambigous operator
  425. styler.ColourTo(i - 1, state);
  426. state = SCE_RB_REGEX;
  427. Quote.New();
  428. Quote.Open(ch);
  429. } else if (ch == '<' && chNext == '<' && chNext2 != '=') {
  430. // Recognise the '<<' symbol - either a here document or a binary op
  431. styler.ColourTo(i - 1, state);
  432. i++;
  433. chNext = chNext2;
  434. styler.ColourTo(i, SCE_RB_OPERATOR);
  435. if (preferRE) {
  436. state = SCE_RB_HERE_DELIM;
  437. HereDoc.State = 0;
  438. } else {
  439. // leave state as default
  440. // We don't have all the heuristics Perl has for indications
  441. // of a here-doc, because '<<' is overloadable and used
  442. // for so many other classes.
  443. preferRE = true;
  444. }
  445. } else if (ch == ':') {
  446. styler.ColourTo(i - 1, state);
  447. if (chNext == ':') {
  448. // Mark "::" as an operator, not symbol start
  449. styler.ColourTo(i + 1, SCE_RB_OPERATOR);
  450. advance_char(i, ch, chNext, chNext2); // pass by ref
  451. state = SCE_RB_DEFAULT;
  452. preferRE = false;
  453. } else if (iswordchar(chNext)) {
  454. state = SCE_RB_SYMBOL;
  455. } else if (strchr("[*!~+-*/%=<>&^|", chNext)) {
  456. // Do the operator analysis in-line, looking ahead
  457. // Based on the table in pickaxe 2nd ed., page 339
  458. bool doColoring = true;
  459. switch (chNext) {
  460. case '[':
  461. if (chNext2 == ']' ) {
  462. char ch_tmp = styler.SafeGetCharAt(i + 3);
  463. if (ch_tmp == '=') {
  464. i += 3;
  465. ch = ch_tmp;
  466. chNext = styler.SafeGetCharAt(i + 1);
  467. } else {
  468. i += 2;
  469. ch = chNext2;
  470. chNext = ch_tmp;
  471. }
  472. } else {
  473. doColoring = false;
  474. }
  475. break;
  476. case '*':
  477. if (chNext2 == '*') {
  478. i += 2;
  479. ch = chNext2;
  480. chNext = styler.SafeGetCharAt(i + 1);
  481. } else {
  482. advance_char(i, ch, chNext, chNext2);
  483. }
  484. break;
  485. case '!':
  486. if (chNext2 == '=' || chNext2 == '~') {
  487. i += 2;
  488. ch = chNext2;
  489. chNext = styler.SafeGetCharAt(i + 1);
  490. } else {
  491. advance_char(i, ch, chNext, chNext2);
  492. }
  493. break;
  494. case '<':
  495. if (chNext2 == '<') {
  496. i += 2;
  497. ch = chNext2;
  498. chNext = styler.SafeGetCharAt(i + 1);
  499. } else if (chNext2 == '=') {
  500. char ch_tmp = styler.SafeGetCharAt(i + 3);
  501. if (ch_tmp == '>') { // <=> operator
  502. i += 3;
  503. ch = ch_tmp;
  504. chNext = styler.SafeGetCharAt(i + 1);
  505. } else {
  506. i += 2;
  507. ch = chNext2;
  508. chNext = ch_tmp;
  509. }
  510. } else {
  511. advance_char(i, ch, chNext, chNext2);
  512. }
  513. break;
  514. default:
  515. // Simple one-character operators
  516. advance_char(i, ch, chNext, chNext2);
  517. break;
  518. }
  519. if (doColoring) {
  520. styler.ColourTo(i, SCE_RB_SYMBOL);
  521. state = SCE_RB_DEFAULT;
  522. }
  523. } else if (!preferRE) {
  524. // Don't color symbol strings (yet)
  525. // Just color the ":" and color rest as string
  526. styler.ColourTo(i, SCE_RB_SYMBOL);
  527. state = SCE_RB_DEFAULT;
  528. } else {
  529. styler.ColourTo(i, SCE_RB_OPERATOR);
  530. state = SCE_RB_DEFAULT;
  531. preferRE = true;
  532. }
  533. } else if (ch == '%') {
  534. styler.ColourTo(i - 1, state);
  535. bool have_string = false;
  536. if (strchr(q_chars, chNext) && !iswordchar(chNext2)) {
  537. Quote.New();
  538. const char *hit = strchr(q_chars, chNext);
  539. if (hit != NULL) {
  540. state = q_states[hit - q_chars];
  541. Quote.Open(chNext2);
  542. i += 2;
  543. ch = chNext2;
  544. chNext = styler.SafeGetCharAt(i + 1);
  545. have_string = true;
  546. }
  547. } else if (!iswordchar(chNext)) {
  548. state = SCE_RB_STRING_QQ;
  549. Quote.Open(chNext);
  550. advance_char(i, ch, chNext, chNext2); // pass by ref
  551. have_string = true;
  552. }
  553. if (!have_string) {
  554. styler.ColourTo(i, SCE_RB_OPERATOR);
  555. // stay in default
  556. preferRE = true;
  557. }
  558. } else if (isoperator(ch)) {
  559. styler.ColourTo(i - 1, state);
  560. styler.ColourTo(i, SCE_RB_OPERATOR);
  561. // If we're ending an expression or block,
  562. // assume it ends an object, and the ambivalent
  563. // constructs are binary operators
  564. //
  565. // So if we don't have one of these chars,
  566. // we aren't ending an object exp'n, and ops
  567. // like : << / are unary operators.
  568. preferRE = (strchr(")}]", ch) == NULL);
  569. // Stay in default state
  570. } else if (isEOLChar(ch)) {
  571. // Make sure it's a true line-end, with no backslash
  572. if ((ch == '\r' || (ch == '\n' && chPrev != '\r'))
  573. && chPrev != '\\') {
  574. // Assume we've hit the end of the statement.
  575. preferRE = true;
  576. }
  577. }
  578. } else if (state == SCE_RB_WORD) {
  579. if (ch == '.' || !iswordchar(ch)) {
  580. // Words include x? in all contexts,
  581. // and <letters>= after either 'def' or a dot
  582. // Move along until a complete word is on our left
  583. // Default accessor treats '.' as word-chars,
  584. // but we don't for now.
  585. if (ch == '='
  586. && iswordchar(chPrev)
  587. && (chNext == '('
  588. || strchr(" \t\n\r", chNext) != NULL)
  589. && (!strcmp(prevWord, "def")
  590. || followsDot(styler.GetStartSegment(), styler))) {
  591. // <name>= is a name only when being def'd -- Get it the next time
  592. // This means that <name>=<name> is always lexed as
  593. // <name>, (op, =), <name>
  594. } else if ((ch == '?' || ch == '!')
  595. && iswordchar(chPrev)
  596. && !iswordchar(chNext)) {
  597. // <name>? is a name -- Get it the next time
  598. // But <name>?<name> is always lexed as
  599. // <name>, (op, ?), <name>
  600. // Same with <name>! to indicate a method that
  601. // modifies its target
  602. } else if (isEOLChar(ch)
  603. && isMatch(styler, lengthDoc, i - 7, "__END__")) {
  604. styler.ColourTo(i, SCE_RB_DATASECTION);
  605. state = SCE_RB_DATASECTION;
  606. // No need to handle this state -- we'll just move to the end
  607. preferRE = false;
  608. } else {
  609. int wordStartPos = styler.GetStartSegment();
  610. int word_style = ClassifyWordRb(wordStartPos, i - 1, keywords, styler, prevWord);
  611. switch (word_style) {
  612. case SCE_RB_WORD:
  613. preferRE = RE_CanFollowKeyword(prevWord);
  614. break;
  615. case SCE_RB_WORD_DEMOTED:
  616. preferRE = true;
  617. break;
  618. case SCE_RB_IDENTIFIER:
  619. if (isMatch(styler, lengthDoc, wordStartPos, "print")) {
  620. preferRE = true;
  621. } else if (isEOLChar(ch)) {
  622. preferRE = true;
  623. } else {
  624. preferRE = false;
  625. }
  626. break;
  627. default:
  628. preferRE = false;
  629. }
  630. redo_char(i, ch, chNext, chNext2, state); // pass by ref
  631. }
  632. }
  633. } else if (state == SCE_RB_NUMBER) {
  634. if (isalnum(ch) || ch == '_') {
  635. // Keep going
  636. } else if (ch == '.' && ++numDots == 1) {
  637. // Keep going
  638. } else {
  639. styler.ColourTo(i - 1, state);
  640. redo_char(i, ch, chNext, chNext2, state); // pass by ref
  641. preferRE = false;
  642. }
  643. } else if (state == SCE_RB_COMMENTLINE) {
  644. if (isEOLChar(ch)) {
  645. styler.ColourTo(i - 1, state);
  646. state = SCE_RB_DEFAULT;
  647. // Use whatever setting we had going into the comment
  648. }
  649. } else if (state == SCE_RB_HERE_DELIM) {
  650. // See the comment for SCE_RB_HERE_DELIM in LexPerl.cxx
  651. // Slightly different: if we find an immediate '-',
  652. // the target can appear indented.
  653. if (HereDoc.State == 0) { // '<<' encountered
  654. HereDoc.State = 1;
  655. HereDoc.DelimiterLength = 0;
  656. if (ch == '-') {
  657. HereDoc.CanBeIndented = true;
  658. advance_char(i, ch, chNext, chNext2); // pass by ref
  659. } else {
  660. HereDoc.CanBeIndented = false;
  661. }
  662. if (isEOLChar(ch)) {
  663. // Bail out of doing a here doc if there's no target
  664. state = SCE_RB_DEFAULT;
  665. preferRE = false;
  666. } else {
  667. HereDoc.Quote = ch;
  668. if (ch == '\'' || ch == '"' || ch == '`') {
  669. HereDoc.Quoted = true;
  670. HereDoc.Delimiter[0] = '\0';
  671. } else {
  672. HereDoc.Quoted = false;
  673. HereDoc.Delimiter[0] = ch;
  674. HereDoc.Delimiter[1] = '\0';
  675. HereDoc.DelimiterLength = 1;
  676. }
  677. }
  678. } else if (HereDoc.State == 1) { // collect the delimiter
  679. if (isEOLChar(ch)) {
  680. // End the quote now, and go back for more
  681. styler.ColourTo(i - 1, state);
  682. state = SCE_RB_DEFAULT;
  683. i--;
  684. chNext = ch;
  685. chNext2 = chNext;
  686. preferRE = false;
  687. } else if (HereDoc.Quoted) {
  688. if (ch == HereDoc.Quote) { // closing quote => end of delimiter
  689. styler.ColourTo(i, state);
  690. state = SCE_RB_DEFAULT;
  691. preferRE = false;
  692. } else {
  693. if (ch == '\\' && !isEOLChar(chNext)) {
  694. advance_char(i, ch, chNext, chNext2);
  695. }
  696. HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
  697. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  698. }
  699. } else { // an unquoted here-doc delimiter
  700. if (isalnum(ch) || ch == '_') {
  701. HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
  702. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  703. } else {
  704. styler.ColourTo(i - 1, state);
  705. redo_char(i, ch, chNext, chNext2, state);
  706. preferRE = false;
  707. }
  708. }
  709. if (HereDoc.DelimiterLength >= static_cast<int>(sizeof(HereDoc.Delimiter)) - 1) {
  710. styler.ColourTo(i - 1, state);
  711. state = SCE_RB_ERROR;
  712. preferRE = false;
  713. }
  714. }
  715. } else if (state == SCE_RB_HERE_Q) {
  716. // Not needed: HereDoc.State == 2
  717. // Indentable here docs: look backwards
  718. // Non-indentable: look forwards, like in Perl
  719. //
  720. // Why: so we can quickly resolve things like <<-" abc"
  721. if (!HereDoc.CanBeIndented) {
  722. if (isEOLChar(chPrev)
  723. && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
  724. styler.ColourTo(i - 1, state);
  725. i += HereDoc.DelimiterLength - 1;
  726. chNext = styler.SafeGetCharAt(i + 1);
  727. if (isEOLChar(chNext)) {
  728. styler.ColourTo(i, SCE_RB_HERE_DELIM);
  729. state = SCE_RB_DEFAULT;
  730. HereDoc.State = 0;
  731. preferRE = false;
  732. }
  733. // Otherwise we skipped through the here doc faster.
  734. }
  735. } else if (isEOLChar(chNext)
  736. && lookingAtHereDocDelim(styler,
  737. i - HereDoc.DelimiterLength + 1,
  738. lengthDoc,
  739. HereDoc.Delimiter)) {
  740. styler.ColourTo(i - 1 - HereDoc.DelimiterLength, state);
  741. styler.ColourTo(i, SCE_RB_HERE_DELIM);
  742. state = SCE_RB_DEFAULT;
  743. preferRE = false;
  744. HereDoc.State = 0;
  745. }
  746. } else if (state == SCE_RB_CLASS_VAR
  747. || state == SCE_RB_INSTANCE_VAR
  748. || state == SCE_RB_SYMBOL) {
  749. if (!iswordchar(ch)) {
  750. styler.ColourTo(i - 1, state);
  751. redo_char(i, ch, chNext, chNext2, state); // pass by ref
  752. preferRE = false;
  753. }
  754. } else if (state == SCE_RB_GLOBAL) {
  755. if (!iswordchar(ch)) {
  756. // handle special globals here as well
  757. if (chPrev == '$') {
  758. if (ch == '-') {
  759. // Include the next char, like $-a
  760. advance_char(i, ch, chNext, chNext2);
  761. }
  762. styler.ColourTo(i, state);
  763. state = SCE_RB_DEFAULT;
  764. } else {
  765. styler.ColourTo(i - 1, state);
  766. redo_char(i, ch, chNext, chNext2, state); // pass by ref
  767. }
  768. preferRE = false;
  769. }
  770. } else if (state == SCE_RB_POD) {
  771. // PODs end with ^=end\s, -- any whitespace can follow =end
  772. if (strchr(" \t\n\r", ch) != NULL
  773. && i > 5
  774. && isEOLChar(styler[i - 5])
  775. && isMatch(styler, lengthDoc, i - 4, "=end")) {
  776. styler.ColourTo(i - 1, state);
  777. state = SCE_RB_DEFAULT;
  778. preferRE = false;
  779. }
  780. } else if (state == SCE_RB_REGEX || state == SCE_RB_STRING_QR) {
  781. if (ch == '\\' && Quote.Up != '\\') {
  782. // Skip one
  783. advance_char(i, ch, chNext, chNext2);
  784. } else if (ch == Quote.Down) {
  785. Quote.Count--;
  786. if (Quote.Count == 0) {
  787. // Include the options
  788. while (isSafeAlpha(chNext)) {
  789. i++;
  790. ch = chNext;
  791. chNext = styler.SafeGetCharAt(i + 1);
  792. }
  793. styler.ColourTo(i, state);
  794. state = SCE_RB_DEFAULT;
  795. preferRE = false;
  796. }
  797. } else if (ch == Quote.Up) {
  798. // Only if close quoter != open quoter
  799. Quote.Count++;
  800. } else if (ch == '#' ) {
  801. //todo: distinguish comments from pound chars
  802. // for now, handle as comment
  803. styler.ColourTo(i - 1, state);
  804. bool inEscape = false;
  805. while (++i < lengthDoc) {
  806. ch = styler.SafeGetCharAt(i);
  807. if (ch == '\\') {
  808. inEscape = true;
  809. } else if (isEOLChar(ch)) {
  810. // Comment inside a regex
  811. styler.ColourTo(i - 1, SCE_RB_COMMENTLINE);
  812. break;
  813. } else if (inEscape) {
  814. inEscape = false; // don't look at char
  815. } else if (ch == Quote.Down) {
  816. // Have the regular handler deal with this
  817. // to get trailing modifiers.
  818. i--;
  819. ch = styler[i];
  820. break;
  821. }
  822. }
  823. chNext = styler.SafeGetCharAt(i + 1);
  824. chNext2 = styler.SafeGetCharAt(i + 2);
  825. }
  826. // Quotes of all kinds...
  827. } else if (state == SCE_RB_STRING_Q || state == SCE_RB_STRING_QQ ||
  828. state == SCE_RB_STRING_QX || state == SCE_RB_STRING_QW ||
  829. state == SCE_RB_STRING || state == SCE_RB_CHARACTER ||
  830. state == SCE_RB_BACKTICKS) {
  831. if (!Quote.Down && !isspacechar(ch)) {
  832. Quote.Open(ch);
  833. } else if (ch == '\\' && Quote.Up != '\\') {
  834. //Riddle me this: Is it safe to skip *every* escaped char?
  835. advance_char(i, ch, chNext, chNext2);
  836. } else if (ch == Quote.Down) {
  837. Quote.Count--;
  838. if (Quote.Count == 0) {
  839. styler.ColourTo(i, state);
  840. state = SCE_RB_DEFAULT;
  841. preferRE = false;
  842. }
  843. } else if (ch == Quote.Up) {
  844. Quote.Count++;
  845. }
  846. }
  847. if (state == SCE_RB_ERROR) {
  848. break;
  849. }
  850. chPrev = ch;
  851. }
  852. if (state == SCE_RB_WORD) {
  853. // We've ended on a word, possibly at EOF, and need to
  854. // classify it.
  855. (void) ClassifyWordRb(styler.GetStartSegment(), lengthDoc - 1, keywords, styler, prevWord);
  856. } else {
  857. styler.ColourTo(lengthDoc - 1, state);
  858. }
  859. }
  860. // Helper functions for folding
  861. static void getPrevWord(int pos,
  862. char *prevWord,
  863. Accessor &styler,
  864. int word_state)
  865. {
  866. int i;
  867. styler.Flush();
  868. for (i = pos - 1; i > 0; i--) {
  869. if (actual_style(styler.StyleAt(i)) != word_state) {
  870. i++;
  871. break;
  872. }
  873. }
  874. if (i < pos - MAX_KEYWORD_LENGTH) // overflow
  875. i = pos - MAX_KEYWORD_LENGTH;
  876. char *dst = prevWord;
  877. for (; i <= pos; i++) {
  878. *dst++ = styler[i];
  879. }
  880. *dst = 0;
  881. }
  // Returns true for the keywords listed below, i.e. the ones that callers
  // go on to test with keywordIsModifier().
  // Lots of ways to do a loop in Ruby besides 'while/until'
  static bool keywordIsAmbiguous(const char *prevWord)
  {
      // Ordered from most likely used to least likely
      static const char *const ambiguous[] = {
          "if", "do", "while", "unless", "until"
      };
      const unsigned int count = sizeof(ambiguous) / sizeof(ambiguous[0]);
      for (unsigned int k = 0; k < count; ++k) {
          if (strcmp(prevWord, ambiguous[k]) == 0) {
              return true;
          }
      }
      return false;
  }
  // True for a blank or a horizontal tab only (line ends are not included).
  static bool inline iswhitespace(char ch) {
      switch (ch) {
      case ' ':
      case '\t':
          return true;
      default:
          return false;
      }
  }
  899. // Demote keywords in the following conditions:
  900. // if, while, unless, until modify a statement
  901. // do after a while or until, as a noise word (like then after if)
  902. static bool keywordIsModifier(const char *word,
  903. int pos,
  904. Accessor &styler)
  905. {
  906. if (word[0] == 'd' && word[1] == 'o' && !word[2]) {
  907. return keywordDoStartsLoop(pos, styler);
  908. }
  909. char ch;
  910. int style = SCE_RB_DEFAULT;
  911. int lineStart = styler.GetLine(pos);
  912. int lineStartPosn = styler.LineStart(lineStart);
  913. styler.Flush();
  914. while (--pos >= lineStartPosn) {
  915. style = actual_style(styler.StyleAt(pos));
  916. if (style == SCE_RB_DEFAULT) {
  917. if (iswhitespace(ch = styler[pos])) {
  918. //continue
  919. } else if (ch == '\r' || ch == '\n') {
  920. // Scintilla's LineStart() and GetLine() routines aren't
  921. // platform-independent, so if we have text prepared with
  922. // a different system we can't rely on it.
  923. return false;
  924. }
  925. } else {
  926. break;
  927. }
  928. }
  929. if (pos < lineStartPosn) {
  930. return false; //XXX not quite right if the prev line is a continuation
  931. }
  932. // First things where the action is unambiguous
  933. switch (style) {
  934. case SCE_RB_DEFAULT:
  935. case SCE_RB_COMMENTLINE:
  936. case SCE_RB_POD:
  937. case SCE_RB_CLASSNAME:
  938. case SCE_RB_DEFNAME:
  939. case SCE_RB_MODULE_NAME:
  940. return false;
  941. case SCE_RB_OPERATOR:
  942. break;
  943. case SCE_RB_WORD:
  944. // Watch out for uses of 'else if'
  945. //XXX: Make a list of other keywords where 'if' isn't a modifier
  946. // and can appear legitimately
  947. // Formulate this to avoid warnings from most compilers
  948. if (strcmp(word, "if") == 0) {
  949. char prevWord[MAX_KEYWORD_LENGTH + 1];
  950. getPrevWord(pos, prevWord, styler, SCE_RB_WORD);
  951. return strcmp(prevWord, "else") != 0;
  952. }
  953. return true;
  954. default:
  955. return true;
  956. }
  957. // Assume that if the keyword follows an operator,
  958. // usually it's a block assignment, like
  959. // a << if x then y else z
  960. ch = styler[pos];
  961. switch (ch) {
  962. case ')':
  963. case ']':
  964. case '}':
  965. return true;
  966. default:
  967. return false;
  968. }
  969. }
  970. #define WHILE_BACKWARDS "elihw"
  971. #define UNTIL_BACKWARDS "litnu"
  972. // Nothing fancy -- look to see if we follow a while/until somewhere
  973. // on the current line
  974. static bool keywordDoStartsLoop(int pos,
  975. Accessor &styler)
  976. {
  977. char ch;
  978. int style;
  979. int lineStart = styler.GetLine(pos);
  980. int lineStartPosn = styler.LineStart(lineStart);
  981. styler.Flush();
  982. while (--pos >= lineStartPosn) {
  983. style = actual_style(styler.StyleAt(pos));
  984. if (style == SCE_RB_DEFAULT) {
  985. if ((ch = styler[pos]) == '\r' || ch == '\n') {
  986. // Scintilla's LineStart() and GetLine() routines aren't
  987. // platform-independent, so if we have text prepared with
  988. // a different system we can't rely on it.
  989. return false;
  990. }
  991. } else if (style == SCE_RB_WORD) {
  992. // Check for while or until, but write the word in backwards
  993. char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
  994. char *dst = prevWord;
  995. int wordLen = 0;
  996. int start_word;
  997. for (start_word = pos;
  998. start_word >= lineStartPosn && actual_style(styler.StyleAt(start_word)) == SCE_RB_WORD;
  999. start_word--) {
  1000. if (++wordLen < MAX_KEYWORD_LENGTH) {
  1001. *dst++ = styler[start_word];
  1002. }
  1003. }
  1004. *dst = 0;
  1005. // Did we see our keyword?
  1006. if (!strcmp(prevWord, WHILE_BACKWARDS)
  1007. || !strcmp(prevWord, UNTIL_BACKWARDS)) {
  1008. return true;
  1009. }
  1010. // We can move pos to the beginning of the keyword, and then
  1011. // accept another decrement, as we can never have two contiguous
  1012. // keywords:
  1013. // word1 word2
  1014. // ^
  1015. // <- move to start_word
  1016. // ^
  1017. // <- loop decrement
  1018. // ^ # pointing to end of word1 is fine
  1019. pos = start_word;
  1020. }
  1021. }
  1022. return false;
  1023. }
  1024. /*
  1025. * Folding Ruby
  1026. *
  1027. * The language is quite complex to analyze without a full parse.
  1028. * For example, this line shouldn't affect fold level:
  1029. *
  1030. * print "hello" if feeling_friendly?
  1031. *
  1032. * Neither should this:
  1033. *
  1034. * print "hello" \
  1035. * if feeling_friendly?
  1036. *
  1037. *
  1038. * But this should:
  1039. *
  1040. * if feeling_friendly? #++
  1041. * print "hello" \
  1042. * print "goodbye"
  1043. * end #--
  1044. *
  1045. * So we cheat, by actually looking at the existing indentation
  1046. * levels for each line, and just echoing it back. Like Python.
  1047. * Then if we get better at it, we'll take braces into consideration,
  1048. * which always affect folding levels.
  1049. * How the keywords should work:
  1050. * No effect:
  1051. * __FILE__ __LINE__ BEGIN END alias and
  1052. * defined? false in nil not or self super then
  1053. * true undef
  1054. * Always increment:
  1055. * begin class def do for module when {
  1056. *
  1057. * Always decrement:
  1058. * end }
  1059. *
  1060. * Increment if these start a statement
  1061. * if unless until while -- do nothing if they're modifiers
  1062. * These end a block if there's no modifier, but don't bother
  1063. * break next redo retry return yield
  1064. *
  1065. * These temporarily de-indent, but re-indent
  1066. * case else elsif ensure rescue
  1067. *
  1068. * This means that the folder reflects indentation rather
 * than setting it. The language service updates indentation
 * when users type Return and finish entering de-denters.
  1071. *
  1072. * Later offer to fold POD, here-docs, strings, and blocks of comments
  1073. */
  1074. static void FoldRbDoc(unsigned int startPos, int length, int initStyle,
  1075. WordList *[], Accessor &styler) {
  1076. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  1077. bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
  1078. synchronizeDocStart(startPos, length, initStyle, styler, // ref args
  1079. false);
  1080. unsigned int endPos = startPos + length;
  1081. int visibleChars = 0;
  1082. int lineCurrent = styler.GetLine(startPos);
  1083. int levelPrev = startPos == 0 ? 0 : (styler.LevelAt(lineCurrent)
  1084. & SC_FOLDLEVELNUMBERMASK
  1085. & ~SC_FOLDLEVELBASE);
  1086. int levelCurrent = levelPrev;
  1087. char chNext = styler[startPos];
  1088. int styleNext = styler.StyleAt(startPos);
  1089. int stylePrev = startPos <= 1 ? SCE_RB_DEFAULT : styler.StyleAt(startPos - 1);
  1090. bool buffer_ends_with_eol = false;
  1091. for (unsigned int i = startPos; i < endPos; i++) {
  1092. char ch = chNext;
  1093. chNext = styler.SafeGetCharAt(i + 1);
  1094. int style = styleNext;
  1095. styleNext = styler.StyleAt(i + 1);
  1096. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  1097. if (style == SCE_RB_COMMENTLINE) {
  1098. if (foldComment && stylePrev != SCE_RB_COMMENTLINE) {
  1099. if (chNext == '{') {
  1100. levelCurrent++;
  1101. } else if (chNext == '}') {
  1102. levelCurrent--;
  1103. }
  1104. }
  1105. } else if (style == SCE_RB_OPERATOR) {
  1106. if (strchr("[{(", ch)) {
  1107. levelCurrent++;
  1108. } else if (strchr(")}]", ch)) {
  1109. // Don't decrement below 0
  1110. if (levelCurrent > 0)
  1111. levelCurrent--;
  1112. }
  1113. } else if (style == SCE_RB_WORD && styleNext != SCE_RB_WORD) {
  1114. // Look at the keyword on the left and decide what to do
  1115. char prevWord[MAX_KEYWORD_LENGTH + 1]; // 1 byte for zero
  1116. prevWord[0] = 0;
  1117. getPrevWord(i, prevWord, styler, SCE_RB_WORD);
  1118. if (!strcmp(prevWord, "end")) {
  1119. // Don't decrement below 0
  1120. if (levelCurrent > 0)
  1121. levelCurrent--;
  1122. } else if ( !strcmp(prevWord, "if")
  1123. || !strcmp(prevWord, "def")
  1124. || !strcmp(prevWord, "class")
  1125. || !strcmp(prevWord, "module")
  1126. || !strcmp(prevWord, "begin")
  1127. || !strcmp(prevWord, "case")
  1128. || !strcmp(prevWord, "do")
  1129. || !strcmp(prevWord, "while")
  1130. || !strcmp(prevWord, "unless")
  1131. || !strcmp(prevWord, "until")
  1132. || !strcmp(prevWord, "for")
  1133. ) {
  1134. levelCurrent++;
  1135. }
  1136. }
  1137. if (atEOL) {
  1138. int lev = levelPrev;
  1139. if (visibleChars == 0 && foldCompact)
  1140. lev |= SC_FOLDLEVELWHITEFLAG;
  1141. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  1142. lev |= SC_FOLDLEVELHEADERFLAG;
  1143. styler.SetLevel(lineCurrent, lev|SC_FOLDLEVELBASE);
  1144. lineCurrent++;
  1145. levelPrev = levelCurrent;
  1146. visibleChars = 0;
  1147. buffer_ends_with_eol = true;
  1148. } else if (!isspacechar(ch)) {
  1149. visibleChars++;
  1150. buffer_ends_with_eol = false;
  1151. }
  1152. }
  1153. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  1154. if (!buffer_ends_with_eol) {
  1155. lineCurrent++;
  1156. int new_lev = levelCurrent;
  1157. if (visibleChars == 0 && foldCompact)
  1158. new_lev |= SC_FOLDLEVELWHITEFLAG;
  1159. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  1160. new_lev |= SC_FOLDLEVELHEADERFLAG;
  1161. levelCurrent = new_lev;
  1162. }
  1163. styler.SetLevel(lineCurrent, levelCurrent|SC_FOLDLEVELBASE);
  1164. }
  1165. static const char * const rubyWordListDesc[] = {
  1166. "Keywords",
  1167. 0
  1168. };
// File-scope LexerModule object for Ruby, built from the SCLEX_RUBY id,
// the ColouriseRbDoc and FoldRbDoc functions, the name "ruby" and the
// rubyWordListDesc table.
// NOTE(review): presumably this is what makes the lexer selectable by id or
// by name; confirm against the LexerModule declaration.
LexerModule lmRuby(SCLEX_RUBY, ColouriseRbDoc, "ruby", FoldRbDoc, rubyWordListDesc);