/pnwtl/third_party/scintilla/lexers/LexErlang.cxx

https://code.google.com/p/pnotepad/ · C++ · 624 lines · 512 code · 70 blank · 42 comment · 247 complexity · 263bd19e699b31a6dda2cac6c3229b25 MD5 · raw file

  1. // Scintilla source code edit control
  2. // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
  3. // The License.txt file describes the conditions under which this software may be distributed.
  4. /** @file LexErlang.cxx
  5. ** Lexer for Erlang.
  6. ** Enhanced by Etienne 'Lenain' Girondel (lenaing@gmail.com)
  7. ** Originally written by Peter-Henry Mander,
  8. ** based on Matlab lexer by José Fonseca.
  9. **/
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <stdio.h>
  13. #include <stdarg.h>
  14. #include <assert.h>
  15. #include <ctype.h>
  16. #include "ILexer.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "PropSetSimple.h"
  20. #include "WordList.h"
  21. #include "LexAccessor.h"
  22. #include "Accessor.h"
  23. #include "StyleContext.h"
  24. #include "CharacterSet.h"
  25. #include "LexerModule.h"
  26. #ifdef SCI_NAMESPACE
  27. using namespace Scintilla;
  28. #endif
  29. static int is_radix(int radix, int ch) {
  30. int digit;
  31. if (36 < radix || 2 > radix)
  32. return 0;
  33. if (isdigit(ch)) {
  34. digit = ch - '0';
  35. } else if (isalnum(ch)) {
  36. digit = toupper(ch) - 'A' + 10;
  37. } else {
  38. return 0;
  39. }
  40. return (digit < radix);
  41. }
/**
 * Token currently being scanned by ColouriseErlangDoc.
 *
 * These states cover the tokens whose final style is only decided once the
 * token ends (or, for comments, once the number of leading '%' is known).
 */
typedef enum {
	STATE_NULL,             // none of the tokens below is being scanned
	COMMENT,                // comment opened by a single '%'
	COMMENT_FUNCTION,       // comment opened by "%%"
	COMMENT_MODULE,         // comment opened by "%%%"
	COMMENT_DOC,            // "@tag" inside a "%%" / "%%%" comment
	COMMENT_DOC_MACRO,      // "{@tag ...}" inside a "%%" / "%%%" comment
	ATOM_UNQUOTED,          // word starting with a letter that is not upper case
	ATOM_QUOTED,            // token opened by a single quote
	NODE_NAME_UNQUOTED,     // unquoted atom in which '@' was met
	NODE_NAME_QUOTED,       // quoted atom in which '@' was met
	MACRO_START,            // just after '?'
	MACRO_UNQUOTED,         // '?' followed by a letter
	MACRO_QUOTED,           // '?' followed by a single quote
	RECORD_START,           // just after '#'
	RECORD_UNQUOTED,        // '#' followed by a lower-case letter
	RECORD_QUOTED,          // '#' followed by a single quote
	NUMERAL_START,          // decimal digits (also accumulates a possible base)
	NUMERAL_BASE_VALUE,     // digits after "base#"
	NUMERAL_FLOAT,          // digits after the decimal point
	NUMERAL_EXPONENT,       // after 'e' / 'E'
	PREPROCESSOR            // word following a '-' that is not followed by a digit
} atom_parse_state_t;
  65. static inline bool IsAWordChar(const int ch) {
  66. return (ch < 0x80) && (ch != ' ') && (isalnum(ch) || ch == '_');
  67. }
  68. static void ColouriseErlangDoc(unsigned int startPos, int length, int initStyle,
  69. WordList *keywordlists[], Accessor &styler) {
  70. StyleContext sc(startPos, length, initStyle, styler);
  71. WordList &reservedWords = *keywordlists[0];
  72. WordList &erlangBIFs = *keywordlists[1];
  73. WordList &erlangPreproc = *keywordlists[2];
  74. WordList &erlangModulesAtt = *keywordlists[3];
  75. WordList &erlangDoc = *keywordlists[4];
  76. WordList &erlangDocMacro = *keywordlists[5];
  77. int radix_digits = 0;
  78. int exponent_digits = 0;
  79. atom_parse_state_t parse_state = STATE_NULL;
  80. atom_parse_state_t old_parse_state = STATE_NULL;
  81. bool to_late_to_comment = false;
  82. char cur[100];
  83. int old_style = SCE_ERLANG_DEFAULT;
  84. styler.StartAt(startPos);
  85. for (; sc.More(); sc.Forward()) {
  86. int style = SCE_ERLANG_DEFAULT;
  87. if (STATE_NULL != parse_state) {
  88. switch (parse_state) {
  89. case STATE_NULL : sc.SetState(SCE_ERLANG_DEFAULT); break;
  90. /* COMMENTS ------------------------------------------------------*/
  91. case COMMENT : {
  92. if (sc.ch != '%') {
  93. to_late_to_comment = true;
  94. } else if (!to_late_to_comment && sc.ch == '%') {
  95. // Switch to comment level 2 (Function)
  96. sc.ChangeState(SCE_ERLANG_COMMENT_FUNCTION);
  97. old_style = SCE_ERLANG_COMMENT_FUNCTION;
  98. parse_state = COMMENT_FUNCTION;
  99. sc.Forward();
  100. }
  101. }
  102. // V--- Falling through!
  103. case COMMENT_FUNCTION : {
  104. if (sc.ch != '%') {
  105. to_late_to_comment = true;
  106. } else if (!to_late_to_comment && sc.ch == '%') {
  107. // Switch to comment level 3 (Module)
  108. sc.ChangeState(SCE_ERLANG_COMMENT_MODULE);
  109. old_style = SCE_ERLANG_COMMENT_MODULE;
  110. parse_state = COMMENT_MODULE;
  111. sc.Forward();
  112. }
  113. }
  114. // V--- Falling through!
  115. case COMMENT_MODULE : {
  116. if (parse_state != COMMENT) {
  117. // Search for comment documentation
  118. if (sc.chNext == '@') {
  119. old_parse_state = parse_state;
  120. parse_state = ('{' == sc.ch)
  121. ? COMMENT_DOC_MACRO
  122. : COMMENT_DOC;
  123. sc.ForwardSetState(sc.state);
  124. }
  125. }
  126. // All comments types fall here.
  127. if (sc.atLineEnd) {
  128. to_late_to_comment = false;
  129. sc.SetState(SCE_ERLANG_DEFAULT);
  130. parse_state = STATE_NULL;
  131. }
  132. } break;
  133. case COMMENT_DOC :
  134. // V--- Falling through!
  135. case COMMENT_DOC_MACRO : {
  136. if (!isalnum(sc.ch)) {
  137. // Try to match documentation comment
  138. sc.GetCurrent(cur, sizeof(cur));
  139. if (parse_state == COMMENT_DOC_MACRO
  140. && erlangDocMacro.InList(cur)) {
  141. sc.ChangeState(SCE_ERLANG_COMMENT_DOC_MACRO);
  142. while (sc.ch != '}' && !sc.atLineEnd)
  143. sc.Forward();
  144. } else if (erlangDoc.InList(cur)) {
  145. sc.ChangeState(SCE_ERLANG_COMMENT_DOC);
  146. } else {
  147. sc.ChangeState(old_style);
  148. }
  149. // Switch back to old state
  150. sc.SetState(old_style);
  151. parse_state = old_parse_state;
  152. }
  153. if (sc.atLineEnd) {
  154. to_late_to_comment = false;
  155. sc.ChangeState(old_style);
  156. sc.SetState(SCE_ERLANG_DEFAULT);
  157. parse_state = STATE_NULL;
  158. }
  159. } break;
  160. /* -------------------------------------------------------------- */
  161. /* Atoms ---------------------------------------------------------*/
  162. case ATOM_UNQUOTED : {
  163. if ('@' == sc.ch){
  164. parse_state = NODE_NAME_UNQUOTED;
  165. } else if (sc.ch == ':') {
  166. // Searching for module name
  167. if (sc.chNext == ' ') {
  168. // error
  169. sc.ChangeState(SCE_ERLANG_UNKNOWN);
  170. parse_state = STATE_NULL;
  171. } else {
  172. sc.Forward();
  173. if (isalnum(sc.ch)) {
  174. sc.GetCurrent(cur, sizeof(cur));
  175. sc.ChangeState(SCE_ERLANG_MODULES);
  176. sc.SetState(SCE_ERLANG_MODULES);
  177. }
  178. }
  179. } else if (!IsAWordChar(sc.ch)) {
  180. sc.GetCurrent(cur, sizeof(cur));
  181. if (reservedWords.InList(cur)) {
  182. style = SCE_ERLANG_KEYWORD;
  183. } else if (erlangBIFs.InList(cur)
  184. && strcmp(cur,"erlang:")){
  185. style = SCE_ERLANG_BIFS;
  186. } else if (sc.ch == '(' || '/' == sc.ch){
  187. style = SCE_ERLANG_FUNCTION_NAME;
  188. } else {
  189. style = SCE_ERLANG_ATOM;
  190. }
  191. sc.ChangeState(style);
  192. sc.SetState(SCE_ERLANG_DEFAULT);
  193. parse_state = STATE_NULL;
  194. }
  195. } break;
  196. case ATOM_QUOTED : {
  197. if ( '@' == sc.ch ){
  198. parse_state = NODE_NAME_QUOTED;
  199. } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
  200. sc.ChangeState(SCE_ERLANG_ATOM);
  201. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  202. parse_state = STATE_NULL;
  203. }
  204. } break;
  205. /* -------------------------------------------------------------- */
  206. /* Node names ----------------------------------------------------*/
  207. case NODE_NAME_UNQUOTED : {
  208. if ('@' == sc.ch) {
  209. sc.SetState(SCE_ERLANG_DEFAULT);
  210. parse_state = STATE_NULL;
  211. } else if (!IsAWordChar(sc.ch)) {
  212. sc.ChangeState(SCE_ERLANG_NODE_NAME);
  213. sc.SetState(SCE_ERLANG_DEFAULT);
  214. parse_state = STATE_NULL;
  215. }
  216. } break;
  217. case NODE_NAME_QUOTED : {
  218. if ('@' == sc.ch) {
  219. sc.SetState(SCE_ERLANG_DEFAULT);
  220. parse_state = STATE_NULL;
  221. } else if ('\'' == sc.ch && '\\' != sc.chPrev) {
  222. sc.ChangeState(SCE_ERLANG_NODE_NAME_QUOTED);
  223. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  224. parse_state = STATE_NULL;
  225. }
  226. } break;
  227. /* -------------------------------------------------------------- */
  228. /* Records -------------------------------------------------------*/
  229. case RECORD_START : {
  230. if ('\'' == sc.ch) {
  231. parse_state = RECORD_QUOTED;
  232. } else if (isalpha(sc.ch) && islower(sc.ch)) {
  233. parse_state = RECORD_UNQUOTED;
  234. } else { // error
  235. sc.SetState(SCE_ERLANG_DEFAULT);
  236. parse_state = STATE_NULL;
  237. }
  238. } break;
  239. case RECORD_UNQUOTED : {
  240. if (!IsAWordChar(sc.ch)) {
  241. sc.ChangeState(SCE_ERLANG_RECORD);
  242. sc.SetState(SCE_ERLANG_DEFAULT);
  243. parse_state = STATE_NULL;
  244. }
  245. } break;
  246. case RECORD_QUOTED : {
  247. if ('\'' == sc.ch && '\\' != sc.chPrev) {
  248. sc.ChangeState(SCE_ERLANG_RECORD_QUOTED);
  249. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  250. parse_state = STATE_NULL;
  251. }
  252. } break;
  253. /* -------------------------------------------------------------- */
  254. /* Macros --------------------------------------------------------*/
  255. case MACRO_START : {
  256. if ('\'' == sc.ch) {
  257. parse_state = MACRO_QUOTED;
  258. } else if (isalpha(sc.ch)) {
  259. parse_state = MACRO_UNQUOTED;
  260. } else { // error
  261. sc.SetState(SCE_ERLANG_DEFAULT);
  262. parse_state = STATE_NULL;
  263. }
  264. } break;
  265. case MACRO_UNQUOTED : {
  266. if (!IsAWordChar(sc.ch)) {
  267. sc.ChangeState(SCE_ERLANG_MACRO);
  268. sc.SetState(SCE_ERLANG_DEFAULT);
  269. parse_state = STATE_NULL;
  270. }
  271. } break;
  272. case MACRO_QUOTED : {
  273. if ('\'' == sc.ch && '\\' != sc.chPrev) {
  274. sc.ChangeState(SCE_ERLANG_MACRO_QUOTED);
  275. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  276. parse_state = STATE_NULL;
  277. }
  278. } break;
  279. /* -------------------------------------------------------------- */
  280. /* Numerics ------------------------------------------------------*/
  281. /* Simple integer */
  282. case NUMERAL_START : {
  283. if (isdigit(sc.ch)) {
  284. radix_digits *= 10;
  285. radix_digits += sc.ch - '0'; // Assuming ASCII here!
  286. } else if ('#' == sc.ch) {
  287. if (2 > radix_digits || 36 < radix_digits) {
  288. sc.SetState(SCE_ERLANG_DEFAULT);
  289. parse_state = STATE_NULL;
  290. } else {
  291. parse_state = NUMERAL_BASE_VALUE;
  292. }
  293. } else if ('.' == sc.ch && isdigit(sc.chNext)) {
  294. radix_digits = 0;
  295. parse_state = NUMERAL_FLOAT;
  296. } else if ('e' == sc.ch || 'E' == sc.ch) {
  297. exponent_digits = 0;
  298. parse_state = NUMERAL_EXPONENT;
  299. } else {
  300. radix_digits = 0;
  301. sc.ChangeState(SCE_ERLANG_NUMBER);
  302. sc.SetState(SCE_ERLANG_DEFAULT);
  303. parse_state = STATE_NULL;
  304. }
  305. } break;
  306. /* Integer in other base than 10 (x#yyy) */
  307. case NUMERAL_BASE_VALUE : {
  308. if (!is_radix(radix_digits,sc.ch)) {
  309. radix_digits = 0;
  310. if (!isalnum(sc.ch))
  311. sc.ChangeState(SCE_ERLANG_NUMBER);
  312. sc.SetState(SCE_ERLANG_DEFAULT);
  313. parse_state = STATE_NULL;
  314. }
  315. } break;
  316. /* Float (x.yyy) */
  317. case NUMERAL_FLOAT : {
  318. if ('e' == sc.ch || 'E' == sc.ch) {
  319. exponent_digits = 0;
  320. parse_state = NUMERAL_EXPONENT;
  321. } else if (!isdigit(sc.ch)) {
  322. sc.ChangeState(SCE_ERLANG_NUMBER);
  323. sc.SetState(SCE_ERLANG_DEFAULT);
  324. parse_state = STATE_NULL;
  325. }
  326. } break;
  327. /* Exponent, either integer or float (xEyy, x.yyEzzz) */
  328. case NUMERAL_EXPONENT : {
  329. if (('-' == sc.ch || '+' == sc.ch)
  330. && (isdigit(sc.chNext))) {
  331. sc.Forward();
  332. } else if (!isdigit(sc.ch)) {
  333. if (0 < exponent_digits)
  334. sc.ChangeState(SCE_ERLANG_NUMBER);
  335. sc.SetState(SCE_ERLANG_DEFAULT);
  336. parse_state = STATE_NULL;
  337. } else {
  338. ++exponent_digits;
  339. }
  340. } break;
  341. /* -------------------------------------------------------------- */
  342. /* Preprocessor --------------------------------------------------*/
  343. case PREPROCESSOR : {
  344. if (!IsAWordChar(sc.ch)) {
  345. sc.GetCurrent(cur, sizeof(cur));
  346. if (erlangPreproc.InList(cur)) {
  347. style = SCE_ERLANG_PREPROC;
  348. } else if (erlangModulesAtt.InList(cur)) {
  349. style = SCE_ERLANG_MODULES_ATT;
  350. }
  351. sc.ChangeState(style);
  352. sc.SetState(SCE_ERLANG_DEFAULT);
  353. parse_state = STATE_NULL;
  354. }
  355. } break;
  356. }
  357. } /* End of : STATE_NULL != parse_state */
  358. else
  359. {
  360. switch (sc.state) {
  361. case SCE_ERLANG_VARIABLE : {
  362. if (!IsAWordChar(sc.ch))
  363. sc.SetState(SCE_ERLANG_DEFAULT);
  364. } break;
  365. case SCE_ERLANG_STRING : {
  366. if (sc.ch == '\"' && sc.chPrev != '\\')
  367. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  368. } break;
  369. case SCE_ERLANG_COMMENT : {
  370. if (sc.atLineEnd)
  371. sc.SetState(SCE_ERLANG_DEFAULT);
  372. } break;
  373. case SCE_ERLANG_CHARACTER : {
  374. if (sc.chPrev == '\\') {
  375. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  376. } else if (sc.ch != '\\') {
  377. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  378. }
  379. } break;
  380. case SCE_ERLANG_OPERATOR : {
  381. if (sc.chPrev == '.') {
  382. if (sc.ch == '*' || sc.ch == '/' || sc.ch == '\\'
  383. || sc.ch == '^') {
  384. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  385. } else if (sc.ch == '\'') {
  386. sc.ForwardSetState(SCE_ERLANG_DEFAULT);
  387. } else {
  388. sc.SetState(SCE_ERLANG_DEFAULT);
  389. }
  390. } else {
  391. sc.SetState(SCE_ERLANG_DEFAULT);
  392. }
  393. } break;
  394. }
  395. }
  396. if (sc.state == SCE_ERLANG_DEFAULT) {
  397. bool no_new_state = false;
  398. switch (sc.ch) {
  399. case '\"' : sc.SetState(SCE_ERLANG_STRING); break;
  400. case '$' : sc.SetState(SCE_ERLANG_CHARACTER); break;
  401. case '%' : {
  402. parse_state = COMMENT;
  403. sc.SetState(SCE_ERLANG_COMMENT);
  404. } break;
  405. case '#' : {
  406. parse_state = RECORD_START;
  407. sc.SetState(SCE_ERLANG_UNKNOWN);
  408. } break;
  409. case '?' : {
  410. parse_state = MACRO_START;
  411. sc.SetState(SCE_ERLANG_UNKNOWN);
  412. } break;
  413. case '\'' : {
  414. parse_state = ATOM_QUOTED;
  415. sc.SetState(SCE_ERLANG_UNKNOWN);
  416. } break;
  417. case '+' :
  418. case '-' : {
  419. if (IsADigit(sc.chNext)) {
  420. parse_state = NUMERAL_START;
  421. radix_digits = 0;
  422. sc.SetState(SCE_ERLANG_UNKNOWN);
  423. } else if (sc.ch != '+') {
  424. parse_state = PREPROCESSOR;
  425. sc.SetState(SCE_ERLANG_UNKNOWN);
  426. }
  427. } break;
  428. default : no_new_state = true;
  429. }
  430. if (no_new_state) {
  431. if (isdigit(sc.ch)) {
  432. parse_state = NUMERAL_START;
  433. radix_digits = sc.ch - '0';
  434. sc.SetState(SCE_ERLANG_UNKNOWN);
  435. } else if (isupper(sc.ch) || '_' == sc.ch) {
  436. sc.SetState(SCE_ERLANG_VARIABLE);
  437. } else if (isalpha(sc.ch)) {
  438. parse_state = ATOM_UNQUOTED;
  439. sc.SetState(SCE_ERLANG_UNKNOWN);
  440. } else if (isoperator(static_cast<char>(sc.ch))
  441. || sc.ch == '\\') {
  442. sc.SetState(SCE_ERLANG_OPERATOR);
  443. }
  444. }
  445. }
  446. }
  447. sc.Complete();
  448. }
  449. static int ClassifyErlangFoldPoint(
  450. Accessor &styler,
  451. int styleNext,
  452. int keyword_start
  453. ) {
  454. int lev = 0;
  455. if (styler.Match(keyword_start,"case")
  456. || (
  457. styler.Match(keyword_start,"fun")
  458. && (SCE_ERLANG_FUNCTION_NAME != styleNext)
  459. )
  460. || styler.Match(keyword_start,"if")
  461. || styler.Match(keyword_start,"query")
  462. || styler.Match(keyword_start,"receive")
  463. ) {
  464. ++lev;
  465. } else if (styler.Match(keyword_start,"end")) {
  466. --lev;
  467. }
  468. return lev;
  469. }
  470. static void FoldErlangDoc(
  471. unsigned int startPos, int length, int initStyle,
  472. WordList** /*keywordlists*/, Accessor &styler
  473. ) {
  474. unsigned int endPos = startPos + length;
  475. int currentLine = styler.GetLine(startPos);
  476. int lev;
  477. int previousLevel = styler.LevelAt(currentLine) & SC_FOLDLEVELNUMBERMASK;
  478. int currentLevel = previousLevel;
  479. int styleNext = styler.StyleAt(startPos);
  480. int style = initStyle;
  481. int stylePrev;
  482. int keyword_start = 0;
  483. char ch;
  484. char chNext = styler.SafeGetCharAt(startPos);
  485. bool atEOL;
  486. for (unsigned int i = startPos; i < endPos; i++) {
  487. ch = chNext;
  488. chNext = styler.SafeGetCharAt(i + 1);
  489. // Get styles
  490. stylePrev = style;
  491. style = styleNext;
  492. styleNext = styler.StyleAt(i + 1);
  493. atEOL = ((ch == '\r') && (chNext != '\n')) || (ch == '\n');
  494. if (stylePrev != SCE_ERLANG_KEYWORD
  495. && style == SCE_ERLANG_KEYWORD) {
  496. keyword_start = i;
  497. }
  498. // Fold on keywords
  499. if (stylePrev == SCE_ERLANG_KEYWORD
  500. && style != SCE_ERLANG_KEYWORD
  501. && style != SCE_ERLANG_ATOM
  502. ) {
  503. currentLevel += ClassifyErlangFoldPoint(styler,
  504. styleNext,
  505. keyword_start);
  506. }
  507. // Fold on comments
  508. if (style == SCE_ERLANG_COMMENT
  509. || style == SCE_ERLANG_COMMENT_MODULE
  510. || style == SCE_ERLANG_COMMENT_FUNCTION) {
  511. if (ch == '%' && chNext == '{') {
  512. currentLevel++;
  513. } else if (ch == '%' && chNext == '}') {
  514. currentLevel--;
  515. }
  516. }
  517. // Fold on braces
  518. if (style == SCE_ERLANG_OPERATOR) {
  519. if (ch == '{' || ch == '(' || ch == '[') {
  520. currentLevel++;
  521. } else if (ch == '}' || ch == ')' || ch == ']') {
  522. currentLevel--;
  523. }
  524. }
  525. if (atEOL) {
  526. lev = previousLevel;
  527. if (currentLevel > previousLevel)
  528. lev |= SC_FOLDLEVELHEADERFLAG;
  529. if (lev != styler.LevelAt(currentLine))
  530. styler.SetLevel(currentLine, lev);
  531. currentLine++;
  532. previousLevel = currentLevel;
  533. }
  534. }
  535. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  536. styler.SetLevel(currentLine,
  537. previousLevel
  538. | (styler.LevelAt(currentLine) & ~SC_FOLDLEVELNUMBERMASK));
  539. }
/**
 * Descriptions of the Erlang lexer's word lists, terminated by a null entry.
 * The order is the one in which ColouriseErlangDoc reads keywordlists[0..5].
 */
static const char * const erlangWordListDesc[] = {
	"Erlang Reserved words",
	"Erlang BIFs",
	"Erlang Preprocessor",
	"Erlang Module Attributes",
	"Erlang Documentation",
	"Erlang Documentation Macro",
	0
};
/**
 * Lexer module object for Erlang: bundles the SCLEX_ERLANG identifier, the
 * name "erlang", the colourising and folding functions and the word list
 * descriptions.
 */
LexerModule lmErlang(
	SCLEX_ERLANG,
	ColouriseErlangDoc,
	"erlang",
	FoldErlangDoc,
	erlangWordListDesc);