PageRenderTime 62ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/LuaStudio/scintilla/src/LexPerl.cxx

http://starworld.googlecode.com/
C++ | 1392 lines | 1280 code | 38 blank | 74 comment | 448 complexity | a1235b614cd331447fb4e28198580b51 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. // Scintilla source code edit control
  2. /** @file LexPerl.cxx
  3. ** Lexer for subset of Perl.
  4. **/
  5. // Copyright 1998-2008 by Neil Hodgson <neilh@scintilla.org>
  6. // Lexical analysis fixes by Kein-Hong Man <mkh@pl.jaring.my>
  7. // The License.txt file describes the conditions under which this software may be distributed.
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <ctype.h>
  11. #include <stdio.h>
  12. #include <stdarg.h>
  13. #include "Platform.h"
  14. #include "PropSet.h"
  15. #include "Accessor.h"
  16. #include "KeyWords.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #ifdef SCI_NAMESPACE
  20. using namespace Scintilla;
  21. #endif
  22. #define PERLNUM_BINARY 1 // order is significant: 1-4 cannot have a dot
  23. #define PERLNUM_HEX 2 // selected when a literal starts with '0' followed by 'x'
  24. #define PERLNUM_OCTAL 3 // selected when a literal starts with '0' followed by a digit
  25. #define PERLNUM_FLOAT 4 // actually exponent part
  26. #define PERLNUM_DECIMAL 5 // 1-5 are numbers; 6-7 are strings
  27. #define PERLNUM_VECTOR 6 // decimal that turned out to contain more than one dot
  28. #define PERLNUM_V_VECTOR 7 // literal introduced by a leading 'v'
  29. #define PERLNUM_BAD 8 // digit out of range for the base; coloured as SCE_PL_ERROR
  30. #define BACK_NONE 0 // lookback state for bareword disambiguation:
  31. #define BACK_OPERATOR 1 // whitespace/comments are insignificant
  32. #define BACK_KEYWORD 2 // operators/keywords are needed for disambiguation
  33. #define HERE_DELIM_MAX 256 // capacity of the here-doc delimiter buffer
  34. static inline bool isEOLChar(char ch) {
  35. return (ch == '\r') || (ch == '\n');
  36. }
  37. static bool isSingleCharOp(char ch) {
  38. char strCharSet[2];
  39. strCharSet[0] = ch;
  40. strCharSet[1] = '\0';
  41. return (NULL != strstr("rwxoRWXOezsfdlpSbctugkTBMAC", strCharSet));
  42. }
  43. static inline bool isPerlOperator(char ch) {
  44. if (ch == '^' || ch == '&' || ch == '\\' ||
  45. ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  46. ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  47. ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  48. ch == '>' || ch == ',' ||
  49. ch == '?' || ch == '!' || ch == '.' || ch == '~')
  50. return true;
  51. // these chars are already tested before this call
  52. // ch == '%' || ch == '*' || ch == '<' || ch == '/' ||
  53. return false;
  54. }
  55. static bool isPerlKeyword(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  56. char s[100];
  57. unsigned int i, len = end - start;
  58. if (len > 30) { len = 30; }
  59. for (i = 0; i < len; i++, start++) s[i] = styler[start];
  60. s[i] = '\0';
  61. return keywords.InList(s);
  62. }
  63. // Note: as lexer uses chars, UTF-8 bytes are considered as <0 values
  64. // Note: iswordchar() was used in only one place in LexPerl, it is
  65. // unnecessary as '.' is processed as the concatenation operator, so
  66. // only isWordStart() is used in LexPerl
  67. static inline bool isWordStart(char ch) {
  68. return !isascii(ch) || isalnum(ch) || ch == '_';
  69. }
  70. static inline bool isEndVar(char ch) {
  71. return isascii(ch) && !isalnum(ch) && ch != '#' && ch != '$' &&
  72. ch != '_' && ch != '\'';
  73. }
  74. static inline bool isNonQuote(char ch) {
  75. return !isascii(ch) || isalnum(ch) || ch == '_';
  76. }
  77. static inline char actualNumStyle(int numberStyle) {
  78. if (numberStyle == PERLNUM_VECTOR || numberStyle == PERLNUM_V_VECTOR) {
  79. return SCE_PL_STRING;
  80. } else if (numberStyle == PERLNUM_BAD) {
  81. return SCE_PL_ERROR;
  82. }
  83. return SCE_PL_NUMBER;
  84. }
  85. static bool isMatch(Accessor &styler, int lengthDoc, int pos, const char *val) {
  86. if ((pos + static_cast<int>(strlen(val))) >= lengthDoc) {
  87. return false;
  88. }
  89. while (*val) {
  90. if (*val != styler[pos++]) {
  91. return false;
  92. }
  93. val++;
  94. }
  95. return true;
  96. }
  97. static char opposite(char ch) {
  98. if (ch == '(')
  99. return ')';
  100. if (ch == '[')
  101. return ']';
  102. if (ch == '{')
  103. return '}';
  104. if (ch == '<')
  105. return '>';
  106. return ch;
  107. }
  108. static void ColourisePerlDoc(unsigned int startPos, int length, int initStyle,
  109. WordList *keywordlists[], Accessor &styler) {
  110. // Lexer for perl often has to backtrack to start of current style to determine
  111. // which characters are being used as quotes, how deeply nested is the
  112. // start position and what the termination string is for here documents
  113. WordList &keywords = *keywordlists[0];
  114. // keywords that forces /PATTERN/ at all times
  115. WordList reWords;
  116. reWords.Set("elsif if split while");
  117. class HereDocCls {
  118. public:
  119. int State; // 0: '<<' encountered
  120. // 1: collect the delimiter
  121. // 2: here doc text (lines after the delimiter)
  122. char Quote; // the char after '<<'
  123. bool Quoted; // true if Quote in ('\'','"','`')
  124. int DelimiterLength; // strlen(Delimiter)
  125. char *Delimiter; // the Delimiter, 256: sizeof PL_tokenbuf
  126. HereDocCls() {
  127. State = 0;
  128. Quote = 0;
  129. Quoted = false;
  130. DelimiterLength = 0;
  131. Delimiter = new char[HERE_DELIM_MAX];
  132. Delimiter[0] = '\0';
  133. }
  134. ~HereDocCls() {
  135. delete []Delimiter;
  136. }
  137. };
  138. HereDocCls HereDoc; // TODO: FIFO for stacked here-docs
  139. class QuoteCls {
  140. public:
  141. int Rep;
  142. int Count;
  143. char Up;
  144. char Down;
  145. QuoteCls() {
  146. this->New(1);
  147. }
  148. void New(int r) {
  149. Rep = r;
  150. Count = 0;
  151. Up = '\0';
  152. Down = '\0';
  153. }
  154. void Open(char u) {
  155. Count++;
  156. Up = u;
  157. Down = opposite(Up);
  158. }
  159. };
  160. QuoteCls Quote;
  161. int state = initStyle;
  162. char numState = PERLNUM_DECIMAL;
  163. int dotCount = 0;
  164. unsigned int lengthDoc = startPos + length;
  165. //int sookedpos = 0; // these have no apparent use, see POD state
  166. //char sooked[100];
  167. //sooked[sookedpos] = '\0';
  168. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  169. // If in a long distance lexical state, seek to the beginning to find quote characters
  170. // Perl strings can be multi-line with embedded newlines, so backtrack.
  171. // Perl numbers have additional state during lexing, so backtrack too.
  172. if (state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX) {
  173. while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_HERE_DELIM)) {
  174. startPos--;
  175. }
  176. startPos = styler.LineStart(styler.GetLine(startPos));
  177. state = styler.StyleAt(startPos - 1);
  178. }
  179. // Backtrack for format body.
  180. if (state == SCE_PL_FORMAT) {
  181. while ((startPos > 1) && (styler.StyleAt(startPos) != SCE_PL_FORMAT_IDENT)) {
  182. startPos--;
  183. }
  184. startPos = styler.LineStart(styler.GetLine(startPos));
  185. state = styler.StyleAt(startPos - 1);
  186. }
  187. if ( state == SCE_PL_STRING_Q
  188. || state == SCE_PL_STRING_QQ
  189. || state == SCE_PL_STRING_QX
  190. || state == SCE_PL_STRING_QR
  191. || state == SCE_PL_STRING_QW
  192. || state == SCE_PL_REGEX
  193. || state == SCE_PL_REGSUBST
  194. || state == SCE_PL_STRING
  195. || state == SCE_PL_BACKTICKS
  196. || state == SCE_PL_CHARACTER
  197. || state == SCE_PL_NUMBER
  198. || state == SCE_PL_IDENTIFIER
  199. || state == SCE_PL_ERROR
  200. || state == SCE_PL_SUB_PROTOTYPE
  201. ) {
  202. while ((startPos > 1) && (styler.StyleAt(startPos - 1) == state)) {
  203. startPos--;
  204. }
  205. state = SCE_PL_DEFAULT;
  206. }
  207. // lookback at start of lexing to set proper state for backflag
  208. // after this, they are updated when elements are lexed
  209. int backflag = BACK_NONE;
  210. unsigned int backPos = startPos;
  211. if (backPos > 0) {
  212. backPos--;
  213. int sty = SCE_PL_DEFAULT;
  214. while ((backPos > 0) && (sty = styler.StyleAt(backPos),
  215. sty == SCE_PL_DEFAULT || sty == SCE_PL_COMMENTLINE))
  216. backPos--;
  217. if (sty == SCE_PL_OPERATOR)
  218. backflag = BACK_OPERATOR;
  219. else if (sty == SCE_PL_WORD)
  220. backflag = BACK_KEYWORD;
  221. }
  222. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  223. char chPrev = styler.SafeGetCharAt(startPos - 1);
  224. if (startPos == 0)
  225. chPrev = '\n';
  226. char chNext = styler[startPos];
  227. styler.StartSegment(startPos);
  228. for (unsigned int i = startPos; i < lengthDoc; i++) {
  229. char ch = chNext;
  230. // if the current character is not consumed due to the completion of an
  231. // earlier style, lexing can be restarted via a simple goto
  232. restartLexer:
  233. chNext = styler.SafeGetCharAt(i + 1);
  234. char chNext2 = styler.SafeGetCharAt(i + 2);
  235. if (styler.IsLeadByte(ch)) {
  236. chNext = styler.SafeGetCharAt(i + 2);
  237. chPrev = ' ';
  238. i += 1;
  239. continue;
  240. }
  241. if ((chPrev == '\r' && ch == '\n')) { // skip on DOS/Windows
  242. styler.ColourTo(i, state);
  243. chPrev = ch;
  244. continue;
  245. }
  246. if (HereDoc.State == 1 && isEOLChar(ch)) {
  247. // Begin of here-doc (the line after the here-doc delimiter):
  248. // Lexically, the here-doc starts from the next line after the >>, but the
  249. // first line of here-doc seem to follow the style of the last EOL sequence
  250. HereDoc.State = 2;
  251. if (HereDoc.Quoted) {
  252. if (state == SCE_PL_HERE_DELIM) {
  253. // Missing quote at end of string! We are stricter than perl.
  254. // Colour here-doc anyway while marking this bit as an error.
  255. state = SCE_PL_ERROR;
  256. }
  257. styler.ColourTo(i - 1, state);
  258. switch (HereDoc.Quote) {
  259. case '\'':
  260. state = SCE_PL_HERE_Q ;
  261. break;
  262. case '"':
  263. state = SCE_PL_HERE_QQ;
  264. break;
  265. case '`':
  266. state = SCE_PL_HERE_QX;
  267. break;
  268. }
  269. } else {
  270. styler.ColourTo(i - 1, state);
  271. switch (HereDoc.Quote) {
  272. case '\\':
  273. state = SCE_PL_HERE_Q ;
  274. break;
  275. default :
  276. state = SCE_PL_HERE_QQ;
  277. }
  278. }
  279. }
  280. if (HereDoc.State == 4 && isEOLChar(ch)) {
  281. // Start of format body.
  282. HereDoc.State = 0;
  283. styler.ColourTo(i - 1, state);
  284. state = SCE_PL_FORMAT;
  285. }
  286. if (state == SCE_PL_DEFAULT) {
  287. if ((isascii(ch) && isdigit(ch)) || (isascii(chNext) && isdigit(chNext) &&
  288. (ch == '.' || ch == 'v'))) {
  289. state = SCE_PL_NUMBER;
  290. backflag = BACK_NONE;
  291. numState = PERLNUM_DECIMAL;
  292. dotCount = 0;
  293. if (ch == '0') { // hex,bin,octal
  294. if (chNext == 'x') {
  295. numState = PERLNUM_HEX;
  296. } else if (chNext == 'b') {
  297. numState = PERLNUM_BINARY;
  298. } else if (isascii(chNext) && isdigit(chNext)) {
  299. numState = PERLNUM_OCTAL;
  300. }
  301. if (numState != PERLNUM_DECIMAL) {
  302. i++;
  303. ch = chNext;
  304. chNext = chNext2;
  305. }
  306. } else if (ch == 'v') { // vector
  307. numState = PERLNUM_V_VECTOR;
  308. }
  309. } else if (isWordStart(ch)) {
  310. // if immediately prefixed by '::', always a bareword
  311. state = SCE_PL_WORD;
  312. if (chPrev == ':' && styler.SafeGetCharAt(i - 2) == ':') {
  313. state = SCE_PL_IDENTIFIER;
  314. }
  315. unsigned int kw = i + 1;
  316. // first check for possible quote-like delimiter
  317. if (ch == 's' && !isNonQuote(chNext)) {
  318. state = SCE_PL_REGSUBST;
  319. Quote.New(2);
  320. } else if (ch == 'm' && !isNonQuote(chNext)) {
  321. state = SCE_PL_REGEX;
  322. Quote.New(1);
  323. } else if (ch == 'q' && !isNonQuote(chNext)) {
  324. state = SCE_PL_STRING_Q;
  325. Quote.New(1);
  326. } else if (ch == 'y' && !isNonQuote(chNext)) {
  327. state = SCE_PL_REGSUBST;
  328. Quote.New(2);
  329. } else if (ch == 't' && chNext == 'r' && !isNonQuote(chNext2)) {
  330. state = SCE_PL_REGSUBST;
  331. Quote.New(2);
  332. kw++;
  333. } else if (ch == 'q' && (chNext == 'q' || chNext == 'r' || chNext == 'w' || chNext == 'x') && !isNonQuote(chNext2)) {
  334. if (chNext == 'q') state = SCE_PL_STRING_QQ;
  335. else if (chNext == 'x') state = SCE_PL_STRING_QX;
  336. else if (chNext == 'r') state = SCE_PL_STRING_QR;
  337. else if (chNext == 'w') state = SCE_PL_STRING_QW;
  338. Quote.New(1);
  339. kw++;
  340. } else if (ch == 'x' && (chNext == '=' || // repetition
  341. !isWordStart(chNext) ||
  342. (isdigit(chPrev) && isdigit(chNext)))) {
  343. state = SCE_PL_OPERATOR;
  344. }
  345. // if potentially a keyword, scan forward and grab word, then check
  346. // if it's really one; if yes, disambiguation test is performed
  347. // otherwise it is always a bareword and we skip a lot of scanning
  348. // note: keywords assumed to be limited to [_a-zA-Z] only
  349. if (state == SCE_PL_WORD) {
  350. while (isWordStart(styler.SafeGetCharAt(kw))) kw++;
  351. if (!isPerlKeyword(styler.GetStartSegment(), kw, keywords, styler)) {
  352. state = SCE_PL_IDENTIFIER;
  353. }
  354. }
  355. // if already SCE_PL_IDENTIFIER, then no ambiguity, skip this
  356. // for quote-like delimiters/keywords, attempt to disambiguate
  357. // to select for bareword, change state -> SCE_PL_IDENTIFIER
  358. if (state != SCE_PL_IDENTIFIER && i > 0) {
  359. unsigned int j = i;
  360. bool moreback = false; // true if passed newline/comments
  361. bool brace = false; // true if opening brace found
  362. char ch2;
  363. // first look backwards past whitespace/comments for EOLs
  364. // if BACK_NONE, neither operator nor keyword, so skip test
  365. if (backflag != BACK_NONE) {
  366. while (--j > backPos) {
  367. if (isEOLChar(styler.SafeGetCharAt(j)))
  368. moreback = true;
  369. }
  370. ch2 = styler.SafeGetCharAt(j);
  371. if (ch2 == '{' && !moreback) {
  372. // {bareword: possible variable spec
  373. brace = true;
  374. } else if ((ch2 == '&' && styler.SafeGetCharAt(j - 1) != '&')
  375. // &bareword: subroutine call
  376. || (ch2 == '>' && styler.SafeGetCharAt(j - 1) == '-')
  377. // ->bareword: part of variable spec
  378. || (ch2 == 'b' && styler.Match(j - 2, "su"))) {
  379. // sub bareword: subroutine declaration
  380. // (implied BACK_KEYWORD, no keywords end in 'sub'!)
  381. state = SCE_PL_IDENTIFIER;
  382. }
  383. // if status still ambiguous, look forward after word past
  384. // tabs/spaces only; if ch2 isn't one of '[{(,' it can never
  385. // match anything, so skip the whole thing
  386. j = kw;
  387. if (state != SCE_PL_IDENTIFIER
  388. && (ch2 == '{' || ch2 == '(' || ch2 == '['|| ch2 == ',')
  389. && kw < lengthDoc) {
  390. while (ch2 = styler.SafeGetCharAt(j),
  391. (ch2 == ' ' || ch2 == '\t') && j < lengthDoc) {
  392. j++;
  393. }
  394. if ((ch2 == '}' && brace)
  395. // {bareword}: variable spec
  396. || (ch2 == '=' && styler.SafeGetCharAt(j + 1) == '>')) {
  397. // [{(, bareword=>: hash literal
  398. state = SCE_PL_IDENTIFIER;
  399. }
  400. }
  401. }
  402. }
  403. backflag = BACK_NONE;
  404. // an identifier or bareword
  405. if (state == SCE_PL_IDENTIFIER) {
  406. if ((!isWordStart(chNext) && chNext != '\'')
  407. || (chNext == '.' && chNext2 == '.')) {
  408. // We need that if length of word == 1!
  409. // This test is copied from the SCE_PL_WORD handler.
  410. styler.ColourTo(i, SCE_PL_IDENTIFIER);
  411. state = SCE_PL_DEFAULT;
  412. }
  413. // a keyword
  414. } else if (state == SCE_PL_WORD) {
  415. i = kw - 1;
  416. if (ch == '_' && chNext == '_' &&
  417. (isMatch(styler, lengthDoc, styler.GetStartSegment(), "__DATA__")
  418. || isMatch(styler, lengthDoc, styler.GetStartSegment(), "__END__"))) {
  419. styler.ColourTo(i, SCE_PL_DATASECTION);
  420. state = SCE_PL_DATASECTION;
  421. } else {
  422. if (isMatch(styler, lengthDoc, styler.GetStartSegment(), "format")) {
  423. state = SCE_PL_FORMAT_IDENT;
  424. HereDoc.State = 0;
  425. } else {
  426. state = SCE_PL_DEFAULT;
  427. }
  428. styler.ColourTo(i, SCE_PL_WORD);
  429. backflag = BACK_KEYWORD;
  430. backPos = i;
  431. }
  432. ch = styler.SafeGetCharAt(i);
  433. chNext = styler.SafeGetCharAt(i + 1);
  434. // a repetition operator 'x'
  435. } else if (state == SCE_PL_OPERATOR) {
  436. state = SCE_PL_DEFAULT;
  437. goto handleOperator;
  438. // quote-like delimiter, skip one char if double-char delimiter
  439. } else {
  440. i = kw - 1;
  441. chNext = styler.SafeGetCharAt(i + 1);
  442. }
  443. } else if (ch == '#') {
  444. state = SCE_PL_COMMENTLINE;
  445. } else if (ch == '\"') {
  446. state = SCE_PL_STRING;
  447. Quote.New(1);
  448. Quote.Open(ch);
  449. backflag = BACK_NONE;
  450. } else if (ch == '\'') {
  451. if (chPrev == '&') {
  452. // Archaic call
  453. styler.ColourTo(i, state);
  454. } else {
  455. state = SCE_PL_CHARACTER;
  456. Quote.New(1);
  457. Quote.Open(ch);
  458. }
  459. backflag = BACK_NONE;
  460. } else if (ch == '`') {
  461. state = SCE_PL_BACKTICKS;
  462. Quote.New(1);
  463. Quote.Open(ch);
  464. backflag = BACK_NONE;
  465. } else if (ch == '$') {
  466. if ((chNext == '{') || isspacechar(chNext)) {
  467. styler.ColourTo(i, SCE_PL_SCALAR);
  468. } else {
  469. state = SCE_PL_SCALAR;
  470. if ((chNext == '`' && chNext2 == '`')
  471. || (chNext == ':' && chNext2 == ':')) {
  472. i += 2;
  473. ch = styler.SafeGetCharAt(i);
  474. chNext = styler.SafeGetCharAt(i + 1);
  475. } else {
  476. i++;
  477. ch = chNext;
  478. chNext = chNext2;
  479. }
  480. }
  481. backflag = BACK_NONE;
  482. } else if (ch == '@') {
  483. if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
  484. || chNext == '_' || chNext == '+' || chNext == '-') {
  485. state = SCE_PL_ARRAY;
  486. } else if (chNext == ':' && chNext2 == ':') {
  487. state = SCE_PL_ARRAY;
  488. i += 2;
  489. ch = styler.SafeGetCharAt(i);
  490. chNext = styler.SafeGetCharAt(i + 1);
  491. } else if (chNext != '{' && chNext != '[') {
  492. styler.ColourTo(i, SCE_PL_ARRAY);
  493. } else {
  494. styler.ColourTo(i, SCE_PL_ARRAY);
  495. }
  496. backflag = BACK_NONE;
  497. } else if (ch == '%') {
  498. backflag = BACK_NONE;
  499. if (!isascii(chNext) || isalpha(chNext) || chNext == '#' || chNext == '$'
  500. || chNext == '_' || chNext == '!' || chNext == '^') {
  501. state = SCE_PL_HASH;
  502. i++;
  503. ch = chNext;
  504. chNext = chNext2;
  505. } else if (chNext == ':' && chNext2 == ':') {
  506. state = SCE_PL_HASH;
  507. i += 2;
  508. ch = styler.SafeGetCharAt(i);
  509. chNext = styler.SafeGetCharAt(i + 1);
  510. } else if (chNext == '{') {
  511. styler.ColourTo(i, SCE_PL_HASH);
  512. } else {
  513. goto handleOperator;
  514. }
  515. } else if (ch == '*') {
  516. backflag = BACK_NONE;
  517. char strch[2];
  518. strch[0] = chNext;
  519. strch[1] = '\0';
  520. if (chNext == ':' && chNext2 == ':') {
  521. state = SCE_PL_SYMBOLTABLE;
  522. i += 2;
  523. ch = styler.SafeGetCharAt(i);
  524. chNext = styler.SafeGetCharAt(i + 1);
  525. } else if (!isascii(chNext) || isalpha(chNext) || chNext == '_'
  526. || NULL != strstr("^/|,\\\";#%^:?<>)[]", strch)) {
  527. state = SCE_PL_SYMBOLTABLE;
  528. i++;
  529. ch = chNext;
  530. chNext = chNext2;
  531. } else if (chNext == '{') {
  532. styler.ColourTo(i, SCE_PL_SYMBOLTABLE);
  533. } else {
  534. if (chNext == '*') { // exponentiation
  535. i++;
  536. ch = chNext;
  537. chNext = chNext2;
  538. }
  539. goto handleOperator;
  540. }
  541. } else if (ch == '/' || (ch == '<' && chNext == '<')) {
  542. // Explicit backward peeking to set a consistent preferRE for
  543. // any slash found, so no longer need to track preferRE state.
  544. // Find first previous significant lexed element and interpret.
  545. // Test for HERE doc start '<<' shares this code, helps to
  546. // determine if it should be an operator.
  547. bool preferRE = false;
  548. bool isHereDoc = (ch == '<');
  549. bool hereDocSpace = false; // these are for corner case:
  550. bool hereDocScalar = false; // SCALAR [whitespace] '<<'
  551. unsigned int bk = (i > 0)? i - 1: 0;
  552. unsigned int bkend;
  553. char bkch;
  554. styler.Flush();
  555. if (styler.StyleAt(bk) == SCE_PL_DEFAULT)
  556. hereDocSpace = true;
  557. while ((bk > 0) && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
  558. styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
  559. bk--;
  560. }
  561. if (bk == 0) {
  562. // position 0 won't really be checked; rarely happens
  563. // hard to fix due to an unsigned index i
  564. preferRE = true;
  565. } else {
  566. int bkstyle = styler.StyleAt(bk);
  567. bkch = styler.SafeGetCharAt(bk);
  568. switch(bkstyle) {
  569. case SCE_PL_OPERATOR:
  570. preferRE = true;
  571. if (bkch == ')' || bkch == ']') {
  572. preferRE = false;
  573. } else if (bkch == '}') {
  574. // backtrack further, count balanced brace pairs
  575. // if a brace pair found, see if it's a variable
  576. int braceCount = 1;
  577. while (--bk > 0) {
  578. bkstyle = styler.StyleAt(bk);
  579. if (bkstyle == SCE_PL_OPERATOR) {
  580. bkch = styler.SafeGetCharAt(bk);
  581. if (bkch == ';') { // early out
  582. break;
  583. } else if (bkch == '}') {
  584. braceCount++;
  585. } else if (bkch == '{') {
  586. if (--braceCount == 0)
  587. break;
  588. }
  589. }
  590. }
  591. if (bk == 0) {
  592. // at beginning, true
  593. } else if (braceCount == 0) {
  594. // balanced { found, bk>0, skip more whitespace
  595. if (styler.StyleAt(--bk) == SCE_PL_DEFAULT) {
  596. while (bk > 0) {
  597. bkstyle = styler.StyleAt(--bk);
  598. if (bkstyle != SCE_PL_DEFAULT)
  599. break;
  600. }
  601. }
  602. bkstyle = styler.StyleAt(bk);
  603. if (bkstyle == SCE_PL_SCALAR
  604. || bkstyle == SCE_PL_ARRAY
  605. || bkstyle == SCE_PL_HASH
  606. || bkstyle == SCE_PL_SYMBOLTABLE
  607. || bkstyle == SCE_PL_OPERATOR) {
  608. preferRE = false;
  609. }
  610. }
  611. }
  612. break;
  613. case SCE_PL_IDENTIFIER:
  614. preferRE = true;
  615. if (bkch == '>') { // inputsymbol
  616. preferRE = false;
  617. break;
  618. }
  619. // backtrack to find "->" or "::" before identifier
  620. while (bk > 0 && styler.StyleAt(bk) == SCE_PL_IDENTIFIER) {
  621. bk--;
  622. }
  623. while (bk > 0) {
  624. bkstyle = styler.StyleAt(bk);
  625. if (bkstyle == SCE_PL_DEFAULT ||
  626. bkstyle == SCE_PL_COMMENTLINE) {
  627. } else if (bkstyle == SCE_PL_OPERATOR) {
  628. bkch = styler.SafeGetCharAt(bk);
  629. // test for "->" and "::"
  630. if ((bkch == '>' && styler.SafeGetCharAt(bk - 1) == '-')
  631. || (bkch == ':' && styler.SafeGetCharAt(bk - 1) == ':')) {
  632. preferRE = false;
  633. break;
  634. }
  635. } else {
  636. // bare identifier, if '/', /PATTERN/ unless digit/space immediately after '/'
  637. // if '//', always expect defined-or operator to follow identifier
  638. if (!isHereDoc &&
  639. (isspacechar(chNext) || isdigit(chNext) || chNext == '/'))
  640. preferRE = false;
  641. // HERE docs cannot have a space after the >>
  642. if (isspacechar(chNext))
  643. preferRE = false;
  644. break;
  645. }
  646. bk--;
  647. }
  648. break;
  649. case SCE_PL_SCALAR: // for $var<< case
  650. hereDocScalar = true;
  651. break;
  652. // for HERE docs, always true for preferRE
  653. case SCE_PL_WORD:
  654. preferRE = true;
  655. if (isHereDoc)
  656. break;
  657. // adopt heuristics similar to vim-style rules:
  658. // keywords always forced as /PATTERN/: split, if, elsif, while
  659. // everything else /PATTERN/ unless digit/space immediately after '/'
  660. // for '//', defined-or favoured unless special keywords
  661. bkend = bk + 1;
  662. while (bk > 0 && styler.StyleAt(bk-1) == SCE_PL_WORD) {
  663. bk--;
  664. }
  665. if (isPerlKeyword(bk, bkend, reWords, styler))
  666. break;
  667. if (isspacechar(chNext) || isdigit(chNext) || chNext == '/')
  668. preferRE = false;
  669. break;
  670. // other styles uses the default, preferRE=false
  671. case SCE_PL_POD:
  672. case SCE_PL_POD_VERB:
  673. case SCE_PL_HERE_Q:
  674. case SCE_PL_HERE_QQ:
  675. case SCE_PL_HERE_QX:
  676. preferRE = true;
  677. break;
  678. }
  679. }
  680. backflag = BACK_NONE;
  681. if (isHereDoc) { // handle HERE doc
  682. // if SCALAR whitespace '<<', *always* a HERE doc
  683. if (preferRE || (hereDocSpace && hereDocScalar)) {
  684. state = SCE_PL_HERE_DELIM;
  685. HereDoc.State = 0;
  686. } else { // << operator
  687. i++;
  688. ch = chNext;
  689. chNext = chNext2;
  690. goto handleOperator;
  691. }
  692. } else { // handle regexp
  693. if (preferRE) {
  694. state = SCE_PL_REGEX;
  695. Quote.New(1);
  696. Quote.Open(ch);
  697. } else { // / and // operators
  698. if (chNext == '/') {
  699. i++;
  700. ch = chNext;
  701. chNext = chNext2;
  702. }
  703. goto handleOperator;
  704. }
  705. }
  706. } else if (ch == '<') {
  707. // looks forward for matching > on same line
  708. unsigned int fw = i + 1;
  709. while (fw < lengthDoc) {
  710. char fwch = styler.SafeGetCharAt(fw);
  711. if (fwch == ' ') {
  712. if (styler.SafeGetCharAt(fw-1) != '\\' ||
  713. styler.SafeGetCharAt(fw-2) != '\\')
  714. goto handleOperator;
  715. } else if (isEOLChar(fwch) || isspacechar(fwch)) {
  716. goto handleOperator;
  717. } else if (fwch == '>') {
  718. if ((fw - i) == 2 && // '<=>' case
  719. styler.SafeGetCharAt(fw-1) == '=') {
  720. goto handleOperator;
  721. }
  722. styler.ColourTo(fw, SCE_PL_IDENTIFIER);
  723. i = fw;
  724. ch = fwch;
  725. chNext = styler.SafeGetCharAt(i+1);
  726. }
  727. fw++;
  728. }
  729. if (fw == lengthDoc)
  730. goto handleOperator;
  731. } else if (ch == '=' // POD
  732. && isalpha(chNext)
  733. && (isEOLChar(chPrev))) {
  734. state = SCE_PL_POD;
  735. backflag = BACK_NONE;
  736. //sookedpos = 0;
  737. //sooked[sookedpos] = '\0';
  738. } else if (ch == '-' // file test operators
  739. && isSingleCharOp(chNext)
  740. && !isalnum((chNext2 = styler.SafeGetCharAt(i+2)))) {
  741. styler.ColourTo(i + 1, SCE_PL_WORD);
  742. state = SCE_PL_DEFAULT;
  743. i++;
  744. ch = chNext;
  745. chNext = chNext2;
  746. backflag = BACK_NONE;
  747. } else if (ch == '-' // bareword promotion (-FOO cases)
  748. && ((isascii(chNext) && isalpha(chNext)) || chNext == '_')
  749. && backflag != BACK_NONE) {
  750. state = SCE_PL_IDENTIFIER;
  751. backflag = BACK_NONE;
  752. } else if (ch == '(' && i > 0) {
  753. // backtrack to identify if we're starting a sub prototype
  754. // for generality, we need to ignore whitespace/comments
  755. unsigned int bk = i - 1; // i > 0 tested above
  756. styler.Flush();
  757. while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
  758. styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
  759. bk--;
  760. }
  761. if (bk == 0 || styler.StyleAt(bk) != SCE_PL_IDENTIFIER) // check identifier
  762. goto handleOperator;
  763. while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_IDENTIFIER)) {
  764. bk--;
  765. }
  766. while (bk > 0 && (styler.StyleAt(bk) == SCE_PL_DEFAULT ||
  767. styler.StyleAt(bk) == SCE_PL_COMMENTLINE)) {
  768. bk--;
  769. }
  770. if (bk < 2 || styler.StyleAt(bk) != SCE_PL_WORD // check "sub" keyword
  771. || !styler.Match(bk - 2, "sub")) // assume suffix is unique!
  772. goto handleOperator;
  773. state = SCE_PL_SUB_PROTOTYPE;
  774. backflag = BACK_NONE;
  775. backPos = i; // needed for restart
  776. } else if (isPerlOperator(ch)) {
  777. if (ch == '.' && chNext == '.') { // .. and ...
  778. i++;
  779. if (chNext2 == '.') { i++; }
  780. state = SCE_PL_DEFAULT;
  781. ch = styler.SafeGetCharAt(i);
  782. chNext = styler.SafeGetCharAt(i + 1);
  783. }
  784. handleOperator:
  785. styler.ColourTo(i, SCE_PL_OPERATOR);
  786. backflag = BACK_OPERATOR;
  787. backPos = i;
  788. } else if (ch == 4 || ch == 26) { // ^D and ^Z ends valid perl source
  789. styler.ColourTo(i, SCE_PL_DATASECTION);
  790. state = SCE_PL_DATASECTION;
  791. } else {
  792. // keep colouring defaults to make restart easier
  793. styler.ColourTo(i, SCE_PL_DEFAULT);
  794. }
  795. } else if (state == SCE_PL_NUMBER) {
  796. if (ch == '.') {
  797. if (chNext == '.') {
  798. // double dot is always an operator
  799. goto numAtEnd;
  800. } else if (numState <= PERLNUM_FLOAT) {
  801. // non-decimal number or float exponent, consume next dot
  802. styler.ColourTo(i - 1, SCE_PL_NUMBER);
  803. state = SCE_PL_DEFAULT;
  804. goto handleOperator;
  805. } else { // decimal or vectors allows dots
  806. dotCount++;
  807. if (numState == PERLNUM_DECIMAL) {
  808. if (dotCount > 1) {
  809. if (isdigit(chNext)) { // really a vector
  810. numState = PERLNUM_VECTOR;
  811. } else // number then dot
  812. goto numAtEnd;
  813. }
  814. } else { // vectors
  815. if (!isdigit(chNext)) // vector then dot
  816. goto numAtEnd;
  817. }
  818. }
  819. } else if (ch == '_') {
  820. // permissive underscoring for number and vector literals
  821. } else if (!isascii(ch) || isalnum(ch)) {
  822. if (numState == PERLNUM_VECTOR || numState == PERLNUM_V_VECTOR) {
  823. if (!isascii(ch) || isalpha(ch)) {
  824. if (dotCount == 0) { // change to word
  825. state = SCE_PL_IDENTIFIER;
  826. } else { // vector then word
  827. goto numAtEnd;
  828. }
  829. }
  830. } else if (numState == PERLNUM_DECIMAL) {
  831. if (ch == 'E' || ch == 'e') { // exponent
  832. numState = PERLNUM_FLOAT;
  833. if (chNext == '+' || chNext == '-') {
  834. i++;
  835. ch = chNext;
  836. chNext = chNext2;
  837. }
  838. } else if (!isascii(ch) || !isdigit(ch)) { // number then word
  839. goto numAtEnd;
  840. }
  841. } else if (numState == PERLNUM_FLOAT) {
  842. if (!isdigit(ch)) { // float then word
  843. goto numAtEnd;
  844. }
  845. } else if (numState == PERLNUM_OCTAL) {
  846. if (!isdigit(ch))
  847. goto numAtEnd;
  848. else if (ch > '7')
  849. numState = PERLNUM_BAD;
  850. } else if (numState == PERLNUM_BINARY) {
  851. if (!isdigit(ch))
  852. goto numAtEnd;
  853. else if (ch > '1')
  854. numState = PERLNUM_BAD;
  855. } else if (numState == PERLNUM_HEX) {
  856. int ch2 = toupper(ch);
  857. if (!isdigit(ch) && !(ch2 >= 'A' && ch2 <= 'F'))
  858. goto numAtEnd;
  859. } else {//(numState == PERLNUM_BAD) {
  860. if (!isdigit(ch))
  861. goto numAtEnd;
  862. }
  863. } else {
  864. // complete current number or vector
  865. numAtEnd:
  866. styler.ColourTo(i - 1, actualNumStyle(numState));
  867. state = SCE_PL_DEFAULT;
  868. goto restartLexer;
  869. }
  870. } else if (state == SCE_PL_IDENTIFIER) {
  871. if (!isWordStart(chNext) && chNext != '\'') {
  872. styler.ColourTo(i, SCE_PL_IDENTIFIER);
  873. state = SCE_PL_DEFAULT;
  874. ch = ' ';
  875. }
  876. } else {
  877. if (state == SCE_PL_COMMENTLINE) {
  878. if (isEOLChar(ch)) {
  879. styler.ColourTo(i - 1, state);
  880. state = SCE_PL_DEFAULT;
  881. goto restartLexer;
  882. } else if (isEOLChar(chNext)) {
  883. styler.ColourTo(i, state);
  884. state = SCE_PL_DEFAULT;
  885. }
  886. } else if (state == SCE_PL_HERE_DELIM) {
  887. //
  888. // From perldata.pod:
  889. // ------------------
  890. // A line-oriented form of quoting is based on the shell ``here-doc''
  891. // syntax.
  892. // Following a << you specify a string to terminate the quoted material,
  893. // and all lines following the current line down to the terminating
  894. // string are the value of the item.
  895. // The terminating string may be either an identifier (a word),
  896. // or some quoted text.
  897. // If quoted, the type of quotes you use determines the treatment of
  898. // the text, just as in regular quoting.
  899. // An unquoted identifier works like double quotes.
  900. // There must be no space between the << and the identifier.
  901. // (If you put a space it will be treated as a null identifier,
  902. // which is valid, and matches the first empty line.)
  903. // (This is deprecated, -w warns of this syntax)
  904. // The terminating string must appear by itself (unquoted and with no
  905. // surrounding whitespace) on the terminating line.
  906. //
  907. // From Bash info:
  908. // ---------------
  909. // Specifier format is: <<[-]WORD
  910. // Optional '-' is for removal of leading tabs from here-doc.
  911. // Whitespace acceptable after <<[-] operator.
  912. //
  913. if (HereDoc.State == 0) { // '<<' encountered
  914. bool gotspace = false;
  915. unsigned int oldi = i;
  916. if (chNext == ' ' || chNext == '\t') {
  917. // skip whitespace; legal for quoted delimiters
  918. gotspace = true;
  919. do {
  920. i++;
  921. chNext = styler.SafeGetCharAt(i + 1);
  922. } while ((i + 1 < lengthDoc) && (chNext == ' ' || chNext == '\t'));
  923. chNext2 = styler.SafeGetCharAt(i + 2);
  924. }
  925. HereDoc.State = 1;
  926. HereDoc.Quote = chNext;
  927. HereDoc.Quoted = false;
  928. HereDoc.DelimiterLength = 0;
  929. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  930. if (chNext == '\'' || chNext == '"' || chNext == '`') {
  931. // a quoted here-doc delimiter
  932. i++;
  933. ch = chNext;
  934. chNext = chNext2;
  935. HereDoc.Quoted = true;
  936. } else if (isspacechar(chNext) || isdigit(chNext) || chNext == '\\'
  937. || chNext == '=' || chNext == '$' || chNext == '@'
  938. || ((isalpha(chNext) || chNext == '_') && gotspace)) {
  939. // left shift << or <<= operator cases
  940. // restore position if operator
  941. i = oldi;
  942. styler.ColourTo(i, SCE_PL_OPERATOR);
  943. state = SCE_PL_DEFAULT;
  944. backflag = BACK_OPERATOR;
  945. backPos = i;
  946. HereDoc.State = 0;
  947. goto restartLexer;
  948. } else {
  949. // an unquoted here-doc delimiter, no special handling
  950. // (cannot be prefixed by spaces/tabs), or
  951. // symbols terminates; deprecated zero-length delimiter
  952. }
  953. } else if (HereDoc.State == 1) { // collect the delimiter
  954. backflag = BACK_NONE;
  955. if (HereDoc.Quoted) { // a quoted here-doc delimiter
  956. if (ch == HereDoc.Quote) { // closing quote => end of delimiter
  957. styler.ColourTo(i, state);
  958. state = SCE_PL_DEFAULT;
  959. } else {
  960. if (ch == '\\' && chNext == HereDoc.Quote) { // escaped quote
  961. i++;
  962. ch = chNext;
  963. chNext = chNext2;
  964. }
  965. HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
  966. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  967. }
  968. } else { // an unquoted here-doc delimiter
  969. if (isalnum(ch) || ch == '_') {
  970. HereDoc.Delimiter[HereDoc.DelimiterLength++] = ch;
  971. HereDoc.Delimiter[HereDoc.DelimiterLength] = '\0';
  972. } else {
  973. styler.ColourTo(i - 1, state);
  974. state = SCE_PL_DEFAULT;
  975. goto restartLexer;
  976. }
  977. }
  978. if (HereDoc.DelimiterLength >= HERE_DELIM_MAX - 1) {
  979. styler.ColourTo(i - 1, state);
  980. state = SCE_PL_ERROR;
  981. goto restartLexer;
  982. }
  983. }
  984. } else if (HereDoc.State == 2) {
  985. // state == SCE_PL_HERE_Q || state == SCE_PL_HERE_QQ || state == SCE_PL_HERE_QX
  986. if (isEOLChar(chPrev) && isMatch(styler, lengthDoc, i, HereDoc.Delimiter)) {
  987. i += HereDoc.DelimiterLength;
  988. chPrev = styler.SafeGetCharAt(i - 1);
  989. ch = styler.SafeGetCharAt(i);
  990. if (isEOLChar(ch)) {
  991. styler.ColourTo(i - 1, state);
  992. state = SCE_PL_DEFAULT;
  993. backflag = BACK_NONE;
  994. HereDoc.State = 0;
  995. goto restartLexer;
  996. }
  997. chNext = styler.SafeGetCharAt(i + 1);
  998. }
  999. } else if (state == SCE_PL_POD
  1000. || state == SCE_PL_POD_VERB) {
  1001. if (isEOLChar(chPrev)) {
  1002. if (ch == ' ' || ch == '\t') {
  1003. styler.ColourTo(i - 1, state);
  1004. state = SCE_PL_POD_VERB;
  1005. } else {
  1006. styler.ColourTo(i - 1, state);
  1007. state = SCE_PL_POD;
  1008. if (ch == '=') {
  1009. if (isMatch(styler, lengthDoc, i, "=cut")) {
  1010. styler.ColourTo(i - 1 + 4, state);
  1011. i += 4;
  1012. state = SCE_PL_DEFAULT;
  1013. ch = styler.SafeGetCharAt(i);
  1014. //chNext = styler.SafeGetCharAt(i + 1);
  1015. goto restartLexer;
  1016. }
  1017. }
  1018. }
  1019. }
  1020. } else if (state == SCE_PL_SCALAR // variable names
  1021. || state == SCE_PL_ARRAY
  1022. || state == SCE_PL_HASH
  1023. || state == SCE_PL_SYMBOLTABLE) {
  1024. if (ch == ':' && chNext == ':') { // skip ::
  1025. i++;
  1026. ch = chNext;
  1027. chNext = chNext2;
  1028. }
  1029. else if (isEndVar(ch)) {
  1030. if (i == (styler.GetStartSegment() + 1)) {
  1031. // Special variable: $(, $_ etc.
  1032. styler.ColourTo(i, state);
  1033. state = SCE_PL_DEFAULT;
  1034. } else {
  1035. styler.ColourTo(i - 1, state);
  1036. state = SCE_PL_DEFAULT;
  1037. goto restartLexer;
  1038. }
  1039. }
  1040. } else if (state == SCE_PL_REGEX
  1041. || state == SCE_PL_STRING_QR
  1042. ) {
  1043. if (!Quote.Up && !isspacechar(ch)) {
  1044. Quote.Open(ch);
  1045. } else if (ch == '\\' && Quote.Up != '\\') {
  1046. // SG: Is it save to skip *every* escaped char?
  1047. i++;
  1048. ch = chNext;
  1049. chNext = styler.SafeGetCharAt(i + 1);
  1050. } else {
  1051. if (ch == Quote.Down /*&& chPrev != '\\'*/) {
  1052. Quote.Count--;
  1053. if (Quote.Count == 0) {
  1054. Quote.Rep--;
  1055. if (Quote.Up == Quote.Down) {
  1056. Quote.Count++;
  1057. }
  1058. }
  1059. if (!isalpha(chNext)) {
  1060. if (Quote.Rep <= 0) {
  1061. styler.ColourTo(i, state);
  1062. state = SCE_PL_DEFAULT;
  1063. ch = ' ';
  1064. }
  1065. }
  1066. } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
  1067. Quote.Count++;
  1068. } else if (!isascii(chNext) || !isalpha(chNext)) {
  1069. if (Quote.Rep <= 0) {
  1070. styler.ColourTo(i, state);
  1071. state = SCE_PL_DEFAULT;
  1072. ch = ' ';
  1073. }
  1074. }
  1075. }
  1076. } else if (state == SCE_PL_REGSUBST) {
  1077. if (!Quote.Up && !isspacechar(ch)) {
  1078. Quote.Open(ch);
  1079. } else if (ch == '\\' && Quote.Up != '\\') {
  1080. // SG: Is it save to skip *every* escaped char?
  1081. i++;
  1082. ch = chNext;
  1083. chNext = styler.SafeGetCharAt(i + 1);
  1084. } else {
  1085. if (Quote.Count == 0 && Quote.Rep == 1) {
  1086. /* We matched something like s(...) or tr{...}
  1087. * and are looking for the next matcher characters,
  1088. * which could be either bracketed ({...}) or non-bracketed
  1089. * (/.../).
  1090. *
  1091. * Number-signs are problematic. If they occur after
  1092. * the close of the first part, treat them like
  1093. * a Quote.Up char, even if they actually start comments.
  1094. *
  1095. * If we find an alnum, we end the regsubst, and punt.
  1096. *
  1097. * Eric Promislow ericp@activestate.com Aug 9,2000
  1098. */
  1099. if (isspacechar(ch)) {
  1100. // Keep going
  1101. }
  1102. else if (!isascii(ch) || isalnum(ch)) {
  1103. styler.ColourTo(i, state);
  1104. state = SCE_PL_DEFAULT;
  1105. ch = ' ';
  1106. } else {
  1107. Quote.Open(ch);
  1108. }
  1109. } else if (ch == Quote.Down /*&& chPrev != '\\'*/) {
  1110. Quote.Count--;
  1111. if (Quote.Count == 0) {
  1112. Quote.Rep--;
  1113. }
  1114. if (!isascii(chNext) || !isalpha(chNext)) {
  1115. if (Quote.Rep <= 0) {
  1116. styler.ColourTo(i, state);
  1117. state = SCE_PL_DEFAULT;
  1118. ch = ' ';
  1119. }
  1120. }
  1121. if (Quote.Up == Quote.Down) {
  1122. Quote.Count++;
  1123. }
  1124. } else if (ch == Quote.Up /*&& chPrev != '\\'*/) {
  1125. Quote.Count++;
  1126. } else if (!isascii(chNext) || !isalpha(chNext)) {
  1127. if (Quote.Rep <= 0) {
  1128. styler.ColourTo(i, state);
  1129. state = SCE_PL_DEFAULT;
  1130. ch = ' ';
  1131. }
  1132. }
  1133. }
  1134. } else if (state == SCE_PL_STRING_Q
  1135. || state == SCE_PL_STRING_QQ
  1136. || state == SCE_PL_STRING_QX
  1137. || state == SCE_PL_STRING_QW
  1138. || state == SCE_PL_STRING
  1139. || state == SCE_PL_CHARACTER
  1140. || state == SCE_PL_BACKTICKS
  1141. ) {
  1142. if (!Quote.Down && !isspacechar(ch)) {
  1143. Quote.Open(ch);
  1144. } else if (ch == '\\' && Quote.Up != '\\') {
  1145. i++;
  1146. ch = chNext;
  1147. chNext = styler.SafeGetCharAt(i + 1);
  1148. } else if (ch == Quote.Down) {
  1149. Quote.Count--;
  1150. if (Quote.Count == 0) {
  1151. Quote.Rep--;
  1152. if (Quote.Rep <= 0) {
  1153. styler.ColourTo(i, state);
  1154. state = SCE_PL_DEFAULT;
  1155. ch = ' ';
  1156. }
  1157. if (Quote.Up == Quote.Down) {
  1158. Quote.Count++;
  1159. }
  1160. }
  1161. } else if (ch == Quote.Up) {
  1162. Quote.Count++;
  1163. }
  1164. } else if (state == SCE_PL_SUB_PROTOTYPE) {
  1165. char strch[2];
  1166. strch[0] = ch;
  1167. strch[1] = '\0';
  1168. if (NULL != strstr("\\[$@%&*];", strch)) {
  1169. // keep going
  1170. } else if (ch == ')') {
  1171. styler.ColourTo(i, state);
  1172. state = SCE_PL_DEFAULT;
  1173. } else {
  1174. // abandon prototype, restart from '('
  1175. i = backPos;
  1176. styler.ColourTo(i, SCE_PL_OPERATOR);
  1177. ch = styler.SafeGetCharAt(i);
  1178. chNext = styler.SafeGetCharAt(i + 1);
  1179. state = SCE_PL_DEFAULT;
  1180. }
  1181. } else if (state == SCE_PL_FORMAT_IDENT) {
  1182. // occupies different HereDoc states to avoid clashing with HERE docs
  1183. if (HereDoc.State == 0) {
  1184. if ((isascii(ch) && isalpha(ch)) || ch == '_' // probable identifier
  1185. || ch == '=') { // no identifier
  1186. HereDoc.State = 3;
  1187. HereDoc.Quoted = false; // whitespace flag
  1188. } else if (ch == ' ' || ch == '\t') {
  1189. styler.ColourTo(i, SCE_PL_DEFAULT);
  1190. } else {
  1191. state = SCE_PL_DEFAULT;
  1192. HereDoc.State = 0;
  1193. goto restartLexer;
  1194. }
  1195. }
  1196. if (HereDoc.State == 3) { // with just a '=', state goes 0->3->4
  1197. if (ch == '=') {
  1198. styler.ColourTo(i, SCE_PL_FORMAT_IDENT);
  1199. state = SCE_PL_DEFAULT;
  1200. HereDoc.State = 4;
  1201. } else if (ch == ' ' || ch == '\t') {
  1202. HereDoc.Quoted = true;
  1203. } else if (isEOLChar(ch) || (HereDoc.Quoted && ch != '=')) {
  1204. // abandon format, restart from after 'format'
  1205. i = backPos + 1;
  1206. ch = styler.SafeGetCharAt(i);
  1207. chNext = styler.SafeGetCharAt(i + 1);
  1208. state = SCE_PL_DEFAULT;
  1209. HereDoc.State = 0;
  1210. }
  1211. }
  1212. } else if (state == SCE_PL_FORMAT) {
  1213. if (isEOLChar(chPrev)) {
  1214. styler.ColourTo(i - 1, state);
  1215. if (ch == '.' && isEOLChar(chNext)) {
  1216. styler.ColourTo(i, state);
  1217. state = SCE_PL_DEFAULT;
  1218. }
  1219. }
  1220. }
  1221. }
  1222. if (state == SCE_PL_ERROR) {
  1223. break;
  1224. }
  1225. chPrev = ch;
  1226. }
  1227. styler.ColourTo(lengthDoc - 1, state);
  1228. }
  1229. static bool IsCommentLine(int line, Accessor &styler) {
  1230. int pos = styler.LineStart(line);
  1231. int eol_pos = styler.LineStart(line + 1) - 1;
  1232. for (int i = pos; i < eol_pos; i++) {
  1233. char ch = styler[i];
  1234. int style = styler.StyleAt(i);
  1235. if (ch == '#' && style == SCE_PL_COMMENTLINE)
  1236. return true;
  1237. else if (ch != ' ' && ch != '\t')
  1238. return false;
  1239. }
  1240. return false;
  1241. }
  1242. static void FoldPerlDoc(unsigned int startPos, int length, int, WordList *[],
  1243. Accessor &styler) {
  1244. bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
  1245. bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  1246. // Custom folding of POD and packages
  1247. bool foldPOD = styler.GetPropertyInt("fold.perl.pod", 1) != 0;
  1248. bool foldPackage = styler.GetPropertyInt("fold.perl.package", 1) != 0;
  1249. unsigned int endPos = startPos + length;
  1250. int visibleChars = 0;
  1251. int lineCurrent = styler.GetLine(startPos);
  1252. int levelPrev = SC_FOLDLEVELBASE;
  1253. if (lineCurrent > 0)
  1254. levelPrev = styler.LevelAt(lineCurrent - 1) >> 16;
  1255. int levelCurrent = levelPrev;
  1256. char chNext = styler[startPos];
  1257. char chPrev = styler.SafeGetCharAt(startPos - 1);
  1258. int styleNext = styler.StyleAt(startPos);
  1259. // Used at end of line to determine if the line was a package definition
  1260. bool isPackageLine = false;
  1261. bool isPodHeading = false;
  1262. for (unsigned int i = startPos; i < endPos; i++) {
  1263. char ch = chNext;
  1264. chNext = styler.SafeGetCharAt(i + 1);
  1265. int style = styleNext;
  1266. styleNext = styler.StyleAt(i + 1);
  1267. bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
  1268. bool atLineStart = isEOLChar(chPrev) || i == 0;
  1269. // Comment folding
  1270. if (foldComment && atEOL && IsCommentLine(lineCurrent, styler))
  1271. {
  1272. if (!IsCommentLine(lineCurrent - 1, styler)
  1273. && IsCommentLine(lineCurrent + 1, styler))
  1274. levelCurrent++;
  1275. else if (IsCommentLine(lineCurrent - 1, styler)
  1276. && !IsCommentLine(lineCurrent+1, styler))
  1277. levelCurrent--;
  1278. }
  1279. if (style == SCE_C_OPERATOR) {
  1280. if (ch == '{') {
  1281. levelCurrent++;
  1282. } else if (ch == '}') {
  1283. levelCurrent--;
  1284. }
  1285. }
  1286. // Custom POD folding
  1287. if (foldPOD && atLineStart) {
  1288. int stylePrevCh = (i) ? styler.StyleAt(i - 1):SCE_PL_DEFAULT;
  1289. if (style == SCE_PL_POD) {
  1290. if (stylePrevCh != SCE_PL_POD && stylePrevCh != SCE_PL_POD_VERB)
  1291. levelCurrent++;
  1292. else if (styler.Match(i, "=cut"))
  1293. levelCurrent--;
  1294. else if (styler.Match(i, "=head"))
  1295. isPodHeading = true;
  1296. } else if (style == SCE_PL_DATASECTION) {
  1297. if (ch == '=' && isalpha(chNext) && levelCurrent == SC_FOLDLEVELBASE)
  1298. levelCurrent++;
  1299. else if (styler.Match(i, "=cut") && levelCurrent > SC_FOLDLEVELBASE)
  1300. levelCurrent--;
  1301. else if (styler.Match(i, "=head"))
  1302. isPodHeading = true;
  1303. // if package used or unclosed brace, level > SC_FOLDLEVELBASE!
  1304. // reset needed as level test is vs. SC_FOLDLEVELBASE
  1305. else if (styler.Match(i, "__END__"))
  1306. levelCurrent = SC_FOLDLEVELBASE;
  1307. }
  1308. }
  1309. // Custom package folding
  1310. if (foldPackage && atLineStart) {
  1311. if (style == SCE_PL_WORD && styler.Match(i, "package")) {
  1312. isPackageLine = true;
  1313. }
  1314. }
  1315. if (atEOL) {
  1316. int lev = levelPrev;
  1317. if (isPodHeading) {
  1318. lev = levelPrev - 1;
  1319. lev |= SC_FOLDLEVELHEADERFLAG;
  1320. isPodHeading = false;
  1321. }
  1322. // Check if line was a package declaration
  1323. // because packages need "special" treatment
  1324. if (isPackageLine) {
  1325. lev = SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG;
  1326. levelCurrent = SC_FOLDLEVELBASE + 1;
  1327. isPackageLine = false;
  1328. }
  1329. lev |= levelCurrent << 16;
  1330. if (visibleChars == 0 && foldCompact)
  1331. lev |= SC_FOLDLEVELWHITEFLAG;
  1332. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  1333. lev |= SC_FOLDLEVELHEADERFLAG;
  1334. if (lev != styler.LevelAt(lineCurrent)) {
  1335. styler.SetLevel(lineCurrent, lev);
  1336. }
  1337. lineCurrent++;
  1338. levelPrev = levelCurrent;
  1339. visibleChars = 0;
  1340. }
  1341. if (!isspacechar(ch))
  1342. visibleChars++;
  1343. chPrev = ch;
  1344. }
  1345. // Fill in the real level of the next line, keeping the current flags as they will be filled in later
  1346. int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
  1347. styler.SetLevel(lineCurrent, levelPrev | flagsNext);
  1348. }
// Table of keyword-list descriptions for the Perl lexer, handed to the
// lmPerl LexerModule definition. It holds a single entry, "Keywords",
// and is terminated by a null (0) pointer.
static const char * const perlWordListDesc[] = {
	"Keywords",
	0
};
// Lexer module object for Perl: associates the SCLEX_PERL identifier and the
// name "perl" with the colouring routine (ColourisePerlDoc), the folding
// routine (FoldPerlDoc) and the word list descriptions.
// NOTE(review): the trailing 8 is presumably the number of style bits -
// confirm against the LexerModule constructor.
LexerModule lmPerl(SCLEX_PERL, ColourisePerlDoc, "perl", FoldPerlDoc, perlWordListDesc, 8);