PageRenderTime 62ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/ext/scintilla/lexers/LexHTML.cxx

http://github.com/jwu/exlibs
C++ | 2077 lines | 1860 code | 111 blank | 106 comment | 1390 complexity | 1de90cd72c96e72f5aa6c5c44c95fedf MD5 | raw file
Possible License(s): LGPL-3.0, GPL-3.0, LGPL-2.1
  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <stdio.h>
  10. #include <stdarg.h>
  11. #include <assert.h>
  12. #include <ctype.h>
  13. #include "ILexer.h"
  14. #include "Scintilla.h"
  15. #include "SciLexer.h"
  16. #include "PropSetSimple.h"
  17. #include "WordList.h"
  18. #include "LexAccessor.h"
  19. #include "Accessor.h"
  20. #include "StyleContext.h"
  21. #include "CharacterSet.h"
  22. #include "LexerModule.h"
  23. #ifdef SCI_NAMESPACE
  24. using namespace Scintilla;
  25. #endif
  26. #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  27. #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  28. #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  29. enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  30. enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  31. static inline bool IsAWordChar(const int ch) {
  32. return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  33. }
  34. static inline bool IsAWordStart(const int ch) {
  35. return (ch < 0x80) && (isalnum(ch) || ch == '_');
  36. }
  37. inline bool IsOperator(int ch) {
  38. if (isascii(ch) && isalnum(ch))
  39. return false;
  40. // '.' left out as it is used to make up numbers
  41. if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  42. ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  43. ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  44. ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  45. ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  46. ch == '?' || ch == '!' || ch == '.' || ch == '~')
  47. return true;
  48. return false;
  49. }
  50. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  51. size_t i = 0;
  52. for (; (i < end - start + 1) && (i < len-1); i++) {
  53. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  54. }
  55. s[i] = '\0';
  56. }
  57. static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  58. size_t i = 0;
  59. for (; i < sLen-1; i++) {
  60. char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  61. if ((i == 0) && !IsAWordStart(ch))
  62. break;
  63. if ((i > 0) && !IsAWordChar(ch))
  64. break;
  65. s[i] = ch;
  66. }
  67. s[i] = '\0';
  68. return s;
  69. }
  70. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  71. char s[100];
  72. GetTextSegment(styler, start, end, s, sizeof(s));
  73. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  74. if (strstr(s, "src")) // External script
  75. return eScriptNone;
  76. if (strstr(s, "vbs"))
  77. return eScriptVBS;
  78. if (strstr(s, "pyth"))
  79. return eScriptPython;
  80. if (strstr(s, "javas"))
  81. return eScriptJS;
  82. if (strstr(s, "jscr"))
  83. return eScriptJS;
  84. if (strstr(s, "php"))
  85. return eScriptPHP;
  86. if (strstr(s, "xml")) {
  87. const char *xml = strstr(s, "xml");
  88. for (const char *t=s; t<xml; t++) {
  89. if (!IsASpace(*t)) {
  90. return prevValue;
  91. }
  92. }
  93. return eScriptXML;
  94. }
  95. return prevValue;
  96. }
  97. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  98. int iResult = 0;
  99. char s[100];
  100. GetTextSegment(styler, start, end, s, sizeof(s));
  101. if (0 == strncmp(s, "php", 3)) {
  102. iResult = 3;
  103. }
  104. return iResult;
  105. }
  106. static script_type ScriptOfState(int state) {
  107. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  108. return eScriptPython;
  109. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  110. return eScriptVBS;
  111. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  112. return eScriptJS;
  113. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  114. return eScriptPHP;
  115. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  116. return eScriptSGML;
  117. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  118. return eScriptSGMLblock;
  119. } else {
  120. return eScriptNone;
  121. }
  122. }
  123. static int statePrintForState(int state, script_mode inScriptType) {
  124. int StateToPrint = state;
  125. if (state >= SCE_HJ_START) {
  126. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  127. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  128. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  129. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  130. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  131. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  132. }
  133. }
  134. return StateToPrint;
  135. }
  136. static int stateForPrintState(int StateToPrint) {
  137. int state;
  138. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  139. state = StateToPrint - SCE_HA_PYTHON;
  140. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  141. state = StateToPrint - SCE_HA_VBS;
  142. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  143. state = StateToPrint - SCE_HA_JS;
  144. } else {
  145. state = StateToPrint;
  146. }
  147. return state;
  148. }
  149. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  150. return IsADigit(styler[start]) || (styler[start] == '.') ||
  151. (styler[start] == '-') || (styler[start] == '#');
  152. }
  153. static inline bool isStringState(int state) {
  154. bool bResult;
  155. switch (state) {
  156. case SCE_HJ_DOUBLESTRING:
  157. case SCE_HJ_SINGLESTRING:
  158. case SCE_HJA_DOUBLESTRING:
  159. case SCE_HJA_SINGLESTRING:
  160. case SCE_HB_STRING:
  161. case SCE_HBA_STRING:
  162. case SCE_HP_STRING:
  163. case SCE_HP_CHARACTER:
  164. case SCE_HP_TRIPLE:
  165. case SCE_HP_TRIPLEDOUBLE:
  166. case SCE_HPA_STRING:
  167. case SCE_HPA_CHARACTER:
  168. case SCE_HPA_TRIPLE:
  169. case SCE_HPA_TRIPLEDOUBLE:
  170. case SCE_HPHP_HSTRING:
  171. case SCE_HPHP_SIMPLESTRING:
  172. case SCE_HPHP_HSTRING_VARIABLE:
  173. case SCE_HPHP_COMPLEX_VARIABLE:
  174. bResult = true;
  175. break;
  176. default :
  177. bResult = false;
  178. break;
  179. }
  180. return bResult;
  181. }
  182. static inline bool stateAllowsTermination(int state) {
  183. bool allowTermination = !isStringState(state);
  184. if (allowTermination) {
  185. switch (state) {
  186. case SCE_HB_COMMENTLINE:
  187. case SCE_HPHP_COMMENT:
  188. case SCE_HP_COMMENTLINE:
  189. case SCE_HPA_COMMENTLINE:
  190. allowTermination = false;
  191. }
  192. }
  193. return allowTermination;
  194. }
  195. // not really well done, since it's only comments that should lex the %> and <%
  196. static inline bool isCommentASPState(int state) {
  197. bool bResult;
  198. switch (state) {
  199. case SCE_HJ_COMMENT:
  200. case SCE_HJ_COMMENTLINE:
  201. case SCE_HJ_COMMENTDOC:
  202. case SCE_HB_COMMENTLINE:
  203. case SCE_HP_COMMENTLINE:
  204. case SCE_HPHP_COMMENT:
  205. case SCE_HPHP_COMMENTLINE:
  206. bResult = true;
  207. break;
  208. default :
  209. bResult = false;
  210. break;
  211. }
  212. return bResult;
  213. }
  214. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  215. bool wordIsNumber = IsNumber(start, styler);
  216. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  217. if (wordIsNumber) {
  218. chAttr = SCE_H_NUMBER;
  219. } else {
  220. char s[100];
  221. GetTextSegment(styler, start, end, s, sizeof(s));
  222. if (keywords.InList(s))
  223. chAttr = SCE_H_ATTRIBUTE;
  224. }
  225. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  226. // No keywords -> all are known
  227. chAttr = SCE_H_ATTRIBUTE;
  228. styler.ColourTo(end, chAttr);
  229. }
  230. static int classifyTagHTML(unsigned int start, unsigned int end,
  231. WordList &keywords, Accessor &styler, bool &tagDontFold,
  232. bool caseSensitive, bool isXml, bool allowScripts) {
  233. char s[30 + 2];
  234. // Copy after the '<'
  235. unsigned int i = 0;
  236. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  237. char ch = styler[cPos];
  238. if ((ch != '<') && (ch != '/')) {
  239. s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  240. }
  241. }
  242. //The following is only a quick hack, to see if this whole thing would work
  243. //we first need the tagname with a trailing space...
  244. s[i] = ' ';
  245. s[i+1] = '\0';
  246. // if the current language is XML, I can fold any tag
  247. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  248. //...to find it in the list of no-container-tags
  249. tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
  250. //now we can remove the trailing space
  251. s[i] = '\0';
  252. // No keywords -> all are known
  253. char chAttr = SCE_H_TAGUNKNOWN;
  254. if (s[0] == '!') {
  255. chAttr = SCE_H_SGML_DEFAULT;
  256. } else if (!keywords || keywords.InList(s)) {
  257. chAttr = SCE_H_TAG;
  258. }
  259. styler.ColourTo(end, chAttr);
  260. if (chAttr == SCE_H_TAG) {
  261. if (allowScripts && 0 == strcmp(s, "script")) {
  262. // check to see if this is a self-closing tag by sniffing ahead
  263. bool isSelfClose = false;
  264. for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
  265. char ch = styler.SafeGetCharAt(cPos, '\0');
  266. if (ch == '\0' || ch == '>')
  267. break;
  268. else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
  269. isSelfClose = true;
  270. break;
  271. }
  272. }
  273. // do not enter a script state if the tag self-closed
  274. if (!isSelfClose)
  275. chAttr = SCE_H_SCRIPT;
  276. } else if (!isXml && 0 == strcmp(s, "comment")) {
  277. chAttr = SCE_H_COMMENT;
  278. }
  279. }
  280. return chAttr;
  281. }
  282. static void classifyWordHTJS(unsigned int start, unsigned int end,
  283. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  284. char chAttr = SCE_HJ_WORD;
  285. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  286. if (wordIsNumber)
  287. chAttr = SCE_HJ_NUMBER;
  288. else {
  289. char s[30 + 1];
  290. unsigned int i = 0;
  291. for (; i < end - start + 1 && i < 30; i++) {
  292. s[i] = styler[start + i];
  293. }
  294. s[i] = '\0';
  295. if (keywords.InList(s))
  296. chAttr = SCE_HJ_KEYWORD;
  297. }
  298. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  299. }
  300. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  301. char chAttr = SCE_HB_IDENTIFIER;
  302. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  303. if (wordIsNumber)
  304. chAttr = SCE_HB_NUMBER;
  305. else {
  306. char s[100];
  307. GetTextSegment(styler, start, end, s, sizeof(s));
  308. if (keywords.InList(s)) {
  309. chAttr = SCE_HB_WORD;
  310. if (strcmp(s, "rem") == 0)
  311. chAttr = SCE_HB_COMMENTLINE;
  312. }
  313. }
  314. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  315. if (chAttr == SCE_HB_COMMENTLINE)
  316. return SCE_HB_COMMENTLINE;
  317. else
  318. return SCE_HB_DEFAULT;
  319. }
  320. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
  321. bool wordIsNumber = IsADigit(styler[start]);
  322. char s[30 + 1];
  323. unsigned int i = 0;
  324. for (; i < end - start + 1 && i < 30; i++) {
  325. s[i] = styler[start + i];
  326. }
  327. s[i] = '\0';
  328. char chAttr = SCE_HP_IDENTIFIER;
  329. if (0 == strcmp(prevWord, "class"))
  330. chAttr = SCE_HP_CLASSNAME;
  331. else if (0 == strcmp(prevWord, "def"))
  332. chAttr = SCE_HP_DEFNAME;
  333. else if (wordIsNumber)
  334. chAttr = SCE_HP_NUMBER;
  335. else if (keywords.InList(s))
  336. chAttr = SCE_HP_WORD;
  337. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  338. strcpy(prevWord, s);
  339. }
  340. // Update the word colour to default or keyword
  341. // Called when in a PHP word
  342. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  343. char chAttr = SCE_HPHP_DEFAULT;
  344. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  345. if (wordIsNumber)
  346. chAttr = SCE_HPHP_NUMBER;
  347. else {
  348. char s[100];
  349. GetTextSegment(styler, start, end, s, sizeof(s));
  350. if (keywords.InList(s))
  351. chAttr = SCE_HPHP_WORD;
  352. }
  353. styler.ColourTo(end, chAttr);
  354. }
  355. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  356. char s[30 + 1];
  357. unsigned int i = 0;
  358. for (; i < end - start + 1 && i < 30; i++) {
  359. s[i] = styler[start + i];
  360. }
  361. s[i] = '\0';
  362. return keywords.InList(s);
  363. }
  364. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  365. char s[30 + 1];
  366. unsigned int i = 0;
  367. for (; i < end - start + 1 && i < 30; i++) {
  368. s[i] = styler[start + i];
  369. }
  370. s[i] = '\0';
  371. return (0 == strcmp(s, "[CDATA["));
  372. }
  373. // Return the first state to reach when entering a scripting language
  374. static int StateForScript(script_type scriptLanguage) {
  375. int Result;
  376. switch (scriptLanguage) {
  377. case eScriptVBS:
  378. Result = SCE_HB_START;
  379. break;
  380. case eScriptPython:
  381. Result = SCE_HP_START;
  382. break;
  383. case eScriptPHP:
  384. Result = SCE_HPHP_DEFAULT;
  385. break;
  386. case eScriptXML:
  387. Result = SCE_H_TAGUNKNOWN;
  388. break;
  389. case eScriptSGML:
  390. Result = SCE_H_SGML_DEFAULT;
  391. break;
  392. case eScriptComment:
  393. Result = SCE_H_COMMENT;
  394. break;
  395. default :
  396. Result = SCE_HJ_START;
  397. break;
  398. }
  399. return Result;
  400. }
  401. static inline bool ishtmlwordchar(int ch) {
  402. return !isascii(ch) ||
  403. (isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
  404. }
  405. static inline bool issgmlwordchar(int ch) {
  406. return !isascii(ch) ||
  407. (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
  408. }
  409. static inline bool IsPhpWordStart(int ch) {
  410. return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
  411. }
  412. static inline bool IsPhpWordChar(int ch) {
  413. return IsADigit(ch) || IsPhpWordStart(ch);
  414. }
  415. static bool InTagState(int state) {
  416. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  417. state == SCE_H_SCRIPT ||
  418. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  419. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  420. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  421. }
  422. static bool IsCommentState(const int state) {
  423. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  424. }
  425. static bool IsScriptCommentState(const int state) {
  426. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  427. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  428. }
  429. static bool isLineEnd(int ch) {
  430. return ch == '\r' || ch == '\n';
  431. }
  432. static bool isOKBeforeRE(int ch) {
  433. return (ch == '(') || (ch == '=') || (ch == ',');
  434. }
  435. static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
  436. if (strlen(blockType) == 0) {
  437. return ((ch == '%') && (chNext == '>'));
  438. } else if ((0 == strcmp(blockType, "inherit")) ||
  439. (0 == strcmp(blockType, "namespace")) ||
  440. (0 == strcmp(blockType, "include")) ||
  441. (0 == strcmp(blockType, "page"))) {
  442. return ((ch == '/') && (chNext == '>'));
  443. } else if (0 == strcmp(blockType, "%")) {
  444. return isLineEnd(ch);
  445. } else if (0 == strcmp(blockType, "{")) {
  446. return ch == '}';
  447. } else {
  448. return (ch == '>');
  449. }
  450. }
  451. static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
  452. if (strlen(blockType) == 0) {
  453. return 0;
  454. } else if (0 == strcmp(blockType, "%")) {
  455. return ((ch == '%') && (chNext == '}'));
  456. } else if (0 == strcmp(blockType, "{")) {
  457. return ((ch == '}') && (chNext == '}'));
  458. } else {
  459. return 0;
  460. }
  461. }
  462. static bool isPHPStringState(int state) {
  463. return
  464. (state == SCE_HPHP_HSTRING) ||
  465. (state == SCE_HPHP_SIMPLESTRING) ||
  466. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  467. (state == SCE_HPHP_COMPLEX_VARIABLE);
  468. }
  469. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  470. int j;
  471. const int beginning = i - 1;
  472. bool isValidSimpleString = false;
  473. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  474. i++;
  475. char ch = styler.SafeGetCharAt(i);
  476. const char chNext = styler.SafeGetCharAt(i + 1);
  477. if (!IsPhpWordStart(ch)) {
  478. if (ch == '\'' && IsPhpWordStart(chNext)) {
  479. i++;
  480. ch = chNext;
  481. isSimpleString = true;
  482. } else {
  483. phpStringDelimiter[0] = '\0';
  484. return beginning;
  485. }
  486. }
  487. phpStringDelimiter[0] = ch;
  488. i++;
  489. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  490. if (!IsPhpWordChar(styler[j])) {
  491. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  492. isValidSimpleString = true;
  493. j++;
  494. break;
  495. } else {
  496. phpStringDelimiter[0] = '\0';
  497. return beginning;
  498. }
  499. }
  500. if (j - i < phpStringDelimiterSize - 2)
  501. phpStringDelimiter[j-i+1] = styler[j];
  502. else
  503. i++;
  504. }
  505. if (isSimpleString && !isValidSimpleString) {
  506. phpStringDelimiter[0] = '\0';
  507. return beginning;
  508. }
  509. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  510. return j - 1;
  511. }
  512. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  513. Accessor &styler, bool isXml) {
  514. WordList &keywords = *keywordlists[0];
  515. WordList &keywords2 = *keywordlists[1];
  516. WordList &keywords3 = *keywordlists[2];
  517. WordList &keywords4 = *keywordlists[3];
  518. WordList &keywords5 = *keywordlists[4];
  519. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  520. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  521. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  522. char prevWord[200];
  523. prevWord[0] = '\0';
  524. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  525. phpStringDelimiter[0] = '\0';
  526. int StateToPrint = initStyle;
  527. int state = stateForPrintState(StateToPrint);
  528. char makoBlockType[200];
  529. makoBlockType[0] = '\0';
  530. char djangoBlockType[2];
  531. djangoBlockType[0] = '\0';
  532. // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
  533. if (InTagState(state)) {
  534. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  535. startPos--;
  536. length++;
  537. }
  538. state = SCE_H_DEFAULT;
  539. }
  540. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  541. if (isPHPStringState(state)) {
  542. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  543. startPos--;
  544. length++;
  545. state = styler.StyleAt(startPos);
  546. }
  547. if (startPos == 0)
  548. state = SCE_H_DEFAULT;
  549. }
  550. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  551. int lineCurrent = styler.GetLine(startPos);
  552. int lineState;
  553. if (lineCurrent > 0) {
  554. lineState = styler.GetLineState(lineCurrent);
  555. } else {
  556. // Default client and ASP scripting language is JavaScript
  557. lineState = eScriptJS << 8;
  558. // property asp.default.language
  559. // Script in ASP code is initially assumed to be in JavaScript.
  560. // To change this to VBScript set asp.default.language to 2. Python is 3.
  561. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  562. }
  563. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  564. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  565. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  566. bool tagDontFold = false; //some HTML tags should not be folded
  567. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  568. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  569. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  570. script_type scriptLanguage = ScriptOfState(state);
  571. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  572. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  573. scriptLanguage = eScriptComment;
  574. }
  575. script_type beforeLanguage = ScriptOfState(beforePreProc);
  576. // property fold.html
  577. // Folding is turned on or off for HTML and XML files with this option.
  578. // The fold option must also be on for folding to occur.
  579. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  580. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  581. // property fold.html.preprocessor
  582. // Folding is turned on or off for scripts embedded in HTML files with this option.
  583. // The default is on.
  584. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  585. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  586. // property fold.hypertext.comment
  587. // Allow folding for comments in scripts embedded in HTML.
  588. // The default is off.
  589. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  590. // property fold.hypertext.heredoc
  591. // Allow folding for heredocs in scripts embedded in HTML.
  592. // The default is off.
  593. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  594. // property html.tags.case.sensitive
  595. // For XML and HTML, setting this property to 1 will make tags match in a case
  596. // sensitive way which is the expected behaviour for XML and XHTML.
  597. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  598. // property lexer.xml.allow.scripts
  599. // Set to 0 to disable scripts in XML.
  600. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  601. // property lexer.html.mako
  602. // Set to 1 to enable the mako template language.
  603. const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
  604. // property lexer.html.django
  605. // Set to 1 to enable the django template language.
  606. const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
  607. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  608. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  609. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  610. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  611. int levelCurrent = levelPrev;
  612. int visibleChars = 0;
  613. int lineStartVisibleChars = 0;
  614. int chPrev = ' ';
  615. int ch = ' ';
  616. int chPrevNonWhite = ' ';
  617. // look back to set chPrevNonWhite properly for better regex colouring
  618. if (scriptLanguage == eScriptJS && startPos > 0) {
  619. int back = startPos;
  620. int style = 0;
  621. while (--back) {
  622. style = styler.StyleAt(back);
  623. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  624. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  625. break;
  626. }
  627. if (style == SCE_HJ_SYMBOLS) {
  628. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  629. }
  630. }
  631. styler.StartSegment(startPos);
  632. const int lengthDoc = startPos + length;
  633. for (int i = startPos; i < lengthDoc; i++) {
  634. const int chPrev2 = chPrev;
  635. chPrev = ch;
  636. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  637. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  638. chPrevNonWhite = ch;
  639. ch = static_cast<unsigned char>(styler[i]);
  640. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  641. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  642. // Handle DBCS codepages
  643. if (styler.IsLeadByte(static_cast<char>(ch))) {
  644. chPrev = ' ';
  645. i += 1;
  646. continue;
  647. }
  648. if ((!IsASpace(ch) || !foldCompact) && fold)
  649. visibleChars++;
  650. if (!IsASpace(ch))
  651. lineStartVisibleChars++;
  652. // decide what is the current state to print (depending of the script tag)
  653. StateToPrint = statePrintForState(state, inScriptType);
  654. // handle script folding
  655. if (fold) {
  656. switch (scriptLanguage) {
  657. case eScriptJS:
  658. case eScriptPHP:
  659. //not currently supported case eScriptVBS:
  660. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  661. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  662. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  663. if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  664. levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
  665. }
  666. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  667. levelCurrent--;
  668. }
  669. break;
  670. case eScriptPython:
  671. if (state != SCE_HP_COMMENTLINE) {
  672. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  673. levelCurrent++;
  674. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  675. // check if the number of tabs is lower than the level
  676. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  677. for (int j = 0; Findlevel > 0; j++) {
  678. char chTmp = styler.SafeGetCharAt(i + j + 1);
  679. if (chTmp == '\t') {
  680. Findlevel -= 8;
  681. } else if (chTmp == ' ') {
  682. Findlevel--;
  683. } else {
  684. break;
  685. }
  686. }
  687. if (Findlevel > 0) {
  688. levelCurrent -= Findlevel / 8;
  689. if (Findlevel % 8)
  690. levelCurrent--;
  691. }
  692. }
  693. }
  694. break;
  695. default:
  696. break;
  697. }
  698. }
  699. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  700. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  701. // Avoid triggering two times on Dos/Win
  702. // New line -> record any line state onto /next/ line
  703. if (fold) {
  704. int lev = levelPrev;
  705. if (visibleChars == 0)
  706. lev |= SC_FOLDLEVELWHITEFLAG;
  707. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  708. lev |= SC_FOLDLEVELHEADERFLAG;
  709. styler.SetLevel(lineCurrent, lev);
  710. visibleChars = 0;
  711. levelPrev = levelCurrent;
  712. }
  713. lineCurrent++;
  714. lineStartVisibleChars = 0;
  715. styler.SetLineState(lineCurrent,
  716. ((inScriptType & 0x03) << 0) |
  717. ((tagOpened & 0x01) << 2) |
  718. ((tagClosing & 0x01) << 3) |
  719. ((aspScript & 0x0F) << 4) |
  720. ((clientScript & 0x0F) << 8) |
  721. ((beforePreProc & 0xFF) << 12));
  722. }
  723. // Allow falling through to mako handling code if newline is going to end a block
  724. if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
  725. (!isMako || (0 != strcmp(makoBlockType, "%")))) {
  726. }
  727. // generic end of script processing
  728. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  729. // Check if it's the end of the script tag (or any other HTML tag)
  730. switch (state) {
  731. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  732. case SCE_H_DOUBLESTRING:
  733. case SCE_H_SINGLESTRING:
  734. case SCE_HJ_COMMENT:
  735. case SCE_HJ_COMMENTDOC:
  736. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  737. // the end of script marker from some JS interpreters.
  738. case SCE_HB_COMMENTLINE:
  739. case SCE_HBA_COMMENTLINE:
  740. case SCE_HJ_DOUBLESTRING:
  741. case SCE_HJ_SINGLESTRING:
  742. case SCE_HJ_REGEX:
  743. case SCE_HB_STRING:
  744. case SCE_HBA_STRING:
  745. case SCE_HP_STRING:
  746. case SCE_HP_TRIPLE:
  747. case SCE_HP_TRIPLEDOUBLE:
  748. case SCE_HPHP_HSTRING:
  749. case SCE_HPHP_SIMPLESTRING:
  750. case SCE_HPHP_COMMENT:
  751. case SCE_HPHP_COMMENTLINE:
  752. break;
  753. default :
  754. // check if the closing tag is a script tag
  755. if (const char *tag =
  756. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  757. state == SCE_H_COMMENT ? "comment" : 0) {
  758. int j = i + 2;
  759. int chr;
  760. do {
  761. chr = static_cast<int>(*tag++);
  762. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  763. if (chr != 0) break;
  764. }
  765. // closing tag of the script (it's a closing HTML tag anyway)
  766. styler.ColourTo(i - 1, StateToPrint);
  767. state = SCE_H_TAGUNKNOWN;
  768. inScriptType = eHtml;
  769. scriptLanguage = eScriptNone;
  770. clientScript = eScriptJS;
  771. i += 2;
  772. visibleChars += 2;
  773. tagClosing = true;
  774. continue;
  775. }
  776. }
  777. /////////////////////////////////////
  778. // handle the start of PHP pre-processor = Non-HTML
  779. else if ((state != SCE_H_ASPAT) &&
  780. !isPHPStringState(state) &&
  781. (state != SCE_HPHP_COMMENT) &&
  782. (state != SCE_HPHP_COMMENTLINE) &&
  783. (ch == '<') &&
  784. (chNext == '?') &&
  785. !IsScriptCommentState(state) ) {
  786. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
  787. if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
  788. styler.ColourTo(i - 1, StateToPrint);
  789. beforePreProc = state;
  790. i++;
  791. visibleChars++;
  792. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  793. if (scriptLanguage == eScriptXML)
  794. styler.ColourTo(i, SCE_H_XMLSTART);
  795. else
  796. styler.ColourTo(i, SCE_H_QUESTION);
  797. state = StateForScript(scriptLanguage);
  798. if (inScriptType == eNonHtmlScript)
  799. inScriptType = eNonHtmlScriptPreProc;
  800. else
  801. inScriptType = eNonHtmlPreProc;
  802. // Fold the whole script, except when it is the XML first tag (all XML-like tags in this case)
  803. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  804. levelCurrent++;
  805. }
  806. // should be better
  807. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  808. continue;
  809. }
  810. // handle the start of Mako template Python code
  811. else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
  812. (lineStartVisibleChars == 1 && ch == '%') ||
  813. (ch == '$' && chNext == '{') ||
  814. (ch == '<' && chNext == '/' && chNext2 == '%'))) {
  815. if (ch == '%')
  816. strcpy(makoBlockType, "%");
  817. else if (ch == '$')
  818. strcpy(makoBlockType, "{");
  819. else if (chNext == '/')
  820. GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
  821. else
  822. GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
  823. styler.ColourTo(i - 1, StateToPrint);
  824. beforePreProc = state;
  825. if (inScriptType == eNonHtmlScript)
  826. inScriptType = eNonHtmlScriptPreProc;
  827. else
  828. inScriptType = eNonHtmlPreProc;
  829. if (chNext == '/') {
  830. i += 2;
  831. visibleChars += 2;
  832. } else if (ch != '%') {
  833. i++;
  834. visibleChars++;
  835. }
  836. state = SCE_HP_START;
  837. scriptLanguage = eScriptPython;
  838. styler.ColourTo(i, SCE_H_ASP);
  839. if (foldHTMLPreprocessor && ch == '<')
  840. levelCurrent++;
  841. if (ch != '%' && ch != '$') {
  842. i += strlen(makoBlockType);
  843. visibleChars += strlen(makoBlockType);
  844. if (keywords4.InList(makoBlockType))
  845. styler.ColourTo(i, SCE_HP_WORD);
  846. else
  847. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  848. }
  849. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  850. continue;
  851. }
  852. // handle the start of Django template code
  853. else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
  854. if (chNext == '%')
  855. strcpy(djangoBlockType, "%");
  856. else
  857. strcpy(djangoBlockType, "{");
  858. styler.ColourTo(i - 1, StateToPrint);
  859. beforePreProc = state;
  860. if (inScriptType == eNonHtmlScript)
  861. inScriptType = eNonHtmlScriptPreProc;
  862. else
  863. inScriptType = eNonHtmlPreProc;
  864. i += 1;
  865. visibleChars += 1;
  866. state = SCE_HP_START;
  867. beforeLanguage = scriptLanguage;
  868. scriptLanguage = eScriptPython;
  869. styler.ColourTo(i, SCE_H_ASP);
  870. if (foldHTMLPreprocessor && chNext == '%')
  871. levelCurrent++;
  872. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  873. continue;
  874. }
  875. // handle the start of ASP pre-processor = Non-HTML
  876. else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  877. styler.ColourTo(i - 1, StateToPrint);
  878. beforePreProc = state;
  879. if (inScriptType == eNonHtmlScript)
  880. inScriptType = eNonHtmlScriptPreProc;
  881. else
  882. inScriptType = eNonHtmlPreProc;
  883. if (chNext2 == '@') {
  884. i += 2; // place as if it was the second next char treated
  885. visibleChars += 2;
  886. state = SCE_H_ASPAT;
  887. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  888. styler.ColourTo(i + 3, SCE_H_ASP);
  889. state = SCE_H_XCCOMMENT;
  890. scriptLanguage = eScriptVBS;
  891. continue;
  892. } else {
  893. if (chNext2 == '=') {
  894. i += 2; // place as if it was the second next char treated
  895. visibleChars += 2;
  896. } else {
  897. i++; // place as if it was the next char treated
  898. visibleChars++;
  899. }
  900. state = StateForScript(aspScript);
  901. }
  902. scriptLanguage = eScriptVBS;
  903. styler.ColourTo(i, SCE_H_ASP);
  904. // fold whole script
  905. if (foldHTMLPreprocessor)
  906. levelCurrent++;
  907. // should be better
  908. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  909. continue;
  910. }
  911. /////////////////////////////////////
  912. // handle the start of SGML language (DTD)
  913. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  914. (chPrev == '<') &&
  915. (ch == '!') &&
  916. (StateToPrint != SCE_H_CDATA) &&
  917. (!IsCommentState(StateToPrint)) &&
  918. (!IsScriptCommentState(StateToPrint)) ) {
  919. beforePreProc = state;
  920. styler.ColourTo(i - 2, StateToPrint);
  921. if ((chNext == '-') && (chNext2 == '-')) {
  922. state = SCE_H_COMMENT; // wait for a pending command
  923. styler.ColourTo(i + 2, SCE_H_COMMENT);
  924. i += 2; // follow styling after the --
  925. } else if (isWordCdata(i + 1, i + 7, styler)) {
  926. state = SCE_H_CDATA;
  927. } else {
  928. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  929. scriptLanguage = eScriptSGML;
  930. state = SCE_H_SGML_COMMAND; // wait for a pending command
  931. }
  932. // fold whole tag (-- when closing the tag)
  933. if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
  934. levelCurrent++;
  935. continue;
  936. }
  937. // handle the end of Mako Python code
  938. else if (isMako &&
  939. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  940. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  941. isMakoBlockEnd(ch, chNext, makoBlockType)) {
  942. if (state == SCE_H_ASPAT) {
  943. aspScript = segIsScriptingIndicator(styler,
  944. styler.GetStartSegment(), i - 1, aspScript);
  945. }
  946. if (state == SCE_HP_WORD) {
  947. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  948. } else {
  949. styler.ColourTo(i - 1, StateToPrint);
  950. }
  951. if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
  952. i++;
  953. visibleChars++;
  954. }
  955. if (0 != strcmp(makoBlockType, "%")) {
  956. styler.ColourTo(i, SCE_H_ASP);
  957. }
  958. state = beforePreProc;
  959. if (inScriptType == eNonHtmlScriptPreProc)
  960. inScriptType = eNonHtmlScript;
  961. else
  962. inScriptType = eHtml;
  963. if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
  964. levelCurrent--;
  965. }
  966. scriptLanguage = eScriptNone;
  967. continue;
  968. }
  969. // handle the end of Django template code
  970. else if (isDjango &&
  971. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  972. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  973. isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
  974. if (state == SCE_H_ASPAT) {
  975. aspScript = segIsScriptingIndicator(styler,
  976. styler.GetStartSegment(), i - 1, aspScript);
  977. }
  978. if (state == SCE_HP_WORD) {
  979. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  980. } else {
  981. styler.ColourTo(i - 1, StateToPrint);
  982. }
  983. i += 1;
  984. visibleChars += 1;
  985. styler.ColourTo(i, SCE_H_ASP);
  986. state = beforePreProc;
  987. if (inScriptType == eNonHtmlScriptPreProc)
  988. inScriptType = eNonHtmlScript;
  989. else
  990. inScriptType = eHtml;
  991. if (foldHTMLPreprocessor) {
  992. levelCurrent--;
  993. }
  994. scriptLanguage = beforeLanguage;
  995. continue;
  996. }
  997. // handle the end of a pre-processor = Non-HTML
  998. else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  999. (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
  1000. (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
  1001. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  1002. if (state == SCE_H_ASPAT) {
  1003. aspScript = segIsScriptingIndicator(styler,
  1004. styler.GetStartSegment(), i - 1, aspScript);
  1005. }
  1006. // Bounce out of any ASP mode
  1007. switch (state) {
  1008. case SCE_HJ_WORD:
  1009. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1010. break;
  1011. case SCE_HB_WORD:
  1012. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1013. break;
  1014. case SCE_HP_WORD:
  1015. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  1016. break;
  1017. case SCE_HPHP_WORD:
  1018. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  1019. break;
  1020. case SCE_H_XCCOMMENT:
  1021. styler.ColourTo(i - 1, state);
  1022. break;
  1023. default :
  1024. styler.ColourTo(i - 1, StateToPrint);
  1025. break;
  1026. }
  1027. if (scriptLanguage != eScriptSGML) {
  1028. i++;
  1029. visibleChars++;
  1030. }
  1031. if (ch == '%')
  1032. styler.ColourTo(i, SCE_H_ASP);
  1033. else if (scriptLanguage == eScriptXML)
  1034. styler.ColourTo(i, SCE_H_XMLEND);
  1035. else if (scriptLanguage == eScriptSGML)
  1036. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1037. else
  1038. styler.ColourTo(i, SCE_H_QUESTION);
  1039. state = beforePreProc;
  1040. if (inScriptType == eNonHtmlScriptPreProc)
  1041. inScriptType = eNonHtmlScript;
  1042. else
  1043. inScriptType = eHtml;
  1044. // Unfold all scripting languages, except for XML tag
  1045. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  1046. levelCurrent--;
  1047. }
  1048. scriptLanguage = eScriptNone;
  1049. continue;
  1050. }
  1051. /////////////////////////////////////
  1052. switch (state) {
  1053. case SCE_H_DEFAULT:
  1054. if (ch == '<') {
  1055. // in HTML, fold on tag open and unfold on tag close
  1056. tagOpened = true;
  1057. tagClosing = (chNext == '/');
  1058. styler.ColourTo(i - 1, StateToPrint);
  1059. if (chNext != '!')
  1060. state = SCE_H_TAGUNKNOWN;
  1061. } else if (ch == '&') {
  1062. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  1063. state = SCE_H_ENTITY;
  1064. }
  1065. break;
  1066. case SCE_H_SGML_DEFAULT:
  1067. case SCE_H_SGML_BLOCK_DEFAULT:
  1068. // if (scriptLanguage == eScriptSGMLblock)
  1069. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  1070. if (ch == '\"') {
  1071. styler.ColourTo(i - 1, StateToPrint);
  1072. state = SCE_H_SGML_DOUBLESTRING;
  1073. } else if (ch == '\'') {
  1074. styler.ColourTo(i - 1, StateToPrint);
  1075. state = SCE_H_SGML_SIMPLESTRING;
  1076. } else if ((ch == '-') && (chPrev == '-')) {
  1077. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  1078. styler.ColourTo(i - 2, StateToPrint);
  1079. }
  1080. state = SCE_H_SGML_COMMENT;
  1081. } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
  1082. styler.ColourTo(i - 2, StateToPrint);
  1083. state = SCE_H_SGML_ENTITY;
  1084. } else if (ch == '#') {
  1085. styler.ColourTo(i - 1, StateToPrint);
  1086. state = SCE_H_SGML_SPECIAL;
  1087. } else if (ch == '[') {
  1088. styler.ColourTo(i - 1, StateToPrint);
  1089. scriptLanguage = eScriptSGMLblock;
  1090. state = SCE_H_SGML_BLOCK_DEFAULT;
  1091. } else if (ch == ']') {
  1092. if (scriptLanguage == eScriptSGMLblock) {
  1093. styler.ColourTo(i, StateToPrint);
  1094. scriptLanguage = eScriptSGML;
  1095. } else {
  1096. styler.ColourTo(i - 1, StateToPrint);
  1097. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1098. }
  1099. state = SCE_H_SGML_DEFAULT;
  1100. } else if (scriptLanguage == eScriptSGMLblock) {
  1101. if ((ch == '!') && (chPrev == '<')) {
  1102. styler.ColourTo(i - 2, StateToPrint);
  1103. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1104. state = SCE_H_SGML_COMMAND;
  1105. } else if (ch == '>') {
  1106. styler.ColourTo(i - 1, StateToPrint);
  1107. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1108. }
  1109. }
  1110. break;
  1111. case SCE_H_SGML_COMMAND:
  1112. if ((ch == '-') && (chPrev == '-')) {
  1113. styler.ColourTo(i - 2, StateToPrint);
  1114. state = SCE_H_SGML_COMMENT;
  1115. } else if (!issgmlwordchar(ch)) {
  1116. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  1117. styler.ColourTo(i - 1, StateToPrint);
  1118. state = SCE_H_SGML_1ST_PARAM;
  1119. } else {
  1120. state = SCE_H_SGML_ERROR;
  1121. }
  1122. }
  1123. break;
  1124. case SCE_H_SGML_1ST_PARAM:
  1125. // wait for the beginning of the word
  1126. if ((ch == '-') && (chPrev == '-')) {
  1127. if (scriptLanguage == eScriptSGMLblock) {
  1128. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  1129. } else {
  1130. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  1131. }
  1132. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  1133. } else if (issgmlwordchar(ch)) {
  1134. if (scriptLanguage == eScriptSGMLblock) {
  1135. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  1136. } else {
  1137. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  1138. }
  1139. // find the length of the word
  1140. int size = 1;
  1141. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  1142. size++;
  1143. styler.ColourTo(i + size - 1, StateToPrint);
  1144. i += size - 1;
  1145. visibleChars += size - 1;
  1146. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  1147. if (scriptLanguage == eScriptSGMLblock) {
  1148. state = SCE_H_SGML_BLOCK_DEFAULT;
  1149. } else {
  1150. state = SCE_H_SGML_DEFAULT;
  1151. }
  1152. continue;
  1153. }
  1154. break;
  1155. case SCE_H_SGML_ERROR:
  1156. if ((ch == '-') && (chPrev == '-')) {
  1157. styler.ColourTo(i - 2, StateToPrint);
  1158. state = SCE_H_SGML_COMMENT;
  1159. }
  1160. case SCE_H_SGML_DOUBLESTRING:
  1161. if (ch == '\"') {
  1162. styler.ColourTo(i, StateToPrint);
  1163. state = SCE_H_SGML_DEFAULT;
  1164. }
  1165. break;
  1166. case SCE_H_SGML_SIMPLESTRING:
  1167. if (ch == '\'') {
  1168. styler.ColourTo(i, StateToPrint);
  1169. state = SCE_H_SGML_DEFAULT;
  1170. }
  1171. break;
  1172. case SCE_H_SGML_COMMENT:
  1173. if ((ch == '-') && (chPrev == '-')) {
  1174. styler.ColourTo(i, StateToPrint);
  1175. state = SCE_H_SGML_DEFAULT;
  1176. }
  1177. break;
  1178. case SCE_H_CDATA:
  1179. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  1180. styler.ColourTo(i, StateToPrint);
  1181. state = SCE_H_DEFAULT;
  1182. levelCurrent--;
  1183. }
  1184. break;
  1185. case SCE_H_COMMENT:
  1186. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  1187. styler.ColourTo(i, StateToPrint);
  1188. state = SCE_H_DEFAULT;
  1189. levelCurrent--;
  1190. }
  1191. break;
  1192. case SCE_H_SGML_1ST_PARAM_COMMENT:
  1193. if ((ch == '-') && (chPrev == '-')) {
  1194. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  1195. state = SCE_H_SGML_1ST_PARAM;
  1196. }
  1197. break;
  1198. case SCE_H_SGML_SPECIAL:
  1199. if (!(isascii(ch) && isupper(ch))) {
  1200. styler.ColourTo(i - 1, StateToPrint);
  1201. if (isalnum(ch)) {
  1202. state = SCE_H_SGML_ERROR;
  1203. } else {
  1204. state = SCE_H_SGML_DEFAULT;
  1205. }
  1206. }
  1207. break;
  1208. case SCE_H_SGML_ENTITY:
  1209. if (ch == ';') {
  1210. styler.ColourTo(i, StateToPrint);
  1211. state = SCE_H_SGML_DEFAULT;
  1212. } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1213. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1214. state = SCE_H_SGML_DEFAULT;
  1215. }
  1216. break;
  1217. case SCE_H_ENTITY:
  1218. if (ch == ';') {
  1219. styler.ColourTo(i, StateToPrint);
  1220. state = SCE_H_DEFAULT;
  1221. }
  1222. if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1223. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1224. if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
  1225. styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
  1226. else
  1227. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1228. state = SCE_H_DEFAULT;
  1229. }
  1230. break;
  1231. case SCE_H_TAGUNKNOWN:
  1232. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1233. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1234. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1235. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1236. if (!tagClosing) {
  1237. inScriptType = eNonHtmlScript;
  1238. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1239. } else {
  1240. scriptLanguage = eScriptNone;
  1241. }
  1242. eClass = SCE_H_TAG;
  1243. }
  1244. if (ch == '>') {
  1245. styler.ColourTo(i, eClass);
  1246. if (inScriptType == eNonHtmlScript) {
  1247. state = StateForScript(scriptLanguage);
  1248. } else {
  1249. state = SCE_H_DEFAULT;
  1250. }
  1251. tagOpened = false;
  1252. if (!tagDontFold) {
  1253. if (tagClosing) {
  1254. levelCurrent--;
  1255. } else {
  1256. levelCurrent++;
  1257. }
  1258. }
  1259. tagClosing = false;
  1260. } else if (ch == '/' && chNext == '>') {
  1261. if (eClass == SCE_H_TAGUNKNOWN) {
  1262. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1263. } else {
  1264. styler.ColourTo(i - 1, StateToPrint);
  1265. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1266. }
  1267. i++;
  1268. ch = chNext;
  1269. state = SCE_H_DEFAULT;
  1270. tagOpened = false;
  1271. } else {
  1272. if (eClass != SCE_H_TAGUNKNOWN) {
  1273. if (eClass == SCE_H_SGML_DEFAULT) {
  1274. state = SCE_H_SGML_DEFAULT;
  1275. } else {
  1276. state = SCE_H_OTHER;
  1277. }
  1278. }
  1279. }
  1280. }
  1281. break;
  1282. case SCE_H_ATTRIBUTE:
  1283. if (!setAttributeContinue.Contains(ch)) {
  1284. if (inScriptType == eNonHtmlScript) {
  1285. int scriptLanguagePrev = scriptLanguage;
  1286. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1287. scriptLanguage = clientScript;
  1288. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1289. inScriptType = eHtml;
  1290. }
  1291. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1292. if (ch == '>') {
  1293. styler.ColourTo(i, SCE_H_TAG);
  1294. if (inScriptType == eNonHtmlScript) {
  1295. state = StateForScript(scriptLanguage);
  1296. } else {
  1297. state = SCE_H_DEFAULT;
  1298. }
  1299. tagOpened = false;
  1300. if (!tagDontFold) {
  1301. if (tagClosing) {
  1302. levelCurrent--;
  1303. } else {
  1304. levelCurrent++;
  1305. }
  1306. }
  1307. tagClosing = false;
  1308. } else if (ch == '=') {
  1309. styler.ColourTo(i, SCE_H_OTHER);
  1310. state = SCE_H_VALUE;
  1311. } else {
  1312. state = SCE_H_OTHER;
  1313. }
  1314. }
  1315. break;
  1316. case SCE_H_OTHER:
  1317. if (ch == '>') {
  1318. styler.ColourTo(i - 1, StateToPrint);
  1319. styler.ColourTo(i, SCE_H_TAG);
  1320. if (inScriptType == eNonHtmlScript) {
  1321. state = StateForScript(scriptLanguage);
  1322. } else {
  1323. state = SCE_H_DEFAULT;
  1324. }
  1325. tagOpened = false;
  1326. if (!tagDontFold) {
  1327. if (tagClosing) {
  1328. levelCurrent--;
  1329. } else {
  1330. levelCurrent++;
  1331. }
  1332. }
  1333. tagClosing = false;
  1334. } else if (ch == '\"') {
  1335. styler.ColourTo(i - 1, StateToPrint);
  1336. state = SCE_H_DOUBLESTRING;
  1337. } else if (ch == '\'') {
  1338. styler.ColourTo(i - 1, StateToPrint);
  1339. state = SCE_H_SINGLESTRING;
  1340. } else if (ch == '=') {
  1341. styler.ColourTo(i, StateToPrint);
  1342. state = SCE_H_VALUE;
  1343. } else if (ch == '/' && chNext == '>') {
  1344. styler.ColourTo(i - 1, StateToPrint);
  1345. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1346. i++;
  1347. ch = chNext;
  1348. state = SCE_H_DEFAULT;
  1349. tagOpened = false;
  1350. } else if (ch == '?' && chNext == '>') {
  1351. styler.ColourTo(i - 1, StateToPrint);
  1352. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1353. i++;
  1354. ch = chNext;
  1355. state = SCE_H_DEFAULT;
  1356. } else if (setHTMLWord.Contains(ch)) {
  1357. styler.ColourTo(i - 1, StateToPrint);
  1358. state = SCE_H_ATTRIBUTE;
  1359. }
  1360. break;
  1361. case SCE_H_DOUBLESTRING:
  1362. if (ch == '\"') {
  1363. if (inScriptType == eNonHtmlScript) {
  1364. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1365. }
  1366. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1367. state = SCE_H_OTHER;
  1368. }
  1369. break;
  1370. case SCE_H_SINGLESTRING:
  1371. if (ch == '\'') {
  1372. if (inScriptType == eNonHtmlScript) {
  1373. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1374. }
  1375. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1376. state = SCE_H_OTHER;
  1377. }
  1378. break;
  1379. case SCE_H_VALUE:
  1380. if (!setHTMLWord.Contains(ch)) {
  1381. if (ch == '\"' && chPrev == '=') {
  1382. // Should really test for being first character
  1383. state = SCE_H_DOUBLESTRING;
  1384. } else if (ch == '\'' && chPrev == '=') {
  1385. state = SCE_H_SINGLESTRING;
  1386. } else {
  1387. if (IsNumber(styler.GetStartSegment(), styler)) {
  1388. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1389. } else {
  1390. styler.ColourTo(i - 1, StateToPrint);
  1391. }
  1392. if (ch == '>') {
  1393. styler.ColourTo(i, SCE_H_TAG);
  1394. if (inScriptType == eNonHtmlScript) {
  1395. state = StateForScript(scriptLanguage);
  1396. } else {
  1397. state = SCE_H_DEFAULT;
  1398. }
  1399. tagOpened = false;
  1400. if (!tagDontFold) {
  1401. if (tagClosing) {
  1402. levelCurrent--;
  1403. } else {
  1404. levelCurrent++;
  1405. }
  1406. }
  1407. tagClosing = false;
  1408. } else {
  1409. state = SCE_H_OTHER;
  1410. }
  1411. }
  1412. }
  1413. break;
  1414. case SCE_HJ_DEFAULT:
  1415. case SCE_HJ_START:
  1416. case SCE_HJ_SYMBOLS:
  1417. if (IsAWordStart(ch)) {
  1418. styler.ColourTo(i - 1, StateToPrint);
  1419. state = SCE_HJ_WORD;
  1420. } else if (ch == '/' && chNext == '*') {
  1421. styler.ColourTo(i - 1, StateToPrint);
  1422. if (chNext2 == '*')
  1423. state = SCE_HJ_COMMENTDOC;
  1424. else
  1425. state = SCE_HJ_COMMENT;
  1426. } else if (ch == '/' && chNext == '/') {
  1427. styler.ColourTo(i - 1, StateToPrint);
  1428. state = SCE_HJ_COMMENTLINE;
  1429. } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
  1430. styler.ColourTo(i - 1, StateToPrint);
  1431. state = SCE_HJ_REGEX;
  1432. } else if (ch == '\"') {
  1433. styler.ColourTo(i - 1, StateToPrint);
  1434. state = SCE_HJ_DOUBLESTRING;
  1435. } else if (ch == '\'') {
  1436. styler.ColourTo(i - 1, StateToPrint);
  1437. state = SCE_HJ_SINGLESTRING;
  1438. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1439. styler.SafeGetCharAt(i + 3) == '-') {
  1440. styler.ColourTo(i - 1, StateToPrint);
  1441. state = SCE_HJ_COMMENTLINE;
  1442. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1443. styler.ColourTo(i - 1, StateToPrint);
  1444. state = SCE_HJ_COMMENTLINE;
  1445. i += 2;
  1446. } else if (IsOperator(ch)) {
  1447. styler.ColourTo(i - 1, StateToPrint);
  1448. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1449. state = SCE_HJ_DEFAULT;
  1450. } else if ((ch == ' ') || (ch == '\t')) {
  1451. if (state == SCE_HJ_START) {
  1452. styler.ColourTo(i - 1, StateToPrint);
  1453. state = SCE_HJ_DEFAULT;
  1454. }
  1455. }
  1456. break;
  1457. case SCE_HJ_WORD:
  1458. if (!IsAWordChar(ch)) {
  1459. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1460. //styler.ColourTo(i - 1, eHTJSKeyword);
  1461. state = SCE_HJ_DEFAULT;
  1462. if (ch == '/' && chNext == '*') {
  1463. if (chNext2 == '*')
  1464. state = SCE_HJ_COMMENTDOC;
  1465. else
  1466. state = SCE_HJ_COMMENT;
  1467. } else if (ch == '/' && chNext == '/') {
  1468. state = SCE_HJ_COMMENTLINE;
  1469. } else if (ch == '\"') {
  1470. state = SCE_HJ_DOUBLESTRING;
  1471. } else if (ch == '\'') {
  1472. state = SCE_HJ_SINGLESTRING;
  1473. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1474. styler.ColourTo(i - 1, StateToPrint);
  1475. state = SCE_HJ_COMMENTLINE;
  1476. i += 2;
  1477. } else if (IsOperator(ch)) {
  1478. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1479. state = SCE_HJ_DEFAULT;
  1480. }
  1481. }
  1482. break;
  1483. case SCE_HJ_COMMENT:
  1484. case SCE_HJ_COMMENTDOC:
  1485. if (ch == '/' && chPrev == '*') {
  1486. styler.ColourTo(i, StateToPrint);
  1487. state = SCE_HJ_DEFAULT;
  1488. ch = ' ';
  1489. }
  1490. break;
  1491. case SCE_HJ_COMMENTLINE:
  1492. if (ch == '\r' || ch == '\n') {
  1493. styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
  1494. state = SCE_HJ_DEFAULT;
  1495. ch = ' ';
  1496. }
  1497. break;
  1498. case SCE_HJ_DOUBLESTRING:
  1499. if (ch == '\\') {
  1500. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1501. i++;
  1502. }
  1503. } else if (ch == '\"') {
  1504. styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
  1505. state = SCE_HJ_DEFAULT;
  1506. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1507. styler.ColourTo(i - 1, StateToPrint);
  1508. state = SCE_HJ_COMMENTLINE;
  1509. i += 2;
  1510. } else if (isLineEnd(ch)) {
  1511. styler.ColourTo(i - 1, StateToPrint);
  1512. state = SCE_HJ_STRINGEOL;
  1513. }
  1514. break;
  1515. case SCE_HJ_SINGLESTRING:
  1516. if (ch == '\\') {
  1517. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1518. i++;
  1519. }
  1520. } else if (ch == '\'') {
  1521. styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
  1522. state = SCE_HJ_DEFAULT;
  1523. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1524. styler.ColourTo(i - 1, StateToPrint);
  1525. state = SCE_HJ_COMMENTLINE;
  1526. i += 2;
  1527. } else if (isLineEnd(ch)) {
  1528. styler.ColourTo(i - 1, StateToPrint);
  1529. state = SCE_HJ_STRINGEOL;
  1530. }
  1531. break;
  1532. case SCE_HJ_STRINGEOL:
  1533. if (!isLineEnd(ch)) {
  1534. styler.ColourTo(i - 1, StateToPrint);
  1535. state = SCE_HJ_DEFAULT;
  1536. } else if (!isLineEnd(chNext)) {
  1537. styler.ColourTo(i, StateToPrint);
  1538. state = SCE_HJ_DEFAULT;
  1539. }
  1540. break;
  1541. case SCE_HJ_REGEX:
  1542. if (ch == '\r' || ch == '\n' || ch == '/') {
  1543. if (ch == '/') {
  1544. while (isascii(chNext) && islower(chNext)) { // gobble regex flags
  1545. i++;
  1546. ch = chNext;
  1547. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1548. }
  1549. }
  1550. styler.ColourTo(i, StateToPrint);
  1551. state = SCE_HJ_DEFAULT;
  1552. } else if (ch == '\\') {
  1553. // Gobble up the quoted character
  1554. if (chNext == '\\' || chNext == '/') {
  1555. i++;
  1556. ch = chNext;
  1557. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1558. }
  1559. }
  1560. break;
  1561. case SCE_HB_DEFAULT:
  1562. case SCE_HB_START:
  1563. if (IsAWordStart(ch)) {
  1564. styler.ColourTo(i - 1, StateToPrint);
  1565. state = SCE_HB_WORD;
  1566. } else if (ch == '\'') {
  1567. styler.ColourTo(i - 1, StateToPrint);
  1568. state = SCE_HB_COMMENTLINE;
  1569. } else if (ch == '\"') {
  1570. styler.ColourTo(i - 1, StateToPrint);
  1571. state = SCE_HB_STRING;
  1572. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1573. styler.SafeGetCharAt(i + 3) == '-') {
  1574. styler.ColourTo(i - 1, StateToPrint);
  1575. state = SCE_HB_COMMENTLINE;
  1576. } else if (IsOperator(ch)) {
  1577. styler.ColourTo(i - 1, StateToPrint);
  1578. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1579. state = SCE_HB_DEFAULT;
  1580. } else if ((ch == ' ') || (ch == '\t')) {
  1581. if (state == SCE_HB_START) {
  1582. styler.ColourTo(i - 1, StateToPrint);
  1583. state = SCE_HB_DEFAULT;
  1584. }
  1585. }
  1586. break;
  1587. case SCE_HB_WORD:
  1588. if (!IsAWordChar(ch)) {
  1589. state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1590. if (state == SCE_HB_DEFAULT) {
  1591. if (ch == '\"') {
  1592. state = SCE_HB_STRING;
  1593. } else if (ch == '\'') {
  1594. state = SCE_HB_COMMENTLINE;
  1595. } else if (IsOperator(ch)) {
  1596. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1597. state = SCE_HB_DEFAULT;
  1598. }
  1599. }
  1600. }
  1601. break;
  1602. case SCE_HB_STRING:
  1603. if (ch == '\"') {
  1604. styler.ColourTo(i, StateToPrint);
  1605. state = SCE_HB_DEFAULT;
  1606. } else if (ch == '\r' || ch == '\n') {
  1607. styler.ColourTo(i - 1, StateToPrint);
  1608. state = SCE_HB_STRINGEOL;
  1609. }
  1610. break;
  1611. case SCE_HB_COMMENTLINE:
  1612. if (ch == '\r' || ch == '\n') {
  1613. styler.ColourTo(i - 1, StateToPrint);
  1614. state = SCE_HB_DEFAULT;
  1615. }
  1616. break;
  1617. case SCE_HB_STRINGEOL:
  1618. if (!isLineEnd(ch)) {
  1619. styler.ColourTo(i - 1, StateToPrint);
  1620. state = SCE_HB_DEFAULT;
  1621. } else if (!isLineEnd(chNext)) {
  1622. styler.ColourTo(i, StateToPrint);
  1623. state = SCE_HB_DEFAULT;
  1624. }
  1625. break;
  1626. case SCE_HP_DEFAULT:
  1627. case SCE_HP_START:
  1628. if (IsAWordStart(ch)) {
  1629. styler.ColourTo(i - 1, StateToPrint);
  1630. state = SCE_HP_WORD;
  1631. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1632. styler.SafeGetCharAt(i + 3) == '-') {
  1633. styler.ColourTo(i - 1, StateToPrint);
  1634. state = SCE_HP_COMMENTLINE;
  1635. } else if (ch == '#') {
  1636. styler.ColourTo(i - 1, StateToPrint);
  1637. state = SCE_HP_COMMENTLINE;
  1638. } else if (ch == '\"') {
  1639. styler.ColourTo(i - 1, StateToPrint);
  1640. if (chNext == '\"' && chNext2 == '\"') {
  1641. i += 2;
  1642. state = SCE_HP_TRIPLEDOUBLE;
  1643. ch = ' ';
  1644. chPrev = ' ';
  1645. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1646. } else {
  1647. // state = statePrintForState(SCE_HP_STRING,inScriptType);
  1648. state = SCE_HP_STRING;
  1649. }
  1650. } else if (ch == '\'') {
  1651. styler.ColourTo(i - 1, StateToPrint);
  1652. if (chNext == '\'' && chNext2 == '\'') {
  1653. i += 2;
  1654. state = SCE_HP_TRIPLE;
  1655. ch = ' ';
  1656. chPrev = ' ';
  1657. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1658. } else {
  1659. state = SCE_HP_CHARACTER;
  1660. }
  1661. } else if (IsOperator(ch)) {
  1662. styler.ColourTo(i - 1, StateToPrint);
  1663. styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
  1664. } else if ((ch == ' ') || (ch == '\t')) {
  1665. if (state == SCE_HP_START) {
  1666. styler.ColourTo(i - 1, StateToPrint);
  1667. state = SCE_HP_DEFAULT;
  1668. }
  1669. }
  1670. break;
  1671. case SCE_HP_WORD:
  1672. if (!IsAWordChar(ch)) {
  1673. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  1674. state = SCE_HP_DEFAULT;
  1675. if (ch == '#') {
  1676. state = SCE_HP_COMMENTLINE;
  1677. } else if (ch == '\"') {
  1678. if (chNext == '\"' && chNext2 == '\"') {
  1679. i += 2;
  1680. state = SCE_HP_TRIPLEDOUBLE;
  1681. ch = ' ';
  1682. chPrev = ' ';
  1683. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1684. } else {
  1685. state = SCE_HP_STRING;
  1686. }
  1687. } else if (ch == '\'') {
  1688. if (chNext == '\'' && chNext2 == '\'') {
  1689. i += 2;
  1690. state = SCE_HP_TRIPLE;
  1691. ch = ' ';
  1692. chPrev = ' ';
  1693. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1694. } else {
  1695. state = SCE_HP_CHARACTER;
  1696. }
  1697. } else if (IsOperator(ch)) {
  1698. styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
  1699. }
  1700. }
  1701. break;
  1702. case SCE_HP_COMMENTLINE:
  1703. if (ch == '\r' || ch == '\n') {
  1704. styler.ColourTo(i - 1, StateToPrint);
  1705. state = SCE_HP_DEFAULT;
  1706. }
  1707. break;
  1708. case SCE_HP_STRING:
  1709. if (ch == '\\') {
  1710. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1711. i++;
  1712. ch = chNext;
  1713. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1714. }
  1715. } else if (ch == '\"') {
  1716. styler.ColourTo(i, StateToPrint);
  1717. state = SCE_HP_DEFAULT;
  1718. }
  1719. break;
  1720. case SCE_HP_CHARACTER:
  1721. if (ch == '\\') {
  1722. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1723. i++;
  1724. ch = chNext;
  1725. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1726. }
  1727. } else if (ch == '\'') {
  1728. styler.ColourTo(i, StateToPrint);
  1729. state = SCE_HP_DEFAULT;
  1730. }
  1731. break;
  1732. case SCE_HP_TRIPLE:
  1733. if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
  1734. styler.ColourTo(i, StateToPrint);
  1735. state = SCE_HP_DEFAULT;
  1736. }
  1737. break;
  1738. case SCE_HP_TRIPLEDOUBLE:
  1739. if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
  1740. styler.ColourTo(i, StateToPrint);
  1741. state = SCE_HP_DEFAULT;
  1742. }
  1743. break;
  1744. ///////////// start - PHP state handling
  1745. case SCE_HPHP_WORD:
  1746. if (!IsAWordChar(ch)) {
  1747. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  1748. if (ch == '/' && chNext == '*') {
  1749. i++;
  1750. state = SCE_HPHP_COMMENT;
  1751. } else if (ch == '/' && chNext == '/') {
  1752. i++;
  1753. state = SCE_HPHP_COMMENTLINE;
  1754. } else if (ch == '#') {
  1755. state = SCE_HPHP_COMMENTLINE;
  1756. } else if (ch == '\"') {
  1757. state = SCE_HPHP_HSTRING;
  1758. strcpy(phpStringDelimiter, "\"");
  1759. } else if (styler.Match(i, "<<<")) {
  1760. bool isSimpleString = false;
  1761. i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
  1762. if (strlen(phpStringDelimiter)) {
  1763. state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
  1764. if (foldHeredoc) levelCurrent++;
  1765. }
  1766. } else if (ch == '\'') {
  1767. state = SCE_HPHP_SIMPLESTRING;
  1768. strcpy(phpStringDelimiter, "\'");
  1769. } else if (ch == '$' && IsPhpWordStart(chNext)) {
  1770. state = SCE_HPHP_VARIABLE;
  1771. } else if (IsOperator(ch)) {
  1772. state = SCE_HPHP_OPERATOR;
  1773. } else {
  1774. state = SCE_HPHP_DEFAULT;
  1775. }
  1776. }
  1777. break;
  1778. case SCE_HPHP_NUMBER:
  1779. // Recognize integers in base 8, 10 or 16, or floating-point numbers.
  1780. if (!IsADigit(ch)
  1781. && strchr(".xXabcdefABCDEF", ch) == NULL
  1782. && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
  1783. styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
  1784. if (IsOperator(ch))
  1785. state = SCE_HPHP_OPERATOR;
  1786. else
  1787. state = SCE_HPHP_DEFAULT;
  1788. }
  1789. break;
  1790. case SCE_HPHP_VARIABLE:
  1791. if (!IsPhpWordChar(chNext)) {
  1792. styler.ColourTo(i, SCE_HPHP_VARIABLE);
  1793. state = SCE_HPHP_DEFAULT;
  1794. }
  1795. break;
  1796. case SCE_HPHP_COMMENT:
  1797. if (ch == '/' && chPrev == '*') {
  1798. styler.ColourTo(i, StateToPrint);
  1799. state = SCE_HPHP_DEFAULT;
  1800. }
  1801. break;
  1802. case SCE_HPHP_COMMENTLINE:
  1803. if (ch == '\r' || ch == '\n') {
  1804. styler.ColourTo(i - 1, StateToPrint);
  1805. state = SCE_HPHP_DEFAULT;
  1806. }
  1807. break;
  1808. case SCE_HPHP_HSTRING:
  1809. if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
  1810. // skip the next char
  1811. i++;
  1812. } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
  1813. && IsPhpWordStart(chNext2)) {
  1814. styler.ColourTo(i - 1, StateToPrint);
  1815. state = SCE_HPHP_COMPLEX_VARIABLE;
  1816. } else if (ch == '$' && IsPhpWordStart(chNext)) {
  1817. styler.ColourTo(i - 1, StateToPrint);
  1818. state = SCE_HPHP_HSTRING_VARIABLE;
  1819. } else if (styler.Match(i, phpStringDelimiter)) {
  1820. if (phpStringDelimiter[0] == '\"') {
  1821. styler.ColourTo(i, StateToPrint);
  1822. state = SCE_HPHP_DEFAULT;
  1823. } else if (isLineEnd(chPrev)) {
  1824. const int psdLength = strlen(phpStringDelimiter);
  1825. const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
  1826. const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
  1827. if (isLineEnd(chAfterPsd) ||
  1828. (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
  1829. i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
  1830. styler.ColourTo(i, StateToPrint);
  1831. state = SCE_HPHP_DEFAULT;
  1832. if (foldHeredoc) levelCurrent--;
  1833. }
  1834. }
  1835. }
  1836. break;
  1837. case SCE_HPHP_SIMPLESTRING:
  1838. if (phpStringDelimiter[0] == '\'') {
  1839. if (ch == '\\') {
  1840. // skip the next char
  1841. i++;
  1842. } else if (ch == '\'') {
  1843. styler.ColourTo(i, StateToPrint);
  1844. state = SCE_HPHP_DEFAULT;
  1845. }
  1846. } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter)) {
  1847. const int psdLength = strlen(phpStringDelimiter);
  1848. const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
  1849. const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
  1850. if (isLineEnd(chAfterPsd) ||
  1851. (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
  1852. i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
  1853. styler.ColourTo(i, StateToPrint);
  1854. state = SCE_HPHP_DEFAULT;
  1855. if (foldHeredoc) levelCurrent--;
  1856. }
  1857. }
  1858. break;
  1859. case SCE_HPHP_HSTRING_VARIABLE:
  1860. if (!IsPhpWordChar(chNext)) {
  1861. styler.ColourTo(i, StateToPrint);
  1862. state = SCE_HPHP_HSTRING;
  1863. }
  1864. break;
  1865. case SCE_HPHP_COMPLEX_VARIABLE:
  1866. if (ch == '}') {
  1867. styler.ColourTo(i, StateToPrint);
  1868. state = SCE_HPHP_HSTRING;
  1869. }
  1870. break;
  1871. case SCE_HPHP_OPERATOR:
  1872. case SCE_HPHP_DEFAULT:
  1873. styler.ColourTo(i - 1, StateToPrint);
  1874. if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
  1875. state = SCE_HPHP_NUMBER;
  1876. } else if (IsAWordStart(ch)) {
  1877. state = SCE_HPHP_WORD;
  1878. } else if (ch == '/' && chNext == '*') {
  1879. i++;
  1880. state = SCE_HPHP_COMMENT;
  1881. } else if (ch == '/' && chNext == '/') {
  1882. i++;
  1883. state = SCE_HPHP_COMMENTLINE;
  1884. } else if (ch == '#') {
  1885. state = SCE_HPHP_COMMENTLINE;
  1886. } else if (ch == '\"') {
  1887. state = SCE_HPHP_HSTRING;
  1888. strcpy(phpStringDelimiter, "\"");
  1889. } else if (styler.Match(i, "<<<")) {
  1890. bool isSimpleString = false;
  1891. i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler, isSimpleString);
  1892. if (strlen(phpStringDelimiter)) {
  1893. state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
  1894. if (foldHeredoc) levelCurrent++;
  1895. }
  1896. } else if (ch == '\'') {
  1897. state = SCE_HPHP_SIMPLESTRING;
  1898. strcpy(phpStringDelimiter, "\'");
  1899. } else if (ch == '$' && IsPhpWordStart(chNext)) {
  1900. state = SCE_HPHP_VARIABLE;
  1901. } else if (IsOperator(ch)) {
  1902. state = SCE_HPHP_OPERATOR;
  1903. } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
  1904. state = SCE_HPHP_DEFAULT;
  1905. }
  1906. break;
  1907. ///////////// end - PHP state handling
  1908. }
  1909. // Some of the states above terminated their lexeme, but since the same character
  1910. // also starts the same class again, only re-enter if the segment is non-empty.
  1911. bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
  1912. if (state == SCE_HB_DEFAULT) { // One of the above succeeded
  1913. if ((ch == '\"') && (nonEmptySegment)) {
  1914. state = SCE_HB_STRING;
  1915. } else if (ch == '\'') {
  1916. state = SCE_HB_COMMENTLINE;
  1917. } else if (IsAWordStart(ch)) {
  1918. state = SCE_HB_WORD;
  1919. } else if (IsOperator(ch)) {
  1920. styler.ColourTo(i, SCE_HB_DEFAULT);
  1921. }
  1922. } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
  1923. if ((ch == '\"') && (nonEmptySegment)) {
  1924. state = SCE_HBA_STRING;
  1925. } else if (ch == '\'') {
  1926. state = SCE_HBA_COMMENTLINE;
  1927. } else if (IsAWordStart(ch)) {
  1928. state = SCE_HBA_WORD;
  1929. } else if (IsOperator(ch)) {
  1930. styler.ColourTo(i, SCE_HBA_DEFAULT);
  1931. }
  1932. } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
  1933. if (ch == '/' && chNext == '*') {
  1934. if (styler.SafeGetCharAt(i + 2) == '*')
  1935. state = SCE_HJ_COMMENTDOC;
  1936. else
  1937. state = SCE_HJ_COMMENT;
  1938. } else if (ch == '/' && chNext == '/') {
  1939. state = SCE_HJ_COMMENTLINE;
  1940. } else if ((ch == '\"') && (nonEmptySegment)) {
  1941. state = SCE_HJ_DOUBLESTRING;
  1942. } else if ((ch == '\'') && (nonEmptySegment)) {
  1943. state = SCE_HJ_SINGLESTRING;
  1944. } else if (IsAWordStart(ch)) {
  1945. state = SCE_HJ_WORD;
  1946. } else if (IsOperator(ch)) {
  1947. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1948. }
  1949. }
  1950. }
  1951. switch (state) {
  1952. case SCE_HJ_WORD:
  1953. classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
  1954. break;
  1955. case SCE_HB_WORD:
  1956. classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
  1957. break;
  1958. case SCE_HP_WORD:
  1959. classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType);
  1960. break;
  1961. case SCE_HPHP_WORD:
  1962. classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
  1963. break;
  1964. default:
  1965. StateToPrint = statePrintForState(state, inScriptType);
  1966. styler.ColourTo(lengthDoc - 1, StateToPrint);