PageRenderTime 54ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/DBDiff.Scintilla NET-2.0/SciLexter/src/LexHTML.cxx

#
C++ | 2018 lines | 1817 code | 101 blank | 100 comment | 1341 complexity | 9b3b9fbbe631ec9c8f31fa273fd03d5b MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <ctype.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include "Platform.h"
  13. #include "PropSet.h"
  14. #include "Accessor.h"
  15. #include "StyleContext.h"
  16. #include "KeyWords.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "CharacterSet.h"
  20. #ifdef SCI_NAMESPACE
  21. using namespace Scintilla;
  22. #endif
// Distance between each script language's base style range and its alternate
// (SCE_HJA_*, SCE_HBA_*, SCE_HPA_*) style range. statePrintForState() adds the
// offset and stateForPrintState() subtracts it again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
// Kind of embedded content currently being lexed. Values are packed into
// 4-bit fields of the per-line state by ColouriseHyperTextDoc().
enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
// Scripting mode of the lexer; stored in the low 2 bits of the per-line state
// by ColouriseHyperTextDoc().
enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  28. static inline bool IsAWordChar(const int ch) {
  29. return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  30. }
  31. static inline bool IsAWordStart(const int ch) {
  32. return (ch < 0x80) && (isalnum(ch) || ch == '_');
  33. }
  34. inline bool IsOperator(int ch) {
  35. if (isascii(ch) && isalnum(ch))
  36. return false;
  37. // '.' left out as it is used to make up numbers
  38. if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  39. ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  40. ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  41. ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  42. ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  43. ch == '?' || ch == '!' || ch == '.' || ch == '~')
  44. return true;
  45. return false;
  46. }
  47. static inline int MakeLowerCase(int ch) {
  48. if (ch < 'A' || ch > 'Z')
  49. return ch;
  50. else
  51. return ch - 'A' + 'a';
  52. }
  53. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  54. size_t i = 0;
  55. for (; (i < end - start + 1) && (i < len-1); i++) {
  56. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  57. }
  58. s[i] = '\0';
  59. }
  60. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  61. char s[100];
  62. GetTextSegment(styler, start, end, s, sizeof(s));
  63. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  64. if (strstr(s, "src")) // External script
  65. return eScriptNone;
  66. if (strstr(s, "vbs"))
  67. return eScriptVBS;
  68. if (strstr(s, "pyth"))
  69. return eScriptPython;
  70. if (strstr(s, "javas"))
  71. return eScriptJS;
  72. if (strstr(s, "jscr"))
  73. return eScriptJS;
  74. if (strstr(s, "php"))
  75. return eScriptPHP;
  76. if (strstr(s, "xml")) {
  77. const char *xml = strstr(s, "xml");
  78. for (const char *t=s; t<xml; t++) {
  79. if (!IsASpace(*t)) {
  80. return prevValue;
  81. }
  82. }
  83. return eScriptXML;
  84. }
  85. return prevValue;
  86. }
  87. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  88. int iResult = 0;
  89. char s[100];
  90. GetTextSegment(styler, start, end, s, sizeof(s));
  91. if (0 == strncmp(s, "php", 3)) {
  92. iResult = 3;
  93. }
  94. return iResult;
  95. }
  96. static script_type ScriptOfState(int state) {
  97. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  98. return eScriptPython;
  99. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  100. return eScriptVBS;
  101. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  102. return eScriptJS;
  103. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  104. return eScriptPHP;
  105. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  106. return eScriptSGML;
  107. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  108. return eScriptSGMLblock;
  109. } else {
  110. return eScriptNone;
  111. }
  112. }
  113. static int statePrintForState(int state, script_mode inScriptType) {
  114. int StateToPrint = state;
  115. if (state >= SCE_HJ_START) {
  116. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  117. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  118. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  119. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  120. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  121. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  122. }
  123. }
  124. return StateToPrint;
  125. }
  126. static int stateForPrintState(int StateToPrint) {
  127. int state;
  128. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  129. state = StateToPrint - SCE_HA_PYTHON;
  130. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  131. state = StateToPrint - SCE_HA_VBS;
  132. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  133. state = StateToPrint - SCE_HA_JS;
  134. } else {
  135. state = StateToPrint;
  136. }
  137. return state;
  138. }
  139. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  140. return IsADigit(styler[start]) || (styler[start] == '.') ||
  141. (styler[start] == '-') || (styler[start] == '#');
  142. }
  143. static inline bool isStringState(int state) {
  144. bool bResult;
  145. switch (state) {
  146. case SCE_HJ_DOUBLESTRING:
  147. case SCE_HJ_SINGLESTRING:
  148. case SCE_HJA_DOUBLESTRING:
  149. case SCE_HJA_SINGLESTRING:
  150. case SCE_HB_STRING:
  151. case SCE_HBA_STRING:
  152. case SCE_HP_STRING:
  153. case SCE_HP_CHARACTER:
  154. case SCE_HP_TRIPLE:
  155. case SCE_HP_TRIPLEDOUBLE:
  156. case SCE_HPA_STRING:
  157. case SCE_HPA_CHARACTER:
  158. case SCE_HPA_TRIPLE:
  159. case SCE_HPA_TRIPLEDOUBLE:
  160. case SCE_HPHP_HSTRING:
  161. case SCE_HPHP_SIMPLESTRING:
  162. case SCE_HPHP_HSTRING_VARIABLE:
  163. case SCE_HPHP_COMPLEX_VARIABLE:
  164. bResult = true;
  165. break;
  166. default :
  167. bResult = false;
  168. break;
  169. }
  170. return bResult;
  171. }
  172. static inline bool stateAllowsTermination(int state) {
  173. bool allowTermination = !isStringState(state);
  174. if (allowTermination) {
  175. switch (state) {
  176. case SCE_HB_COMMENTLINE:
  177. case SCE_HPHP_COMMENT:
  178. case SCE_HP_COMMENTLINE:
  179. case SCE_HPA_COMMENTLINE:
  180. allowTermination = false;
  181. }
  182. }
  183. return allowTermination;
  184. }
  185. // not really well done, since it's only comments that should lex the %> and <%
  186. static inline bool isCommentASPState(int state) {
  187. bool bResult;
  188. switch (state) {
  189. case SCE_HJ_COMMENT:
  190. case SCE_HJ_COMMENTLINE:
  191. case SCE_HJ_COMMENTDOC:
  192. case SCE_HB_COMMENTLINE:
  193. case SCE_HP_COMMENTLINE:
  194. case SCE_HPHP_COMMENT:
  195. case SCE_HPHP_COMMENTLINE:
  196. bResult = true;
  197. break;
  198. default :
  199. bResult = false;
  200. break;
  201. }
  202. return bResult;
  203. }
  204. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  205. bool wordIsNumber = IsNumber(start, styler);
  206. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  207. if (wordIsNumber) {
  208. chAttr = SCE_H_NUMBER;
  209. } else {
  210. char s[100];
  211. GetTextSegment(styler, start, end, s, sizeof(s));
  212. if (keywords.InList(s))
  213. chAttr = SCE_H_ATTRIBUTE;
  214. }
  215. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  216. // No keywords -> all are known
  217. chAttr = SCE_H_ATTRIBUTE;
  218. styler.ColourTo(end, chAttr);
  219. }
  220. static int classifyTagHTML(unsigned int start, unsigned int end,
  221. WordList &keywords, Accessor &styler, bool &tagDontFold,
  222. bool caseSensitive, bool isXml, bool allowScripts) {
  223. char s[30 + 2];
  224. // Copy after the '<'
  225. unsigned int i = 0;
  226. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  227. char ch = styler[cPos];
  228. if ((ch != '<') && (ch != '/')) {
  229. s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  230. }
  231. }
  232. //The following is only a quick hack, to see if this whole thing would work
  233. //we first need the tagname with a trailing space...
  234. s[i] = ' ';
  235. s[i+1] = '\0';
  236. // if the current language is XML, I can fold any tag
  237. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  238. //...to find it in the list of no-container-tags
  239. tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
  240. //now we can remove the trailing space
  241. s[i] = '\0';
  242. // No keywords -> all are known
  243. char chAttr = SCE_H_TAGUNKNOWN;
  244. if (s[0] == '!') {
  245. chAttr = SCE_H_SGML_DEFAULT;
  246. } else if (!keywords || keywords.InList(s)) {
  247. chAttr = SCE_H_TAG;
  248. }
  249. styler.ColourTo(end, chAttr);
  250. if (chAttr == SCE_H_TAG) {
  251. if (allowScripts && 0 == strcmp(s, "script")) {
  252. // check to see if this is a self-closing tag by sniffing ahead
  253. bool isSelfClose = false;
  254. for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
  255. char ch = styler.SafeGetCharAt(cPos, '\0');
  256. if (ch == '\0' || ch == '>')
  257. break;
  258. else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
  259. isSelfClose = true;
  260. break;
  261. }
  262. }
  263. // do not enter a script state if the tag self-closed
  264. if (!isSelfClose)
  265. chAttr = SCE_H_SCRIPT;
  266. } else if (!isXml && 0 == strcmp(s, "comment")) {
  267. chAttr = SCE_H_COMMENT;
  268. }
  269. }
  270. return chAttr;
  271. }
  272. static void classifyWordHTJS(unsigned int start, unsigned int end,
  273. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  274. char chAttr = SCE_HJ_WORD;
  275. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  276. if (wordIsNumber)
  277. chAttr = SCE_HJ_NUMBER;
  278. else {
  279. char s[30 + 1];
  280. unsigned int i = 0;
  281. for (; i < end - start + 1 && i < 30; i++) {
  282. s[i] = styler[start + i];
  283. }
  284. s[i] = '\0';
  285. if (keywords.InList(s))
  286. chAttr = SCE_HJ_KEYWORD;
  287. }
  288. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  289. }
  290. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  291. char chAttr = SCE_HB_IDENTIFIER;
  292. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  293. if (wordIsNumber)
  294. chAttr = SCE_HB_NUMBER;
  295. else {
  296. char s[100];
  297. GetTextSegment(styler, start, end, s, sizeof(s));
  298. if (keywords.InList(s)) {
  299. chAttr = SCE_HB_WORD;
  300. if (strcmp(s, "rem") == 0)
  301. chAttr = SCE_HB_COMMENTLINE;
  302. }
  303. }
  304. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  305. if (chAttr == SCE_HB_COMMENTLINE)
  306. return SCE_HB_COMMENTLINE;
  307. else
  308. return SCE_HB_DEFAULT;
  309. }
  310. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
  311. bool wordIsNumber = IsADigit(styler[start]);
  312. char s[30 + 1];
  313. unsigned int i = 0;
  314. for (; i < end - start + 1 && i < 30; i++) {
  315. s[i] = styler[start + i];
  316. }
  317. s[i] = '\0';
  318. char chAttr = SCE_HP_IDENTIFIER;
  319. if (0 == strcmp(prevWord, "class"))
  320. chAttr = SCE_HP_CLASSNAME;
  321. else if (0 == strcmp(prevWord, "def"))
  322. chAttr = SCE_HP_DEFNAME;
  323. else if (wordIsNumber)
  324. chAttr = SCE_HP_NUMBER;
  325. else if (keywords.InList(s))
  326. chAttr = SCE_HP_WORD;
  327. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  328. strcpy(prevWord, s);
  329. }
  330. // Update the word colour to default or keyword
  331. // Called when in a PHP word
  332. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  333. char chAttr = SCE_HPHP_DEFAULT;
  334. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  335. if (wordIsNumber)
  336. chAttr = SCE_HPHP_NUMBER;
  337. else {
  338. char s[100];
  339. GetTextSegment(styler, start, end, s, sizeof(s));
  340. if (keywords.InList(s))
  341. chAttr = SCE_HPHP_WORD;
  342. }
  343. styler.ColourTo(end, chAttr);
  344. }
  345. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  346. char s[30 + 1];
  347. unsigned int i = 0;
  348. for (; i < end - start + 1 && i < 30; i++) {
  349. s[i] = styler[start + i];
  350. }
  351. s[i] = '\0';
  352. return keywords.InList(s);
  353. }
  354. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  355. char s[30 + 1];
  356. unsigned int i = 0;
  357. for (; i < end - start + 1 && i < 30; i++) {
  358. s[i] = styler[start + i];
  359. }
  360. s[i] = '\0';
  361. return (0 == strcmp(s, "[CDATA["));
  362. }
  363. // Return the first state to reach when entering a scripting language
  364. static int StateForScript(script_type scriptLanguage) {
  365. int Result;
  366. switch (scriptLanguage) {
  367. case eScriptVBS:
  368. Result = SCE_HB_START;
  369. break;
  370. case eScriptPython:
  371. Result = SCE_HP_START;
  372. break;
  373. case eScriptPHP:
  374. Result = SCE_HPHP_DEFAULT;
  375. break;
  376. case eScriptXML:
  377. Result = SCE_H_TAGUNKNOWN;
  378. break;
  379. case eScriptSGML:
  380. Result = SCE_H_SGML_DEFAULT;
  381. break;
  382. case eScriptComment:
  383. Result = SCE_H_COMMENT;
  384. break;
  385. default :
  386. Result = SCE_HJ_START;
  387. break;
  388. }
  389. return Result;
  390. }
  391. static inline bool ishtmlwordchar(int ch) {
  392. return !isascii(ch) ||
  393. (isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
  394. }
  395. static inline bool issgmlwordchar(int ch) {
  396. return !isascii(ch) ||
  397. (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
  398. }
  399. static inline bool IsPhpWordStart(int ch) {
  400. return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
  401. }
  402. static inline bool IsPhpWordChar(int ch) {
  403. return IsADigit(ch) || IsPhpWordStart(ch);
  404. }
  405. static bool InTagState(int state) {
  406. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  407. state == SCE_H_SCRIPT ||
  408. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  409. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  410. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  411. }
  412. static bool IsCommentState(const int state) {
  413. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  414. }
  415. static bool IsScriptCommentState(const int state) {
  416. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  417. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  418. }
  419. static bool isLineEnd(int ch) {
  420. return ch == '\r' || ch == '\n';
  421. }
  422. static bool isOKBeforeRE(int ch) {
  423. return (ch == '(') || (ch == '=') || (ch == ',');
  424. }
  425. static bool isPHPStringState(int state) {
  426. return
  427. (state == SCE_HPHP_HSTRING) ||
  428. (state == SCE_HPHP_SIMPLESTRING) ||
  429. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  430. (state == SCE_HPHP_COMPLEX_VARIABLE);
  431. }
  432. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  433. int j;
  434. const int beginning = i - 1;
  435. bool isValidSimpleString = false;
  436. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  437. i++;
  438. char ch = styler.SafeGetCharAt(i);
  439. const char chNext = styler.SafeGetCharAt(i + 1);
  440. if (!IsPhpWordStart(ch)) {
  441. if (ch == '\'' && IsPhpWordStart(chNext)) {
  442. i++;
  443. ch = chNext;
  444. isSimpleString = true;
  445. } else {
  446. phpStringDelimiter[0] = '\0';
  447. return beginning;
  448. }
  449. }
  450. phpStringDelimiter[0] = ch;
  451. i++;
  452. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  453. if (!IsPhpWordChar(styler[j])) {
  454. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  455. isValidSimpleString = true;
  456. j++;
  457. break;
  458. } else {
  459. phpStringDelimiter[0] = '\0';
  460. return beginning;
  461. }
  462. }
  463. if (j - i < phpStringDelimiterSize - 2)
  464. phpStringDelimiter[j-i+1] = styler[j];
  465. else
  466. i++;
  467. }
  468. if (isSimpleString && !isValidSimpleString) {
  469. phpStringDelimiter[0] = '\0';
  470. return beginning;
  471. }
  472. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  473. return j - 1;
  474. }
  475. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  476. Accessor &styler, bool isXml) {
  477. WordList &keywords = *keywordlists[0];
  478. WordList &keywords2 = *keywordlists[1];
  479. WordList &keywords3 = *keywordlists[2];
  480. WordList &keywords4 = *keywordlists[3];
  481. WordList &keywords5 = *keywordlists[4];
  482. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  483. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  484. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  485. char prevWord[200];
  486. prevWord[0] = '\0';
  487. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  488. phpStringDelimiter[0] = '\0';
  489. int StateToPrint = initStyle;
  490. int state = stateForPrintState(StateToPrint);
  491. // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
  492. if (InTagState(state)) {
  493. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  494. startPos--;
  495. length++;
  496. }
  497. state = SCE_H_DEFAULT;
  498. }
  499. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  500. if (isPHPStringState(state)) {
  501. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  502. startPos--;
  503. length++;
  504. state = styler.StyleAt(startPos);
  505. }
  506. if (startPos == 0)
  507. state = SCE_H_DEFAULT;
  508. }
  509. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  510. int lineCurrent = styler.GetLine(startPos);
  511. int lineState;
  512. if (lineCurrent > 0) {
  513. lineState = styler.GetLineState(lineCurrent);
  514. } else {
  515. // Default client and ASP scripting language is JavaScript
  516. lineState = eScriptJS << 8;
  517. // property asp.default.language
  518. // Script in ASP code is initially assumed to be in JavaScript.
  519. // To change this to VBScript set asp.default.language to 2. Python is 3.
  520. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  521. }
  522. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  523. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  524. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  525. bool tagDontFold = false; //some HTML tags should not be folded
  526. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  527. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  528. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  529. script_type scriptLanguage = ScriptOfState(state);
  530. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  531. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  532. scriptLanguage = eScriptComment;
  533. }
  534. // property fold.html
  535. // Folding is turned on or off for HTML and XML files with this option.
  536. // The fold option must also be on for folding to occur.
  537. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  538. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  539. // property fold.html.preprocessor
  540. // Folding is turned on or off for scripts embedded in HTML files with this option.
  541. // The default is on.
  542. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  543. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  544. // property fold.hypertext.comment
  545. // Allow folding for comments in scripts embedded in HTML.
  546. // The default is off.
  547. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  548. // property fold.hypertext.heredoc
  549. // Allow folding for heredocs in scripts embedded in HTML.
  550. // The default is off.
  551. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  552. // property html.tags.case.sensitive
  553. // For XML and HTML, setting this property to 1 will make tags match in a case
  554. // sensitive way which is the expected behaviour for XML and XHTML.
  555. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  556. // property lexer.xml.allow.scripts
  557. // Set to 0 to disable scripts in XML.
  558. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  559. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  560. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  561. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  562. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  563. int levelCurrent = levelPrev;
  564. int visibleChars = 0;
  565. int chPrev = ' ';
  566. int ch = ' ';
  567. int chPrevNonWhite = ' ';
  568. // look back to set chPrevNonWhite properly for better regex colouring
  569. if (scriptLanguage == eScriptJS && startPos > 0) {
  570. int back = startPos;
  571. int style = 0;
  572. while (--back) {
  573. style = styler.StyleAt(back);
  574. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  575. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  576. break;
  577. }
  578. if (style == SCE_HJ_SYMBOLS) {
  579. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  580. }
  581. }
  582. styler.StartSegment(startPos);
  583. const int lengthDoc = startPos + length;
  584. for (int i = startPos; i < lengthDoc; i++) {
  585. const int chPrev2 = chPrev;
  586. chPrev = ch;
  587. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  588. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  589. chPrevNonWhite = ch;
  590. ch = static_cast<unsigned char>(styler[i]);
  591. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  592. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  593. // Handle DBCS codepages
  594. if (styler.IsLeadByte(static_cast<char>(ch))) {
  595. chPrev = ' ';
  596. i += 1;
  597. continue;
  598. }
  599. if ((!IsASpace(ch) || !foldCompact) && fold)
  600. visibleChars++;
  601. // decide what is the current state to print (depending of the script tag)
  602. StateToPrint = statePrintForState(state, inScriptType);
  603. // handle script folding
  604. if (fold) {
  605. switch (scriptLanguage) {
  606. case eScriptJS:
  607. case eScriptPHP:
  608. //not currently supported case eScriptVBS:
  609. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  610. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  611. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  612. if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  613. levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
  614. }
  615. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  616. levelCurrent--;
  617. }
  618. break;
  619. case eScriptPython:
  620. if (state != SCE_HP_COMMENTLINE) {
  621. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  622. levelCurrent++;
  623. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  624. // check if the number of tabs is lower than the level
  625. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  626. for (int j = 0; Findlevel > 0; j++) {
  627. char chTmp = styler.SafeGetCharAt(i + j + 1);
  628. if (chTmp == '\t') {
  629. Findlevel -= 8;
  630. } else if (chTmp == ' ') {
  631. Findlevel--;
  632. } else {
  633. break;
  634. }
  635. }
  636. if (Findlevel > 0) {
  637. levelCurrent -= Findlevel / 8;
  638. if (Findlevel % 8)
  639. levelCurrent--;
  640. }
  641. }
  642. }
  643. break;
  644. default:
  645. break;
  646. }
  647. }
  648. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  649. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  650. // Avoid triggering two times on Dos/Win
  651. // New line -> record any line state onto /next/ line
  652. if (fold) {
  653. int lev = levelPrev;
  654. if (visibleChars == 0)
  655. lev |= SC_FOLDLEVELWHITEFLAG;
  656. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  657. lev |= SC_FOLDLEVELHEADERFLAG;
  658. styler.SetLevel(lineCurrent, lev);
  659. visibleChars = 0;
  660. levelPrev = levelCurrent;
  661. }
  662. lineCurrent++;
  663. styler.SetLineState(lineCurrent,
  664. ((inScriptType & 0x03) << 0) |
  665. ((tagOpened & 0x01) << 2) |
  666. ((tagClosing & 0x01) << 3) |
  667. ((aspScript & 0x0F) << 4) |
  668. ((clientScript & 0x0F) << 8) |
  669. ((beforePreProc & 0xFF) << 12));
  670. }
  671. // generic end of script processing
  672. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  673. // Check if it's the end of the script tag (or any other HTML tag)
  674. switch (state) {
  675. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  676. case SCE_H_DOUBLESTRING:
  677. case SCE_H_SINGLESTRING:
  678. case SCE_HJ_COMMENT:
  679. case SCE_HJ_COMMENTDOC:
  680. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  681. // the end of script marker from some JS interpreters.
  682. case SCE_HB_COMMENTLINE:
  683. case SCE_HBA_COMMENTLINE:
  684. case SCE_HJ_DOUBLESTRING:
  685. case SCE_HJ_SINGLESTRING:
  686. case SCE_HJ_REGEX:
  687. case SCE_HB_STRING:
  688. case SCE_HBA_STRING:
  689. case SCE_HP_STRING:
  690. case SCE_HP_TRIPLE:
  691. case SCE_HP_TRIPLEDOUBLE:
  692. case SCE_HPHP_HSTRING:
  693. case SCE_HPHP_SIMPLESTRING:
  694. case SCE_HPHP_COMMENT:
  695. case SCE_HPHP_COMMENTLINE:
  696. break;
  697. default :
  698. // check if the closing tag is a script tag
  699. if (const char *tag =
  700. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  701. state == SCE_H_COMMENT ? "comment" : 0) {
  702. int j = i + 2;
  703. int chr;
  704. do {
  705. chr = static_cast<int>(*tag++);
  706. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  707. if (chr != 0) break;
  708. }
  709. // closing tag of the script (it's a closing HTML tag anyway)
  710. styler.ColourTo(i - 1, StateToPrint);
  711. state = SCE_H_TAGUNKNOWN;
  712. inScriptType = eHtml;
  713. scriptLanguage = eScriptNone;
  714. clientScript = eScriptJS;
  715. i += 2;
  716. visibleChars += 2;
  717. tagClosing = true;
  718. continue;
  719. }
  720. }
  721. /////////////////////////////////////
  722. // handle the start of PHP pre-processor = Non-HTML
  723. else if ((state != SCE_H_ASPAT) &&
  724. !isPHPStringState(state) &&
  725. (state != SCE_HPHP_COMMENT) &&
  726. (ch == '<') &&
  727. (chNext == '?') &&
  728. !IsScriptCommentState(state) ) {
  729. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
  730. if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
  731. styler.ColourTo(i - 1, StateToPrint);
  732. beforePreProc = state;
  733. i++;
  734. visibleChars++;
  735. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  736. if (scriptLanguage == eScriptXML)
  737. styler.ColourTo(i, SCE_H_XMLSTART);
  738. else
  739. styler.ColourTo(i, SCE_H_QUESTION);
  740. state = StateForScript(scriptLanguage);
  741. if (inScriptType == eNonHtmlScript)
  742. inScriptType = eNonHtmlScriptPreProc;
  743. else
  744. inScriptType = eNonHtmlPreProc;
  745. // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
  746. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  747. levelCurrent++;
  748. }
  749. // should be better
  750. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  751. continue;
  752. }
  753. // handle the start of ASP pre-processor = Non-HTML
  754. else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  755. styler.ColourTo(i - 1, StateToPrint);
  756. beforePreProc = state;
  757. if (inScriptType == eNonHtmlScript)
  758. inScriptType = eNonHtmlScriptPreProc;
  759. else
  760. inScriptType = eNonHtmlPreProc;
  761. if (chNext2 == '@') {
  762. i += 2; // place as if it was the second next char treated
  763. visibleChars += 2;
  764. state = SCE_H_ASPAT;
  765. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  766. styler.ColourTo(i + 3, SCE_H_ASP);
  767. state = SCE_H_XCCOMMENT;
  768. scriptLanguage = eScriptVBS;
  769. continue;
  770. } else {
  771. if (chNext2 == '=') {
  772. i += 2; // place as if it was the second next char treated
  773. visibleChars += 2;
  774. } else {
  775. i++; // place as if it was the next char treated
  776. visibleChars++;
  777. }
  778. state = StateForScript(aspScript);
  779. }
  780. scriptLanguage = eScriptVBS;
  781. styler.ColourTo(i, SCE_H_ASP);
  782. // fold whole script
  783. if (foldHTMLPreprocessor)
  784. levelCurrent++;
  785. // should be better
  786. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  787. continue;
  788. }
  789. /////////////////////////////////////
  790. // handle the start of SGML language (DTD)
  791. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  792. (chPrev == '<') &&
  793. (ch == '!') &&
  794. (StateToPrint != SCE_H_CDATA) &&
  795. (!IsCommentState(StateToPrint)) &&
  796. (!IsScriptCommentState(StateToPrint)) ) {
  797. beforePreProc = state;
  798. styler.ColourTo(i - 2, StateToPrint);
  799. if ((chNext == '-') && (chNext2 == '-')) {
  800. state = SCE_H_COMMENT; // wait for a pending command
  801. styler.ColourTo(i + 2, SCE_H_COMMENT);
  802. i += 2; // follow styling after the --
  803. } else if (isWordCdata(i + 1, i + 7, styler)) {
  804. state = SCE_H_CDATA;
  805. } else {
  806. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  807. scriptLanguage = eScriptSGML;
  808. state = SCE_H_SGML_COMMAND; // wait for a pending command
  809. }
  810. // fold whole tag (-- when closing the tag)
  811. if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
  812. levelCurrent++;
  813. continue;
  814. }
  815. // handle the end of a pre-processor = Non-HTML
  816. else if ((
  817. ((inScriptType == eNonHtmlPreProc)
  818. || (inScriptType == eNonHtmlScriptPreProc)) && (
  819. ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
  820. ) && (chNext == '>')) ||
  821. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  822. if (state == SCE_H_ASPAT) {
  823. aspScript = segIsScriptingIndicator(styler,
  824. styler.GetStartSegment(), i - 1, aspScript);
  825. }
  826. // Bounce out of any ASP mode
  827. switch (state) {
  828. case SCE_HJ_WORD:
  829. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  830. break;
  831. case SCE_HB_WORD:
  832. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  833. break;
  834. case SCE_HP_WORD:
  835. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  836. break;
  837. case SCE_HPHP_WORD:
  838. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  839. break;
  840. case SCE_H_XCCOMMENT:
  841. styler.ColourTo(i - 1, state);
  842. break;
  843. default :
  844. styler.ColourTo(i - 1, StateToPrint);
  845. break;
  846. }
  847. if (scriptLanguage != eScriptSGML) {
  848. i++;
  849. visibleChars++;
  850. }
  851. if (ch == '%')
  852. styler.ColourTo(i, SCE_H_ASP);
  853. else if (scriptLanguage == eScriptXML)
  854. styler.ColourTo(i, SCE_H_XMLEND);
  855. else if (scriptLanguage == eScriptSGML)
  856. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  857. else
  858. styler.ColourTo(i, SCE_H_QUESTION);
  859. state = beforePreProc;
  860. if (inScriptType == eNonHtmlScriptPreProc)
  861. inScriptType = eNonHtmlScript;
  862. else
  863. inScriptType = eHtml;
  864. // Unfold all scripting languages, except for XML tag
  865. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  866. levelCurrent--;
  867. }
  868. scriptLanguage = eScriptNone;
  869. continue;
  870. }
  871. /////////////////////////////////////
  872. switch (state) {
  873. case SCE_H_DEFAULT:
  874. if (ch == '<') {
  875. // in HTML, fold on tag open and unfold on tag close
  876. tagOpened = true;
  877. tagClosing = (chNext == '/');
  878. styler.ColourTo(i - 1, StateToPrint);
  879. if (chNext != '!')
  880. state = SCE_H_TAGUNKNOWN;
  881. } else if (ch == '&') {
  882. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  883. state = SCE_H_ENTITY;
  884. }
  885. break;
  886. case SCE_H_SGML_DEFAULT:
  887. case SCE_H_SGML_BLOCK_DEFAULT:
  888. // if (scriptLanguage == eScriptSGMLblock)
  889. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  890. if (ch == '\"') {
  891. styler.ColourTo(i - 1, StateToPrint);
  892. state = SCE_H_SGML_DOUBLESTRING;
  893. } else if (ch == '\'') {
  894. styler.ColourTo(i - 1, StateToPrint);
  895. state = SCE_H_SGML_SIMPLESTRING;
  896. } else if ((ch == '-') && (chPrev == '-')) {
  897. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  898. styler.ColourTo(i - 2, StateToPrint);
  899. }
  900. state = SCE_H_SGML_COMMENT;
  901. } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
  902. styler.ColourTo(i - 2, StateToPrint);
  903. state = SCE_H_SGML_ENTITY;
  904. } else if (ch == '#') {
  905. styler.ColourTo(i - 1, StateToPrint);
  906. state = SCE_H_SGML_SPECIAL;
  907. } else if (ch == '[') {
  908. styler.ColourTo(i - 1, StateToPrint);
  909. scriptLanguage = eScriptSGMLblock;
  910. state = SCE_H_SGML_BLOCK_DEFAULT;
  911. } else if (ch == ']') {
  912. if (scriptLanguage == eScriptSGMLblock) {
  913. styler.ColourTo(i, StateToPrint);
  914. scriptLanguage = eScriptSGML;
  915. } else {
  916. styler.ColourTo(i - 1, StateToPrint);
  917. styler.ColourTo(i, SCE_H_SGML_ERROR);
  918. }
  919. state = SCE_H_SGML_DEFAULT;
  920. } else if (scriptLanguage == eScriptSGMLblock) {
  921. if ((ch == '!') && (chPrev == '<')) {
  922. styler.ColourTo(i - 2, StateToPrint);
  923. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  924. state = SCE_H_SGML_COMMAND;
  925. } else if (ch == '>') {
  926. styler.ColourTo(i - 1, StateToPrint);
  927. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  928. }
  929. }
  930. break;
  931. case SCE_H_SGML_COMMAND:
  932. if ((ch == '-') && (chPrev == '-')) {
  933. styler.ColourTo(i - 2, StateToPrint);
  934. state = SCE_H_SGML_COMMENT;
  935. } else if (!issgmlwordchar(ch)) {
  936. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  937. styler.ColourTo(i - 1, StateToPrint);
  938. state = SCE_H_SGML_1ST_PARAM;
  939. } else {
  940. state = SCE_H_SGML_ERROR;
  941. }
  942. }
  943. break;
  944. case SCE_H_SGML_1ST_PARAM:
  945. // wait for the beginning of the word
  946. if ((ch == '-') && (chPrev == '-')) {
  947. if (scriptLanguage == eScriptSGMLblock) {
  948. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  949. } else {
  950. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  951. }
  952. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  953. } else if (issgmlwordchar(ch)) {
  954. if (scriptLanguage == eScriptSGMLblock) {
  955. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  956. } else {
  957. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  958. }
  959. // find the length of the word
  960. int size = 1;
  961. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  962. size++;
  963. styler.ColourTo(i + size - 1, StateToPrint);
  964. i += size - 1;
  965. visibleChars += size - 1;
  966. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  967. if (scriptLanguage == eScriptSGMLblock) {
  968. state = SCE_H_SGML_BLOCK_DEFAULT;
  969. } else {
  970. state = SCE_H_SGML_DEFAULT;
  971. }
  972. continue;
  973. }
  974. break;
  975. case SCE_H_SGML_ERROR:
  976. if ((ch == '-') && (chPrev == '-')) {
  977. styler.ColourTo(i - 2, StateToPrint);
  978. state = SCE_H_SGML_COMMENT;
  979. }
  980. case SCE_H_SGML_DOUBLESTRING:
  981. if (ch == '\"') {
  982. styler.ColourTo(i, StateToPrint);
  983. state = SCE_H_SGML_DEFAULT;
  984. }
  985. break;
  986. case SCE_H_SGML_SIMPLESTRING:
  987. if (ch == '\'') {
  988. styler.ColourTo(i, StateToPrint);
  989. state = SCE_H_SGML_DEFAULT;
  990. }
  991. break;
  992. case SCE_H_SGML_COMMENT:
  993. if ((ch == '-') && (chPrev == '-')) {
  994. styler.ColourTo(i, StateToPrint);
  995. state = SCE_H_SGML_DEFAULT;
  996. }
  997. break;
  998. case SCE_H_CDATA:
  999. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  1000. styler.ColourTo(i, StateToPrint);
  1001. state = SCE_H_DEFAULT;
  1002. levelCurrent--;
  1003. }
  1004. break;
  1005. case SCE_H_COMMENT:
  1006. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  1007. styler.ColourTo(i, StateToPrint);
  1008. state = SCE_H_DEFAULT;
  1009. levelCurrent--;
  1010. }
  1011. break;
  1012. case SCE_H_SGML_1ST_PARAM_COMMENT:
  1013. if ((ch == '-') && (chPrev == '-')) {
  1014. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  1015. state = SCE_H_SGML_1ST_PARAM;
  1016. }
  1017. break;
  1018. case SCE_H_SGML_SPECIAL:
  1019. if (!(isascii(ch) && isupper(ch))) {
  1020. styler.ColourTo(i - 1, StateToPrint);
  1021. if (isalnum(ch)) {
  1022. state = SCE_H_SGML_ERROR;
  1023. } else {
  1024. state = SCE_H_SGML_DEFAULT;
  1025. }
  1026. }
  1027. break;
  1028. case SCE_H_SGML_ENTITY:
  1029. if (ch == ';') {
  1030. styler.ColourTo(i, StateToPrint);
  1031. state = SCE_H_SGML_DEFAULT;
  1032. } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1033. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1034. state = SCE_H_SGML_DEFAULT;
  1035. }
  1036. break;
  1037. case SCE_H_ENTITY:
  1038. if (ch == ';') {
  1039. styler.ColourTo(i, StateToPrint);
  1040. state = SCE_H_DEFAULT;
  1041. }
  1042. if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1043. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1044. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1045. state = SCE_H_DEFAULT;
  1046. }
  1047. break;
  1048. case SCE_H_TAGUNKNOWN:
  1049. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1050. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1051. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1052. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1053. if (!tagClosing) {
  1054. inScriptType = eNonHtmlScript;
  1055. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1056. } else {
  1057. scriptLanguage = eScriptNone;
  1058. }
  1059. eClass = SCE_H_TAG;
  1060. }
  1061. if (ch == '>') {
  1062. styler.ColourTo(i, eClass);
  1063. if (inScriptType == eNonHtmlScript) {
  1064. state = StateForScript(scriptLanguage);
  1065. } else {
  1066. state = SCE_H_DEFAULT;
  1067. }
  1068. tagOpened = false;
  1069. if (!tagDontFold) {
  1070. if (tagClosing) {
  1071. levelCurrent--;
  1072. } else {
  1073. levelCurrent++;
  1074. }
  1075. }
  1076. tagClosing = false;
  1077. } else if (ch == '/' && chNext == '>') {
  1078. if (eClass == SCE_H_TAGUNKNOWN) {
  1079. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1080. } else {
  1081. styler.ColourTo(i - 1, StateToPrint);
  1082. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1083. }
  1084. i++;
  1085. ch = chNext;
  1086. state = SCE_H_DEFAULT;
  1087. tagOpened = false;
  1088. } else {
  1089. if (eClass != SCE_H_TAGUNKNOWN) {
  1090. if (eClass == SCE_H_SGML_DEFAULT) {
  1091. state = SCE_H_SGML_DEFAULT;
  1092. } else {
  1093. state = SCE_H_OTHER;
  1094. }
  1095. }
  1096. }
  1097. }
  1098. break;
  1099. case SCE_H_ATTRIBUTE:
  1100. if (!setAttributeContinue.Contains(ch)) {
  1101. if (inScriptType == eNonHtmlScript) {
  1102. int scriptLanguagePrev = scriptLanguage;
  1103. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1104. scriptLanguage = clientScript;
  1105. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1106. inScriptType = eHtml;
  1107. }
  1108. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1109. if (ch == '>') {
  1110. styler.ColourTo(i, SCE_H_TAG);
  1111. if (inScriptType == eNonHtmlScript) {
  1112. state = StateForScript(scriptLanguage);
  1113. } else {
  1114. state = SCE_H_DEFAULT;
  1115. }
  1116. tagOpened = false;
  1117. if (!tagDontFold) {
  1118. if (tagClosing) {
  1119. levelCurrent--;
  1120. } else {
  1121. levelCurrent++;
  1122. }
  1123. }
  1124. tagClosing = false;
  1125. } else if (ch == '=') {
  1126. styler.ColourTo(i, SCE_H_OTHER);
  1127. state = SCE_H_VALUE;
  1128. } else {
  1129. state = SCE_H_OTHER;
  1130. }
  1131. }
  1132. break;
  1133. case SCE_H_OTHER:
  1134. if (ch == '>') {
  1135. styler.ColourTo(i - 1, StateToPrint);
  1136. styler.ColourTo(i, SCE_H_TAG);
  1137. if (inScriptType == eNonHtmlScript) {
  1138. state = StateForScript(scriptLanguage);
  1139. } else {
  1140. state = SCE_H_DEFAULT;
  1141. }
  1142. tagOpened = false;
  1143. if (!tagDontFold) {
  1144. if (tagClosing) {
  1145. levelCurrent--;
  1146. } else {
  1147. levelCurrent++;
  1148. }
  1149. }
  1150. tagClosing = false;
  1151. } else if (ch == '\"') {
  1152. styler.ColourTo(i - 1, StateToPrint);
  1153. state = SCE_H_DOUBLESTRING;
  1154. } else if (ch == '\'') {
  1155. styler.ColourTo(i - 1, StateToPrint);
  1156. state = SCE_H_SINGLESTRING;
  1157. } else if (ch == '=') {
  1158. styler.ColourTo(i, StateToPrint);
  1159. state = SCE_H_VALUE;
  1160. } else if (ch == '/' && chNext == '>') {
  1161. styler.ColourTo(i - 1, StateToPrint);
  1162. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1163. i++;
  1164. ch = chNext;
  1165. state = SCE_H_DEFAULT;
  1166. tagOpened = false;
  1167. } else if (ch == '?' && chNext == '>') {
  1168. styler.ColourTo(i - 1, StateToPrint);
  1169. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1170. i++;
  1171. ch = chNext;
  1172. state = SCE_H_DEFAULT;
  1173. } else if (setHTMLWord.Contains(ch)) {
  1174. styler.ColourTo(i - 1, StateToPrint);
  1175. state = SCE_H_ATTRIBUTE;
  1176. }
  1177. break;
  1178. case SCE_H_DOUBLESTRING:
  1179. if (ch == '\"') {
  1180. if (inScriptType == eNonHtmlScript) {
  1181. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1182. }
  1183. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1184. state = SCE_H_OTHER;
  1185. }
  1186. break;
  1187. case SCE_H_SINGLESTRING:
  1188. if (ch == '\'') {
  1189. if (inScriptType == eNonHtmlScript) {
  1190. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1191. }
  1192. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1193. state = SCE_H_OTHER;
  1194. }
  1195. break;
  1196. case SCE_H_VALUE:
  1197. if (!setHTMLWord.Contains(ch)) {
  1198. if (ch == '\"' && chPrev == '=') {
  1199. // Should really test for being first character
  1200. state = SCE_H_DOUBLESTRING;
  1201. } else if (ch == '\'' && chPrev == '=') {
  1202. state = SCE_H_SINGLESTRING;
  1203. } else {
  1204. if (IsNumber(styler.GetStartSegment(), styler)) {
  1205. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1206. } else {
  1207. styler.ColourTo(i - 1, StateToPrint);
  1208. }
  1209. if (ch == '>') {
  1210. styler.ColourTo(i, SCE_H_TAG);
  1211. if (inScriptType == eNonHtmlScript) {
  1212. state = StateForScript(scriptLanguage);
  1213. } else {
  1214. state = SCE_H_DEFAULT;
  1215. }
  1216. tagOpened = false;
  1217. if (!tagDontFold) {
  1218. if (tagClosing) {
  1219. levelCurrent--;
  1220. } else {
  1221. levelCurrent++;
  1222. }
  1223. }
  1224. tagClosing = false;
  1225. } else {
  1226. state = SCE_H_OTHER;
  1227. }
  1228. }
  1229. }
  1230. break;
  1231. case SCE_HJ_DEFAULT:
  1232. case SCE_HJ_START:
  1233. case SCE_HJ_SYMBOLS:
  1234. if (IsAWordStart(ch)) {
  1235. styler.ColourTo(i - 1, StateToPrint);
  1236. state = SCE_HJ_WORD;
  1237. } else if (ch == '/' && chNext == '*') {
  1238. styler.ColourTo(i - 1, StateToPrint);
  1239. if (chNext2 == '*')
  1240. state = SCE_HJ_COMMENTDOC;
  1241. else
  1242. state = SCE_HJ_COMMENT;
  1243. } else if (ch == '/' && chNext == '/') {
  1244. styler.ColourTo(i - 1, StateToPrint);
  1245. state = SCE_HJ_COMMENTLINE;
  1246. } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
  1247. styler.ColourTo(i - 1, StateToPrint);
  1248. state = SCE_HJ_REGEX;
  1249. } else if (ch == '\"') {
  1250. styler.ColourTo(i - 1, StateToPrint);
  1251. state = SCE_HJ_DOUBLESTRING;
  1252. } else if (ch == '\'') {
  1253. styler.ColourTo(i - 1, StateToPrint);
  1254. state = SCE_HJ_SINGLESTRING;
  1255. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1256. styler.SafeGetCharAt(i + 3) == '-') {
  1257. styler.ColourTo(i - 1, StateToPrint);
  1258. state = SCE_HJ_COMMENTLINE;
  1259. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1260. styler.ColourTo(i - 1, StateToPrint);
  1261. state = SCE_HJ_COMMENTLINE;
  1262. i += 2;
  1263. } else if (IsOperator(ch)) {
  1264. styler.ColourTo(i - 1, StateToPrint);
  1265. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1266. state = SCE_HJ_DEFAULT;
  1267. } else if ((ch == ' ') || (ch == '\t')) {
  1268. if (state == SCE_HJ_START) {
  1269. styler.ColourTo(i - 1, StateToPrint);
  1270. state = SCE_HJ_DEFAULT;
  1271. }
  1272. }
  1273. break;
  1274. case SCE_HJ_WORD:
  1275. if (!IsAWordChar(ch)) {
  1276. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1277. //styler.ColourTo(i - 1, eHTJSKeyword);
  1278. state = SCE_HJ_DEFAULT;
  1279. if (ch == '/' && chNext == '*') {
  1280. if (chNext2 == '*')
  1281. state = SCE_HJ_COMMENTDOC;
  1282. else
  1283. state = SCE_HJ_COMMENT;
  1284. } else if (ch == '/' && chNext == '/') {
  1285. state = SCE_HJ_COMMENTLINE;
  1286. } else if (ch == '\"') {
  1287. state = SCE_HJ_DOUBLESTRING;
  1288. } else if (ch == '\'') {
  1289. state = SCE_HJ_SINGLESTRING;
  1290. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1291. styler.ColourTo(i - 1, StateToPrint);
  1292. state = SCE_HJ_COMMENTLINE;
  1293. i += 2;
  1294. } else if (IsOperator(ch)) {
  1295. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1296. state = SCE_HJ_DEFAULT;
  1297. }
  1298. }
  1299. break;
  1300. case SCE_HJ_COMMENT:
  1301. case SCE_HJ_COMMENTDOC:
  1302. if (ch == '/' && chPrev == '*') {
  1303. styler.ColourTo(i, StateToPrint);
  1304. state = SCE_HJ_DEFAULT;
  1305. ch = ' ';
  1306. }
  1307. break;
  1308. case SCE_HJ_COMMENTLINE:
  1309. if (ch == '\r' || ch == '\n') {
  1310. styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
  1311. state = SCE_HJ_DEFAULT;
  1312. ch = ' ';
  1313. }
  1314. break;
  1315. case SCE_HJ_DOUBLESTRING:
  1316. if (ch == '\\') {
  1317. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1318. i++;
  1319. }
  1320. } else if (ch == '\"') {
  1321. styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
  1322. state = SCE_HJ_DEFAULT;
  1323. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1324. styler.ColourTo(i - 1, StateToPrint);
  1325. state = SCE_HJ_COMMENTLINE;
  1326. i += 2;
  1327. } else if (isLineEnd(ch)) {
  1328. styler.ColourTo(i - 1, StateToPrint);
  1329. state = SCE_HJ_STRINGEOL;
  1330. }
  1331. break;
  1332. case SCE_HJ_SINGLESTRING:
  1333. if (ch == '\\') {
  1334. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1335. i++;
  1336. }
  1337. } else if (ch == '\'') {
  1338. styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
  1339. state = SCE_HJ_DEFAULT;
  1340. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1341. styler.ColourTo(i - 1, StateToPrint);
  1342. state = SCE_HJ_COMMENTLINE;
  1343. i += 2;
  1344. } else if (isLineEnd(ch)) {
  1345. styler.ColourTo(i - 1, StateToPrint);
  1346. state = SCE_HJ_STRINGEOL;
  1347. }
  1348. break;
  1349. case SCE_HJ_STRINGEOL:
  1350. if (!isLineEnd(ch)) {
  1351. styler.ColourTo(i - 1, StateToPrint);
  1352. state = SCE_HJ_DEFAULT;
  1353. } else if (!isLineEnd(chNext)) {
  1354. styler.ColourTo(i, StateToPrint);
  1355. state = SCE_HJ_DEFAULT;
  1356. }
  1357. break;
  1358. case SCE_HJ_REGEX:
  1359. if (ch == '\r' || ch == '\n' || ch == '/') {
  1360. if (ch == '/') {
  1361. while (isascii(chNext) && islower(chNext)) { // gobble regex flags
  1362. i++;
  1363. ch = chNext;
  1364. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1365. }
  1366. }
  1367. styler.ColourTo(i, StateToPrint);
  1368. state = SCE_HJ_DEFAULT;
  1369. } else if (ch == '\\') {
  1370. // Gobble up the quoted character
  1371. if (chNext == '\\' || chNext == '/') {
  1372. i++;
  1373. ch = chNext;
  1374. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1375. }
  1376. }
  1377. break;
  1378. case SCE_HB_DEFAULT:
  1379. case SCE_HB_START:
  1380. if (IsAWordStart(ch)) {
  1381. styler.ColourTo(i - 1, StateToPrint);
  1382. state = SCE_HB_WORD;
  1383. } else if (ch == '\'') {
  1384. styler.ColourTo(i - 1, StateToPrint);
  1385. state = SCE_HB_COMMENTLINE;
  1386. } else if (ch == '\"') {
  1387. styler.ColourTo(i - 1, StateToPrint);
  1388. state = SCE_HB_STRING;
  1389. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1390. styler.SafeGetCharAt(i + 3) == '-') {
  1391. styler.ColourTo(i - 1, StateToPrint);
  1392. state = SCE_HB_COMMENTLINE;
  1393. } else if (IsOperator(ch)) {
  1394. styler.ColourTo(i - 1, StateToPrint);
  1395. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1396. state = SCE_HB_DEFAULT;
  1397. } else if ((ch == ' ') || (ch == '\t')) {
  1398. if (state == SCE_HB_START) {
  1399. styler.ColourTo(i - 1, StateToPrint);
  1400. state = SCE_HB_DEFAULT;
  1401. }
  1402. }
  1403. break;
  1404. case SCE_HB_WORD:
  1405. if (!IsAWordChar(ch)) {
  1406. state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1407. if (state == SCE_HB_DEFAULT) {
  1408. if (ch == '\"') {
  1409. state = SCE_HB_STRING;
  1410. } else if (ch == '\'') {
  1411. state = SCE_HB_COMMENTLINE;
  1412. } else if (IsOperator(ch)) {
  1413. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1414. state = SCE_HB_DEFAULT;
  1415. }
  1416. }
  1417. }
  1418. break;
  1419. case SCE_HB_STRING:
  1420. if (ch == '\"') {
  1421. styler.ColourTo(i, StateToPrint);
  1422. state = SCE_HB_DEFAULT;
  1423. } else if (ch == '\r' || ch == '\n') {
  1424. styler.ColourTo(i - 1, StateToPrint);
  1425. state = SCE_HB_STRINGEOL;
  1426. }
  1427. break;
  1428. case SCE_HB_COMMENTLINE:
  1429. if (ch == '\r' || ch == '\n') {
  1430. styler.ColourTo(i - 1, StateToPrint);
  1431. state = SCE_HB_DEFAULT;
  1432. }
  1433. break;
  1434. case SCE_HB_STRINGEOL:
  1435. if (!isLineEnd(ch)) {
  1436. styler.ColourTo(i - 1, StateToPrint);
  1437. state = SCE_HB_DEFAULT;
  1438. } else if (!isLineEnd(chNext)) {
  1439. styler.ColourTo(i, StateToPrint);
  1440. state = SCE_HB_DEFAULT;
  1441. }
  1442. break;
  1443. case SCE_HP_DEFAULT:
  1444. case SCE_HP_START:
  1445. if (IsAWordStart(ch)) {
  1446. styler.ColourTo(i - 1, StateToPrint);
  1447. state = SCE_HP_WORD;
  1448. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1449. styler.SafeGetCharAt(i + 3) == '-') {
  1450. styler.ColourTo(i - 1, StateToPrint);
  1451. state = SCE_HP_COMMENTLINE;
  1452. } else if (ch == '#') {
  1453. styler.ColourTo(i - 1, StateToPrint);
  1454. state = SCE_HP_COMMENTLINE;
  1455. } else if (ch == '\"') {
  1456. styler.ColourTo(i - 1, StateToPri

Large files are truncated, but you can click here to view the full file