PageRenderTime 50ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/Pythonwin/Scintilla/src/LexHTML.cxx

https://bitbucket.org/jaraco/pywin32
C++ | 2184 lines | 1990 code | 106 blank | 88 comment | 1458 complexity | d39eb8a9411121905b14120d6b27385f MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <ctype.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include "Platform.h"
  13. #include "PropSet.h"
  14. #include "Accessor.h"
  15. #include "StyleContext.h"
  16. #include "KeyWords.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "CharacterSet.h"
  20. #ifdef SCI_NAMESPACE
  21. using namespace Scintilla;
  22. #endif
  // Distance between each base script style range and its alternate range
  // (SCE_HJ_* -> SCE_HJA_*, SCE_HB_* -> SCE_HBA_*, SCE_HP_* -> SCE_HPA_*).
  #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  // Language of the text currently being lexed.
  // The lexer packs these values into 4-bit fields of the per-line state,
  // so every enumerator has to stay below 16.
  enum script_type {
      eScriptNone = 0,
      eScriptJS,
      eScriptVBS,
      eScriptPython,
      eScriptPHP,
      eScriptXML,
      eScriptSGML,
      eScriptSGMLblock,
      eScriptComment
  };

  // Kind of region the lexer is inside; packed into 2 bits of the per-line state.
  enum script_mode {
      eHtml = 0,
      eNonHtmlScript,
      eNonHtmlPreProc,
      eNonHtmlScriptPreProc
  };
  // Report whether ch may appear inside a word: an ASCII letter or digit, '.' or '_'.
  // Anything outside 0..0x7F is rejected. Negative values are filtered out before
  // isalnum() is reached, because isalnum() is undefined for arguments that are
  // neither EOF nor representable as unsigned char.
  static inline bool IsAWordChar(const int ch) {
      if (ch < 0 || ch >= 0x80)
          return false;
      return isalnum(ch) || ch == '.' || ch == '_';
  }
  // Report whether ch may begin a word: an ASCII letter or digit, or '_'.
  // Anything outside 0..0x7F is rejected. Negative values are filtered out before
  // isalnum() is reached, because isalnum() is undefined for arguments that are
  // neither EOF nor representable as unsigned char.
  static inline bool IsAWordStart(const int ch) {
      if (ch < 0 || ch >= 0x80)
          return false;
      return isalnum(ch) || ch == '_';
  }
  // Report whether ch is one of the punctuation characters treated as an operator.
  // ASCII letters and digits are never operators. Note that '.' IS in the
  // accepted set, even though it can also be part of a number.
  inline bool IsOperator(int ch) {
      if (isascii(ch) && isalnum(ch))
          return false;
      switch (ch) {
      case '%': case '^': case '&': case '*':
      case '(': case ')': case '-': case '+':
      case '=': case '|': case '{': case '}':
      case '[': case ']': case ':': case ';':
      case '<': case '>': case ',': case '/':
      case '?': case '!': case '.': case '~':
          return true;
      default:
          return false;
      }
  }
  // Fold an ASCII upper-case letter to lower case; every other value is
  // returned unchanged.
  static inline int MakeLowerCase(int ch) {
      const bool isUpper = (ch >= 'A') && (ch <= 'Z');
      return isUpper ? (ch - 'A' + 'a') : ch;
  }
  53. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  54. size_t i = 0;
  55. for (; (i < end - start + 1) && (i < len-1); i++) {
  56. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  57. }
  58. s[i] = '\0';
  59. }
  60. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  61. char s[100];
  62. GetTextSegment(styler, start, end, s, sizeof(s));
  63. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  64. if (strstr(s, "src")) // External script
  65. return eScriptNone;
  66. if (strstr(s, "vbs"))
  67. return eScriptVBS;
  68. if (strstr(s, "pyth"))
  69. return eScriptPython;
  70. if (strstr(s, "javas"))
  71. return eScriptJS;
  72. if (strstr(s, "jscr"))
  73. return eScriptJS;
  74. if (strstr(s, "php"))
  75. return eScriptPHP;
  76. if (strstr(s, "xml")) {
  77. const char *xml = strstr(s, "xml");
  78. for (const char *t=s; t<xml; t++) {
  79. if (!IsASpace(*t)) {
  80. return prevValue;
  81. }
  82. }
  83. return eScriptXML;
  84. }
  85. return prevValue;
  86. }
  87. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  88. int iResult = 0;
  89. char s[100];
  90. GetTextSegment(styler, start, end, s, sizeof(s));
  91. if (0 == strncmp(s, "php", 3)) {
  92. iResult = 3;
  93. }
  94. return iResult;
  95. }
  96. static script_type ScriptOfState(int state) {
  97. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  98. return eScriptPython;
  99. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  100. return eScriptVBS;
  101. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  102. return eScriptJS;
  103. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  104. return eScriptPHP;
  105. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  106. return eScriptSGML;
  107. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  108. return eScriptSGMLblock;
  109. } else {
  110. return eScriptNone;
  111. }
  112. }
  113. static int statePrintForState(int state, script_mode inScriptType) {
  114. int StateToPrint = state;
  115. if (state >= SCE_HJ_START) {
  116. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  117. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  118. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  119. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  120. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  121. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  122. }
  123. }
  124. return StateToPrint;
  125. }
  126. static int stateForPrintState(int StateToPrint) {
  127. int state;
  128. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  129. state = StateToPrint - SCE_HA_PYTHON;
  130. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  131. state = StateToPrint - SCE_HA_VBS;
  132. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  133. state = StateToPrint - SCE_HA_JS;
  134. } else {
  135. state = StateToPrint;
  136. }
  137. return state;
  138. }
  139. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  140. return IsADigit(styler[start]) || (styler[start] == '.') ||
  141. (styler[start] == '-') || (styler[start] == '#');
  142. }
  143. static inline bool isStringState(int state) {
  144. bool bResult;
  145. switch (state) {
  146. case SCE_HJ_DOUBLESTRING:
  147. case SCE_HJ_SINGLESTRING:
  148. case SCE_HJA_DOUBLESTRING:
  149. case SCE_HJA_SINGLESTRING:
  150. case SCE_HB_STRING:
  151. case SCE_HBA_STRING:
  152. case SCE_HP_STRING:
  153. case SCE_HP_CHARACTER:
  154. case SCE_HP_TRIPLE:
  155. case SCE_HP_TRIPLEDOUBLE:
  156. case SCE_HPA_STRING:
  157. case SCE_HPA_CHARACTER:
  158. case SCE_HPA_TRIPLE:
  159. case SCE_HPA_TRIPLEDOUBLE:
  160. case SCE_HPHP_HSTRING:
  161. case SCE_HPHP_SIMPLESTRING:
  162. case SCE_HPHP_HSTRING_VARIABLE:
  163. case SCE_HPHP_COMPLEX_VARIABLE:
  164. bResult = true;
  165. break;
  166. default :
  167. bResult = false;
  168. break;
  169. }
  170. return bResult;
  171. }
  172. static inline bool stateAllowsTermination(int state) {
  173. bool allowTermination = !isStringState(state);
  174. if (allowTermination) {
  175. switch (state) {
  176. case SCE_HB_COMMENTLINE:
  177. case SCE_HPHP_COMMENT:
  178. case SCE_HP_COMMENTLINE:
  179. case SCE_HPA_COMMENTLINE:
  180. allowTermination = false;
  181. }
  182. }
  183. return allowTermination;
  184. }
  185. // not really well done, since it's only comments that should lex the %> and <%
  186. static inline bool isCommentASPState(int state) {
  187. bool bResult;
  188. switch (state) {
  189. case SCE_HJ_COMMENT:
  190. case SCE_HJ_COMMENTLINE:
  191. case SCE_HJ_COMMENTDOC:
  192. case SCE_HB_COMMENTLINE:
  193. case SCE_HP_COMMENTLINE:
  194. case SCE_HPHP_COMMENT:
  195. case SCE_HPHP_COMMENTLINE:
  196. bResult = true;
  197. break;
  198. default :
  199. bResult = false;
  200. break;
  201. }
  202. return bResult;
  203. }
  204. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  205. bool wordIsNumber = IsNumber(start, styler);
  206. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  207. if (wordIsNumber) {
  208. chAttr = SCE_H_NUMBER;
  209. } else {
  210. char s[100];
  211. GetTextSegment(styler, start, end, s, sizeof(s));
  212. if (keywords.InList(s))
  213. chAttr = SCE_H_ATTRIBUTE;
  214. }
  215. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  216. // No keywords -> all are known
  217. chAttr = SCE_H_ATTRIBUTE;
  218. styler.ColourTo(end, chAttr);
  219. }
  220. static int classifyTagHTML(unsigned int start, unsigned int end,
  221. WordList &keywords, Accessor &styler, bool &tagDontFold,
  222. bool caseSensitive, bool isXml, bool allowScripts) {
  223. char s[30 + 2];
  224. // Copy after the '<'
  225. unsigned int i = 0;
  226. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  227. char ch = styler[cPos];
  228. if ((ch != '<') && (ch != '/')) {
  229. s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  230. }
  231. }
  232. //The following is only a quick hack, to see if this whole thing would work
  233. //we first need the tagname with a trailing space...
  234. s[i] = ' ';
  235. s[i+1] = '\0';
  236. // if the current language is XML, I can fold any tag
  237. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  238. //...to find it in the list of no-container-tags
  239. tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
  240. //now we can remove the trailing space
  241. s[i] = '\0';
  242. // No keywords -> all are known
  243. // Name of a closing tag starts at s + 1
  244. char chAttr = SCE_H_TAGUNKNOWN;
  245. if (s[0] == '!') {
  246. chAttr = SCE_H_SGML_DEFAULT;
  247. } else if (!keywords || keywords.InList(s[0] == '/' ? s + 1 : s)) {
  248. chAttr = SCE_H_TAG;
  249. }
  250. styler.ColourTo(end, chAttr);
  251. if (chAttr == SCE_H_TAG) {
  252. if (allowScripts && 0 == strcmp(s, "script")) {
  253. chAttr = SCE_H_SCRIPT;
  254. } else if (!isXml && 0 == strcmp(s, "comment")) {
  255. chAttr = SCE_H_COMMENT;
  256. }
  257. }
  258. return chAttr;
  259. }
  260. static void classifyWordHTJS(unsigned int start, unsigned int end,
  261. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  262. char chAttr = SCE_HJ_WORD;
  263. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  264. if (wordIsNumber)
  265. chAttr = SCE_HJ_NUMBER;
  266. else {
  267. char s[30 + 1];
  268. unsigned int i = 0;
  269. for (; i < end - start + 1 && i < 30; i++) {
  270. s[i] = styler[start + i];
  271. }
  272. s[i] = '\0';
  273. if (keywords.InList(s))
  274. chAttr = SCE_HJ_KEYWORD;
  275. }
  276. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  277. }
  278. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  279. char chAttr = SCE_HB_IDENTIFIER;
  280. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  281. if (wordIsNumber)
  282. chAttr = SCE_HB_NUMBER;
  283. else {
  284. char s[100];
  285. GetTextSegment(styler, start, end, s, sizeof(s));
  286. if (keywords.InList(s)) {
  287. chAttr = SCE_HB_WORD;
  288. if (strcmp(s, "rem") == 0)
  289. chAttr = SCE_HB_COMMENTLINE;
  290. }
  291. }
  292. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  293. if (chAttr == SCE_HB_COMMENTLINE)
  294. return SCE_HB_COMMENTLINE;
  295. else
  296. return SCE_HB_DEFAULT;
  297. }
  298. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
  299. bool wordIsNumber = IsADigit(styler[start]);
  300. char s[30 + 1];
  301. unsigned int i = 0;
  302. for (; i < end - start + 1 && i < 30; i++) {
  303. s[i] = styler[start + i];
  304. }
  305. s[i] = '\0';
  306. char chAttr = SCE_HP_IDENTIFIER;
  307. if (0 == strcmp(prevWord, "class"))
  308. chAttr = SCE_HP_CLASSNAME;
  309. else if (0 == strcmp(prevWord, "def"))
  310. chAttr = SCE_HP_DEFNAME;
  311. else if (wordIsNumber)
  312. chAttr = SCE_HP_NUMBER;
  313. else if (keywords.InList(s))
  314. chAttr = SCE_HP_WORD;
  315. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  316. strcpy(prevWord, s);
  317. }
  318. // Update the word colour to default or keyword
  319. // Called when in a PHP word
  320. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  321. char chAttr = SCE_HPHP_DEFAULT;
  322. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  323. if (wordIsNumber)
  324. chAttr = SCE_HPHP_NUMBER;
  325. else {
  326. char s[100];
  327. GetTextSegment(styler, start, end, s, sizeof(s));
  328. if (keywords.InList(s))
  329. chAttr = SCE_HPHP_WORD;
  330. }
  331. styler.ColourTo(end, chAttr);
  332. }
  333. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  334. char s[30 + 1];
  335. unsigned int i = 0;
  336. for (; i < end - start + 1 && i < 30; i++) {
  337. s[i] = styler[start + i];
  338. }
  339. s[i] = '\0';
  340. return keywords.InList(s);
  341. }
  342. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  343. char s[30 + 1];
  344. unsigned int i = 0;
  345. for (; i < end - start + 1 && i < 30; i++) {
  346. s[i] = styler[start + i];
  347. }
  348. s[i] = '\0';
  349. return (0 == strcmp(s, "[CDATA["));
  350. }
  351. // Return the first state to reach when entering a scripting language
  352. static int StateForScript(script_type scriptLanguage) {
  353. int Result;
  354. switch (scriptLanguage) {
  355. case eScriptVBS:
  356. Result = SCE_HB_START;
  357. break;
  358. case eScriptPython:
  359. Result = SCE_HP_START;
  360. break;
  361. case eScriptPHP:
  362. Result = SCE_HPHP_DEFAULT;
  363. break;
  364. case eScriptXML:
  365. Result = SCE_H_TAGUNKNOWN;
  366. break;
  367. case eScriptSGML:
  368. Result = SCE_H_SGML_DEFAULT;
  369. break;
  370. case eScriptComment:
  371. Result = SCE_H_COMMENT;
  372. break;
  373. default :
  374. Result = SCE_HJ_START;
  375. break;
  376. }
  377. return Result;
  378. }
  // Report whether ch may be part of an HTML word: any non-ASCII value, an
  // ASCII letter or digit, or one of . - _ : ! #
  static inline bool ishtmlwordchar(int ch) {
      if (!isascii(ch))
          return true;
      if (isalnum(ch))
          return true;
      switch (ch) {
      case '.': case '-': case '_':
      case ':': case '!': case '#':
          return true;
      default:
          return false;
      }
  }
  // Report whether ch may be part of an SGML word: any non-ASCII value, an
  // ASCII letter or digit, or one of . _ : ! # [
  static inline bool issgmlwordchar(int ch) {
      if (!isascii(ch))
          return true;
      if (isalnum(ch))
          return true;
      switch (ch) {
      case '.': case '_': case ':':
      case '!': case '#': case '[':
          return true;
      default:
          return false;
      }
  }
  // Report whether ch may begin a PHP identifier: an ASCII letter, '_', or
  // any value from 0x7f upwards.
  static inline bool IsPhpWordStart(int ch) {
      if (ch >= 0x7f)
          return true;
      return isascii(ch) && (isalpha(ch) || (ch == '_'));
  }
  390. static inline bool IsPhpWordChar(int ch) {
  391. return IsADigit(ch) || IsPhpWordStart(ch);
  392. }
  393. static bool InTagState(int state) {
  394. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  395. state == SCE_H_SCRIPT ||
  396. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  397. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  398. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  399. }
  400. static bool IsCommentState(const int state) {
  401. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  402. }
  403. static bool IsScriptCommentState(const int state) {
  404. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  405. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  406. }
  // Report whether ch is a carriage return or a line feed.
  static bool isLineEnd(int ch) {
      switch (ch) {
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  // Report whether ch is one of the characters after which a regular
  // expression is accepted: '(', '=' or ','.
  static bool isOKBeforeRE(int ch) {
      switch (ch) {
      case '(':
      case '=':
      case ',':
          return true;
      default:
          return false;
      }
  }
  413. static bool isPHPStringState(int state) {
  414. return
  415. (state == SCE_HPHP_HSTRING) ||
  416. (state == SCE_HPHP_SIMPLESTRING) ||
  417. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  418. (state == SCE_HPHP_COMPLEX_VARIABLE);
  419. }
  // Scan for a PHP string delimiter (an identifier, optionally preceded by a
  // single quote) starting at position i, and copy it into phpStringDelimiter.
  // NOTE(review): presumably called right after the "<<<" of a heredoc/nowdoc -
  // the caller is not visible here, confirm.
  //   phpStringDelimiter     - out: NUL-terminated delimiter, or "" on failure
  //   phpStringDelimiterSize - capacity of phpStringDelimiter in bytes
  //   i                      - position to start scanning from
  //   lengthDoc              - scanning never goes to this position or beyond
  //   isSimpleString         - out: set to true when the identifier is preceded
  //                            by a single quote; never reset to false here
  // Returns j - 1, where j is the position the scan stopped at, on success;
  // on failure returns the entry value of i minus one and empties the delimiter.
  420. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  421. int j;
  422. const int beginning = i - 1;
  423. bool isValidSimpleString = false;
  // Skip blanks between the start position and the delimiter.
  424. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  425. i++;
  426. char ch = styler.SafeGetCharAt(i);
  427. const char chNext = styler.SafeGetCharAt(i + 1);
  // The delimiter must begin with an identifier start character, or with a
  // single quote immediately followed by one.
  428. if (!IsPhpWordStart(ch)) {
  429. if (ch == '\'' && IsPhpWordStart(chNext)) {
  430. i++;
  431. ch = chNext;
  432. isSimpleString = true;
  433. } else {
  434. phpStringDelimiter[0] = '\0';
  435. return beginning;
  436. }
  437. }
  438. phpStringDelimiter[0] = ch;
  439. i++;
  // Copy the remaining identifier characters up to the end of the line.
  440. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  441. if (!IsPhpWordChar(styler[j])) {
  // The only non-identifier character accepted is the closing quote of the
  // quoted form, and only when a line end follows it directly.
  442. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  443. isValidSimpleString = true;
  444. j++;
  445. break;
  446. } else {
  447. phpStringDelimiter[0] = '\0';
  448. return beginning;
  449. }
  450. }
  451. if (j - i < phpStringDelimiterSize - 2)
  452. phpStringDelimiter[j-i+1] = styler[j];
  453. else
  // Buffer full: the character is dropped and i is advanced together with j,
  // so j - i stays pinned and the terminator index computed after the loop
  // remains inside the buffer.
  454. i++;
  455. }
  // A quoted delimiter whose closing quote was never found is rejected.
  456. if (isSimpleString && !isValidSimpleString) {
  457. phpStringDelimiter[0] = '\0';
  458. return beginning;
  459. }
  // In the quoted form j was stepped past the closing quote, hence the extra -1.
  460. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  461. return j - 1;
  462. }
  463. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  464. Accessor &styler, bool isXml) {
  465. WordList &keywords = *keywordlists[0];
  466. WordList &keywords2 = *keywordlists[1];
  467. WordList &keywords3 = *keywordlists[2];
  468. WordList &keywords4 = *keywordlists[3];
  469. WordList &keywords5 = *keywordlists[4];
  470. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  471. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  472. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  473. char prevWord[200];
  474. prevWord[0] = '\0';
  475. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  476. phpStringDelimiter[0] = '\0';
  477. int StateToPrint = initStyle;
  478. int state = stateForPrintState(StateToPrint);
  479. // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
  480. if (InTagState(state)) {
  481. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  482. startPos--;
  483. length++;
  484. }
  485. state = SCE_H_DEFAULT;
  486. }
  487. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  488. if (isPHPStringState(state)) {
  489. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  490. startPos--;
  491. length++;
  492. state = styler.StyleAt(startPos);
  493. }
  494. if (startPos == 0)
  495. state = SCE_H_DEFAULT;
  496. }
  497. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  498. int lineCurrent = styler.GetLine(startPos);
  499. int lineState;
  500. if (lineCurrent > 0) {
  501. lineState = styler.GetLineState(lineCurrent);
  502. } else {
  503. // Default client and ASP scripting language is JavaScript
  504. lineState = eScriptJS << 8;
  505. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  506. }
  507. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  508. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  509. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  510. bool tagDontFold = false; //some HTML tags should not be folded
  511. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  512. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  513. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  514. script_type scriptLanguage = ScriptOfState(state);
  515. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  516. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  517. scriptLanguage = eScriptComment;
  518. }
  519. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  520. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  521. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  522. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  523. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  524. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  525. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  526. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  527. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  528. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  529. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  530. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  531. int levelCurrent = levelPrev;
  532. int visibleChars = 0;
  533. int chPrev = ' ';
  534. int ch = ' ';
  535. int chPrevNonWhite = ' ';
  536. // look back to set chPrevNonWhite properly for better regex colouring
  537. if (scriptLanguage == eScriptJS && startPos > 0) {
  538. int back = startPos;
  539. int style = 0;
  540. while (--back) {
  541. style = styler.StyleAt(back);
  542. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  543. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  544. break;
  545. }
  546. if (style == SCE_HJ_SYMBOLS) {
  547. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  548. }
  549. }
  550. styler.StartSegment(startPos);
  551. const int lengthDoc = startPos + length;
  552. for (int i = startPos; i < lengthDoc; i++) {
  553. const int chPrev2 = chPrev;
  554. chPrev = ch;
  555. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  556. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  557. chPrevNonWhite = ch;
  558. ch = static_cast<unsigned char>(styler[i]);
  559. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  560. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  561. // Handle DBCS codepages
  562. if (styler.IsLeadByte(static_cast<char>(ch))) {
  563. chPrev = ' ';
  564. i += 1;
  565. continue;
  566. }
  567. if ((!IsASpace(ch) || !foldCompact) && fold)
  568. visibleChars++;
  569. // decide what is the current state to print (depending of the script tag)
  570. StateToPrint = statePrintForState(state, inScriptType);
  571. // handle script folding
  572. if (fold) {
  573. switch (scriptLanguage) {
  574. case eScriptJS:
  575. case eScriptPHP:
  576. //not currently supported case eScriptVBS:
  577. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  578. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  579. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  580. if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  581. levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
  582. }
  583. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  584. levelCurrent--;
  585. }
  586. break;
  587. case eScriptPython:
  588. if (state != SCE_HP_COMMENTLINE) {
  589. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  590. levelCurrent++;
  591. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  592. // check if the number of tabs is lower than the level
  593. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  594. for (int j = 0; Findlevel > 0; j++) {
  595. char chTmp = styler.SafeGetCharAt(i + j + 1);
  596. if (chTmp == '\t') {
  597. Findlevel -= 8;
  598. } else if (chTmp == ' ') {
  599. Findlevel--;
  600. } else {
  601. break;
  602. }
  603. }
  604. if (Findlevel > 0) {
  605. levelCurrent -= Findlevel / 8;
  606. if (Findlevel % 8)
  607. levelCurrent--;
  608. }
  609. }
  610. }
  611. break;
  612. default:
  613. break;
  614. }
  615. }
  616. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  617. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  618. // Avoid triggering two times on Dos/Win
  619. // New line -> record any line state onto /next/ line
  620. if (fold) {
  621. int lev = levelPrev;
  622. if (visibleChars == 0)
  623. lev |= SC_FOLDLEVELWHITEFLAG;
  624. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  625. lev |= SC_FOLDLEVELHEADERFLAG;
  626. styler.SetLevel(lineCurrent, lev);
  627. visibleChars = 0;
  628. levelPrev = levelCurrent;
  629. }
  630. lineCurrent++;
  631. styler.SetLineState(lineCurrent,
  632. ((inScriptType & 0x03) << 0) |
  633. ((tagOpened & 0x01) << 2) |
  634. ((tagClosing & 0x01) << 3) |
  635. ((aspScript & 0x0F) << 4) |
  636. ((clientScript & 0x0F) << 8) |
  637. ((beforePreProc & 0xFF) << 12));
  638. }
  639. // generic end of script processing
  640. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  641. // Check if it's the end of the script tag (or any other HTML tag)
  642. switch (state) {
  643. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  644. case SCE_H_DOUBLESTRING:
  645. case SCE_H_SINGLESTRING:
  646. case SCE_HJ_COMMENT:
  647. case SCE_HJ_COMMENTDOC:
  648. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  649. // the end of script marker from some JS interpreters.
  650. case SCE_HB_COMMENTLINE:
  651. case SCE_HBA_COMMENTLINE:
  652. case SCE_HJ_DOUBLESTRING:
  653. case SCE_HJ_SINGLESTRING:
  654. case SCE_HJ_REGEX:
  655. case SCE_HB_STRING:
  656. case SCE_HBA_STRING:
  657. case SCE_HP_STRING:
  658. case SCE_HP_TRIPLE:
  659. case SCE_HP_TRIPLEDOUBLE:
  660. case SCE_HPHP_HSTRING:
  661. case SCE_HPHP_SIMPLESTRING:
  662. case SCE_HPHP_COMMENT:
  663. case SCE_HPHP_COMMENTLINE:
  664. break;
  665. default :
  666. // check if the closing tag is a script tag
  667. if (const char *tag =
  668. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  669. state == SCE_H_COMMENT ? "comment" : 0) {
  670. int j = i + 2;
  671. int chr;
  672. do {
  673. chr = static_cast<int>(*tag++);
  674. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  675. if (chr != 0) break;
  676. }
  677. // closing tag of the script (it's a closing HTML tag anyway)
  678. styler.ColourTo(i - 1, StateToPrint);
  679. state = SCE_H_TAGUNKNOWN;
  680. inScriptType = eHtml;
  681. scriptLanguage = eScriptNone;
  682. clientScript = eScriptJS;
  683. i += 2;
  684. visibleChars += 2;
  685. tagClosing = true;
  686. continue;
  687. }
  688. }
  689. /////////////////////////////////////
  690. // handle the start of PHP pre-processor = Non-HTML
  691. else if ((state != SCE_H_ASPAT) &&
  692. !isPHPStringState(state) &&
  693. (state != SCE_HPHP_COMMENT) &&
  694. (ch == '<') &&
  695. (chNext == '?') &&
  696. !IsScriptCommentState(state) ) {
  697. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
  698. if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
  699. styler.ColourTo(i - 1, StateToPrint);
  700. beforePreProc = state;
  701. i++;
  702. visibleChars++;
  703. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  704. if (scriptLanguage == eScriptXML)
  705. styler.ColourTo(i, SCE_H_XMLSTART);
  706. else
  707. styler.ColourTo(i, SCE_H_QUESTION);
  708. state = StateForScript(scriptLanguage);
  709. if (inScriptType == eNonHtmlScript)
  710. inScriptType = eNonHtmlScriptPreProc;
  711. else
  712. inScriptType = eNonHtmlPreProc;
  713. // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
  714. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  715. levelCurrent++;
  716. }
  717. // should be better
  718. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  719. continue;
  720. }
  721. // handle the start of ASP pre-processor = Non-HTML
  722. else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  723. styler.ColourTo(i - 1, StateToPrint);
  724. beforePreProc = state;
  725. if (inScriptType == eNonHtmlScript)
  726. inScriptType = eNonHtmlScriptPreProc;
  727. else
  728. inScriptType = eNonHtmlPreProc;
  729. if (chNext2 == '@') {
  730. i += 2; // place as if it was the second next char treated
  731. visibleChars += 2;
  732. state = SCE_H_ASPAT;
  733. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  734. styler.ColourTo(i + 3, SCE_H_ASP);
  735. state = SCE_H_XCCOMMENT;
  736. scriptLanguage = eScriptVBS;
  737. continue;
  738. } else {
  739. if (chNext2 == '=') {
  740. i += 2; // place as if it was the second next char treated
  741. visibleChars += 2;
  742. } else {
  743. i++; // place as if it was the next char treated
  744. visibleChars++;
  745. }
  746. state = StateForScript(aspScript);
  747. }
  748. scriptLanguage = eScriptVBS;
  749. styler.ColourTo(i, SCE_H_ASP);
  750. // fold whole script
  751. if (foldHTMLPreprocessor)
  752. levelCurrent++;
  753. // should be better
  754. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  755. continue;
  756. }
  757. /////////////////////////////////////
  758. // handle the start of SGML language (DTD)
  759. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  760. (chPrev == '<') &&
  761. (ch == '!') &&
  762. (StateToPrint != SCE_H_CDATA) &&
  763. (!IsCommentState(StateToPrint)) &&
  764. (!IsScriptCommentState(StateToPrint)) ) {
  765. beforePreProc = state;
  766. styler.ColourTo(i - 2, StateToPrint);
  767. if ((chNext == '-') && (chNext2 == '-')) {
  768. state = SCE_H_COMMENT; // wait for a pending command
  769. styler.ColourTo(i + 2, SCE_H_COMMENT);
  770. i += 2; // follow styling after the --
  771. } else if (isWordCdata(i + 1, i + 7, styler)) {
  772. state = SCE_H_CDATA;
  773. } else {
  774. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  775. scriptLanguage = eScriptSGML;
  776. state = SCE_H_SGML_COMMAND; // wait for a pending command
  777. }
  778. // fold whole tag (-- when closing the tag)
  779. if (foldHTMLPreprocessor)
  780. levelCurrent++;
  781. continue;
  782. }
  783. // handle the end of a pre-processor = Non-HTML
  784. else if ((
  785. ((inScriptType == eNonHtmlPreProc)
  786. || (inScriptType == eNonHtmlScriptPreProc)) && (
  787. ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
  788. ) && (chNext == '>')) ||
  789. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  790. if (state == SCE_H_ASPAT) {
  791. aspScript = segIsScriptingIndicator(styler,
  792. styler.GetStartSegment(), i - 1, aspScript);
  793. }
  794. // Bounce out of any ASP mode
  795. switch (state) {
  796. case SCE_HJ_WORD:
  797. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  798. break;
  799. case SCE_HB_WORD:
  800. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  801. break;
  802. case SCE_HP_WORD:
  803. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  804. break;
  805. case SCE_HPHP_WORD:
  806. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  807. break;
  808. case SCE_H_XCCOMMENT:
  809. styler.ColourTo(i - 1, state);
  810. break;
  811. default :
  812. styler.ColourTo(i - 1, StateToPrint);
  813. break;
  814. }
  815. if (scriptLanguage != eScriptSGML) {
  816. i++;
  817. visibleChars++;
  818. }
  819. if (ch == '%')
  820. styler.ColourTo(i, SCE_H_ASP);
  821. else if (scriptLanguage == eScriptXML)
  822. styler.ColourTo(i, SCE_H_XMLEND);
  823. else if (scriptLanguage == eScriptSGML)
  824. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  825. else
  826. styler.ColourTo(i, SCE_H_QUESTION);
  827. state = beforePreProc;
  828. if (inScriptType == eNonHtmlScriptPreProc)
  829. inScriptType = eNonHtmlScript;
  830. else
  831. inScriptType = eHtml;
  832. // Unfold all scripting languages, except for XML tag
  833. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  834. levelCurrent--;
  835. }
  836. scriptLanguage = eScriptNone;
  837. continue;
  838. }
  839. /////////////////////////////////////
  840. switch (state) {
  841. case SCE_H_DEFAULT:
  842. if (ch == '<') {
  843. // in HTML, fold on tag open and unfold on tag close
  844. tagOpened = true;
  845. tagClosing = (chNext == '/');
  846. styler.ColourTo(i - 1, StateToPrint);
  847. if (chNext != '!')
  848. state = SCE_H_TAGUNKNOWN;
  849. } else if (ch == '&') {
  850. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  851. state = SCE_H_ENTITY;
  852. }
  853. break;
  854. case SCE_H_SGML_DEFAULT:
  855. case SCE_H_SGML_BLOCK_DEFAULT:
  856. // if (scriptLanguage == eScriptSGMLblock)
  857. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  858. if (ch == '\"') {
  859. styler.ColourTo(i - 1, StateToPrint);
  860. state = SCE_H_SGML_DOUBLESTRING;
  861. } else if (ch == '\'') {
  862. styler.ColourTo(i - 1, StateToPrint);
  863. state = SCE_H_SGML_SIMPLESTRING;
  864. } else if ((ch == '-') && (chPrev == '-')) {
  865. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  866. styler.ColourTo(i - 2, StateToPrint);
  867. }
  868. state = SCE_H_SGML_COMMENT;
  869. } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
  870. styler.ColourTo(i - 2, StateToPrint);
  871. state = SCE_H_SGML_ENTITY;
  872. } else if (ch == '#') {
  873. styler.ColourTo(i - 1, StateToPrint);
  874. state = SCE_H_SGML_SPECIAL;
  875. } else if (ch == '[') {
  876. styler.ColourTo(i - 1, StateToPrint);
  877. scriptLanguage = eScriptSGMLblock;
  878. state = SCE_H_SGML_BLOCK_DEFAULT;
  879. } else if (ch == ']') {
  880. if (scriptLanguage == eScriptSGMLblock) {
  881. styler.ColourTo(i, StateToPrint);
  882. scriptLanguage = eScriptSGML;
  883. } else {
  884. styler.ColourTo(i - 1, StateToPrint);
  885. styler.ColourTo(i, SCE_H_SGML_ERROR);
  886. }
  887. state = SCE_H_SGML_DEFAULT;
  888. } else if (scriptLanguage == eScriptSGMLblock) {
  889. if ((ch == '!') && (chPrev == '<')) {
  890. styler.ColourTo(i - 2, StateToPrint);
  891. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  892. state = SCE_H_SGML_COMMAND;
  893. } else if (ch == '>') {
  894. styler.ColourTo(i - 1, StateToPrint);
  895. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  896. }
  897. }
  898. break;
  899. case SCE_H_SGML_COMMAND:
  900. if ((ch == '-') && (chPrev == '-')) {
  901. styler.ColourTo(i - 2, StateToPrint);
  902. state = SCE_H_SGML_COMMENT;
  903. } else if (!issgmlwordchar(ch)) {
  904. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  905. styler.ColourTo(i - 1, StateToPrint);
  906. state = SCE_H_SGML_1ST_PARAM;
  907. } else {
  908. state = SCE_H_SGML_ERROR;
  909. }
  910. }
  911. break;
  912. case SCE_H_SGML_1ST_PARAM:
  913. // wait for the beginning of the word
  914. if ((ch == '-') && (chPrev == '-')) {
  915. if (scriptLanguage == eScriptSGMLblock) {
  916. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  917. } else {
  918. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  919. }
  920. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  921. } else if (issgmlwordchar(ch)) {
  922. if (scriptLanguage == eScriptSGMLblock) {
  923. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  924. } else {
  925. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  926. }
  927. // find the length of the word
  928. int size = 1;
  929. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  930. size++;
  931. styler.ColourTo(i + size - 1, StateToPrint);
  932. i += size - 1;
  933. visibleChars += size - 1;
  934. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  935. if (scriptLanguage == eScriptSGMLblock) {
  936. state = SCE_H_SGML_BLOCK_DEFAULT;
  937. } else {
  938. state = SCE_H_SGML_DEFAULT;
  939. }
  940. continue;
  941. }
  942. break;
  943. case SCE_H_SGML_ERROR:
  944. if ((ch == '-') && (chPrev == '-')) {
  945. styler.ColourTo(i - 2, StateToPrint);
  946. state = SCE_H_SGML_COMMENT;
  947. }
  948. case SCE_H_SGML_DOUBLESTRING:
  949. if (ch == '\"') {
  950. styler.ColourTo(i, StateToPrint);
  951. state = SCE_H_SGML_DEFAULT;
  952. }
  953. break;
  954. case SCE_H_SGML_SIMPLESTRING:
  955. if (ch == '\'') {
  956. styler.ColourTo(i, StateToPrint);
  957. state = SCE_H_SGML_DEFAULT;
  958. }
  959. break;
  960. case SCE_H_SGML_COMMENT:
  961. if ((ch == '-') && (chPrev == '-')) {
  962. styler.ColourTo(i, StateToPrint);
  963. state = SCE_H_SGML_DEFAULT;
  964. }
  965. break;
  966. case SCE_H_CDATA:
  967. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  968. styler.ColourTo(i, StateToPrint);
  969. state = SCE_H_DEFAULT;
  970. levelCurrent--;
  971. }
  972. break;
  973. case SCE_H_COMMENT:
  974. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  975. styler.ColourTo(i, StateToPrint);
  976. state = SCE_H_DEFAULT;
  977. levelCurrent--;
  978. }
  979. break;
  980. case SCE_H_SGML_1ST_PARAM_COMMENT:
  981. if ((ch == '-') && (chPrev == '-')) {
  982. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  983. state = SCE_H_SGML_1ST_PARAM;
  984. }
  985. break;
  986. case SCE_H_SGML_SPECIAL:
  987. if (!(isascii(ch) && isupper(ch))) {
  988. styler.ColourTo(i - 1, StateToPrint);
  989. if (isalnum(ch)) {
  990. state = SCE_H_SGML_ERROR;
  991. } else {
  992. state = SCE_H_SGML_DEFAULT;
  993. }
  994. }
  995. break;
  996. case SCE_H_SGML_ENTITY:
  997. if (ch == ';') {
  998. styler.ColourTo(i, StateToPrint);
  999. state = SCE_H_SGML_DEFAULT;
  1000. } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1001. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1002. state = SCE_H_SGML_DEFAULT;
  1003. }
  1004. break;
  1005. case SCE_H_ENTITY:
  1006. if (ch == ';') {
  1007. styler.ColourTo(i, StateToPrint);
  1008. state = SCE_H_DEFAULT;
  1009. }
  1010. if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1011. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1012. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1013. state = SCE_H_DEFAULT;
  1014. }
  1015. break;
  1016. case SCE_H_TAGUNKNOWN:
  1017. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1018. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1019. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1020. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1021. if (!tagClosing) {
  1022. inScriptType = eNonHtmlScript;
  1023. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1024. } else {
  1025. scriptLanguage = eScriptNone;
  1026. }
  1027. eClass = SCE_H_TAG;
  1028. }
  1029. if (ch == '>') {
  1030. styler.ColourTo(i, eClass);
  1031. if (inScriptType == eNonHtmlScript) {
  1032. state = StateForScript(scriptLanguage);
  1033. } else {
  1034. state = SCE_H_DEFAULT;
  1035. }
  1036. tagOpened = false;
  1037. if (!tagDontFold) {
  1038. if (tagClosing) {
  1039. levelCurrent--;
  1040. } else {
  1041. levelCurrent++;
  1042. }
  1043. }
  1044. tagClosing = false;
  1045. } else if (ch == '/' && chNext == '>') {
  1046. if (eClass == SCE_H_TAGUNKNOWN) {
  1047. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1048. } else {
  1049. styler.ColourTo(i - 1, StateToPrint);
  1050. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1051. }
  1052. i++;
  1053. ch = chNext;
  1054. state = SCE_H_DEFAULT;
  1055. tagOpened = false;
  1056. } else {
  1057. if (eClass != SCE_H_TAGUNKNOWN) {
  1058. if (eClass == SCE_H_SGML_DEFAULT) {
  1059. state = SCE_H_SGML_DEFAULT;
  1060. } else {
  1061. state = SCE_H_OTHER;
  1062. }
  1063. }
  1064. }
  1065. }
  1066. break;
  1067. case SCE_H_ATTRIBUTE:
  1068. if (!setAttributeContinue.Contains(ch)) {
  1069. if (inScriptType == eNonHtmlScript) {
  1070. int scriptLanguagePrev = scriptLanguage;
  1071. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1072. scriptLanguage = clientScript;
  1073. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1074. inScriptType = eHtml;
  1075. }
  1076. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1077. if (ch == '>') {
  1078. styler.ColourTo(i, SCE_H_TAG);
  1079. if (inScriptType == eNonHtmlScript) {
  1080. state = StateForScript(scriptLanguage);
  1081. } else {
  1082. state = SCE_H_DEFAULT;
  1083. }
  1084. tagOpened = false;
  1085. if (!tagDontFold) {
  1086. if (tagClosing) {
  1087. levelCurrent--;
  1088. } else {
  1089. levelCurrent++;
  1090. }
  1091. }
  1092. tagClosing = false;
  1093. } else if (ch == '=') {
  1094. styler.ColourTo(i, SCE_H_OTHER);
  1095. state = SCE_H_VALUE;
  1096. } else {
  1097. state = SCE_H_OTHER;
  1098. }
  1099. }
  1100. break;
  1101. case SCE_H_OTHER:
  1102. if (ch == '>') {
  1103. styler.ColourTo(i - 1, StateToPrint);
  1104. styler.ColourTo(i, SCE_H_TAG);
  1105. if (inScriptType == eNonHtmlScript) {
  1106. state = StateForScript(scriptLanguage);
  1107. } else {
  1108. state = SCE_H_DEFAULT;
  1109. }
  1110. tagOpened = false;
  1111. if (!tagDontFold) {
  1112. if (tagClosing) {
  1113. levelCurrent--;
  1114. } else {
  1115. levelCurrent++;
  1116. }
  1117. }
  1118. tagClosing = false;
  1119. } else if (ch == '\"') {
  1120. styler.ColourTo(i - 1, StateToPrint);
  1121. state = SCE_H_DOUBLESTRING;
  1122. } else if (ch == '\'') {
  1123. styler.ColourTo(i - 1, StateToPrint);
  1124. state = SCE_H_SINGLESTRING;
  1125. } else if (ch == '=') {
  1126. styler.ColourTo(i, StateToPrint);
  1127. state = SCE_H_VALUE;
  1128. } else if (ch == '/' && chNext == '>') {
  1129. styler.ColourTo(i - 1, StateToPrint);
  1130. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1131. i++;
  1132. ch = chNext;
  1133. state = SCE_H_DEFAULT;
  1134. tagOpened = false;
  1135. } else if (ch == '?' && chNext == '>') {
  1136. styler.ColourTo(i - 1, StateToPrint);
  1137. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1138. i++;
  1139. ch = chNext;
  1140. state = SCE_H_DEFAULT;
  1141. } else if (setHTMLWord.Contains(ch)) {
  1142. styler.ColourTo(i - 1, StateToPrint);
  1143. state = SCE_H_ATTRIBUTE;
  1144. }
  1145. break;
  1146. case SCE_H_DOUBLESTRING:
  1147. if (ch == '\"') {
  1148. if (inScriptType == eNonHtmlScript) {
  1149. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1150. }
  1151. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1152. state = SCE_H_OTHER;
  1153. }
  1154. break;
  1155. case SCE_H_SINGLESTRING:
  1156. if (ch == '\'') {
  1157. if (inScriptType == eNonHtmlScript) {
  1158. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1159. }
  1160. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1161. state = SCE_H_OTHER;
  1162. }
  1163. break;
  1164. case SCE_H_VALUE:
  1165. if (!setHTMLWord.Contains(ch)) {
  1166. if (ch == '\"' && chPrev == '=') {
  1167. // Should really test for being first character
  1168. state = SCE_H_DOUBLESTRING;
  1169. } else if (ch == '\'' && chPrev == '=') {
  1170. state = SCE_H_SINGLESTRING;
  1171. } else {
  1172. if (IsNumber(styler.GetStartSegment(), styler)) {
  1173. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1174. } else {
  1175. styler.ColourTo(i - 1, StateToPrint);
  1176. }
  1177. if (ch == '>') {
  1178. styler.ColourTo(i, SCE_H_TAG);
  1179. if (inScriptType == eNonHtmlScript) {
  1180. state = StateForScript(scriptLanguage);
  1181. } else {
  1182. state = SCE_H_DEFAULT;
  1183. }
  1184. tagOpened = false;
  1185. if (!tagDontFold) {
  1186. if (tagClosing) {
  1187. levelCurrent--;
  1188. } else {
  1189. levelCurrent++;
  1190. }
  1191. }
  1192. tagClosing = false;
  1193. } else {
  1194. state = SCE_H_OTHER;
  1195. }
  1196. }
  1197. }
  1198. break;
  1199. case SCE_HJ_DEFAULT:
  1200. case SCE_HJ_START:
  1201. case SCE_HJ_SYMBOLS:
  1202. if (IsAWordStart(ch)) {
  1203. styler.ColourTo(i - 1, StateToPrint);
  1204. state = SCE_HJ_WORD;
  1205. } else if (ch == '/' && chNext == '*') {
  1206. styler.ColourTo(i - 1, StateToPrint);
  1207. if (chNext2 == '*')
  1208. state = SCE_HJ_COMMENTDOC;
  1209. else
  1210. state = SCE_HJ_COMMENT;
  1211. } else if (ch == '/' && chNext == '/') {
  1212. styler.ColourTo(i - 1, StateToPrint);
  1213. state = SCE_HJ_COMMENTLINE;
  1214. } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
  1215. styler.ColourTo(i - 1, StateToPrint);
  1216. state = SCE_HJ_REGEX;
  1217. } else if (ch == '\"') {
  1218. styler.ColourTo(i - 1, StateToPrint);
  1219. state = SCE_HJ_DOUBLESTRING;
  1220. } else if (ch == '\'') {
  1221. styler.ColourTo(i - 1, StateToPrint);
  1222. state = SCE_HJ_SINGLESTRING;
  1223. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1224. styler.SafeGetCharAt(i + 3) == '-') {
  1225. styler.ColourTo(i - 1, StateToPrint);
  1226. state = SCE_HJ_COMMENTLINE;
  1227. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1228. styler.ColourTo(i - 1, StateToPrint);
  1229. state = SCE_HJ_COMMENTLINE;
  1230. i += 2;
  1231. } else if (IsOperator(ch)) {
  1232. styler.ColourTo(i - 1, StateToPrint);
  1233. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1234. state = SCE_HJ_DEFAULT;
  1235. } else if ((ch == ' ') || (ch == '\t')) {
  1236. if (state == SCE_HJ_START) {
  1237. styler.ColourTo(i - 1, StateToPrint);
  1238. state = SCE_HJ_DEFAULT;
  1239. }
  1240. }
  1241. break;
  1242. case SCE_HJ_WORD:
  1243. if (!IsAWordChar(ch)) {
  1244. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1245. //styler.ColourTo(i - 1, eHTJSKeyword);
  1246. state = SCE_HJ_DEFAULT;
  1247. if (ch == '/' && chNext == '*') {
  1248. if (chNext2 == '*')
  1249. state = SCE_HJ_COMMENTDOC;
  1250. else
  1251. state = SCE_HJ_COMMENT;
  1252. } else if (ch == '/' && chNext == '/') {
  1253. state = SCE_HJ_COMMENTLINE;
  1254. } else if (ch == '\"') {
  1255. state = SCE_HJ_DOUBLESTRING;
  1256. } else if (ch == '\'') {
  1257. state = SCE_HJ_SINGLESTRING;
  1258. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1259. styler.ColourTo(i - 1, StateToPrint);
  1260. state = SCE_HJ_COMMENTLINE;
  1261. i += 2;
  1262. } else if (IsOperator(ch)) {
  1263. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1264. state = SCE_HJ_DEFAULT;
  1265. }
  1266. }
  1267. break;
  1268. case SCE_HJ_COMMENT:
  1269. case SCE_HJ_COMMENTDOC:
  1270. if (ch == '/' && chPrev == '*') {
  1271. styler.ColourTo(i, StateToPrint);
  1272. state = SCE_HJ_DEFAULT;
  1273. ch = ' ';
  1274. }
  1275. break;
  1276. case SCE_HJ_COMMENTLINE:
  1277. if (ch == '\r' || ch == '\n') {
  1278. styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
  1279. state = SCE_HJ_DEFAULT;
  1280. ch = ' ';
  1281. }
  1282. break;
  1283. case SCE_HJ_DOUBLESTRING:
  1284. if (ch == '\\') {
  1285. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1286. i++;
  1287. }
  1288. } else if (ch == '\"') {
  1289. styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
  1290. state = SCE_HJ_DEFAULT;
  1291. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1292. styler.ColourTo(i - 1, StateToPrint);
  1293. state = SCE_HJ_COMMENTLINE;
  1294. i += 2;
  1295. } else if (isLineEnd(ch)) {
  1296. styler.ColourTo(i - 1, StateToPrint);
  1297. state = SCE_HJ_STRINGEOL;
  1298. }
  1299. break;
  1300. case SCE_HJ_SINGLESTRING:
  1301. if (ch == '\\') {
  1302. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1303. i++;
  1304. }
  1305. } else if (ch == '\'') {
  1306. styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
  1307. state = SCE_HJ_DEFAULT;
  1308. } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1309. styler.ColourTo(i - 1, StateToPrint);
  1310. state = SCE_HJ_COMMENTLINE;
  1311. i += 2;
  1312. } else if (isLineEnd(ch)) {
  1313. styler.ColourTo(i - 1, StateToPrint);
  1314. state = SCE_HJ_STRINGEOL;
  1315. }
  1316. break;
  1317. case SCE_HJ_STRINGEOL:
  1318. if (!isLineEnd(ch)) {
  1319. styler.ColourTo(i - 1, StateToPrint);
  1320. state = SCE_HJ_DEFAULT;
  1321. } else if (!isLineEnd(chNext)) {
  1322. styler.ColourTo(i, StateToPrint);
  1323. state = SCE_HJ_DEFAULT;
  1324. }
  1325. break;
  1326. case SCE_HJ_REGEX:
  1327. if (ch == '\r' || ch == '\n' || ch == '/') {
  1328. if (ch == '/') {
  1329. while (isascii(chNext) && islower(chNext)) { // gobble regex flags
  1330. i++;
  1331. ch = chNext;
  1332. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1333. }
  1334. }
  1335. styler.ColourTo(i, StateToPrint);
  1336. state = SCE_HJ_DEFAULT;
  1337. } else if (ch == '\\') {
  1338. // Gobble up the quoted character
  1339. if (chNext == '\\' || chNext == '/') {
  1340. i++;
  1341. ch = chNext;
  1342. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1343. }
  1344. }
  1345. break;
  1346. case SCE_HB_DEFAULT:
  1347. case SCE_HB_START:
  1348. if (IsAWordStart(ch)) {
  1349. styler.ColourTo(i - 1, StateToPrint);
  1350. state = SCE_HB_WORD;
  1351. } else if (ch == '\'') {
  1352. styler.ColourTo(i - 1, StateToPrint);
  1353. state = SCE_HB_COMMENTLINE;
  1354. } else if (ch == '\"') {
  1355. styler.ColourTo(i - 1, StateToPrint);
  1356. state = SCE_HB_STRING;
  1357. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1358. styler.SafeGetCharAt(i + 3) == '-') {
  1359. styler.ColourTo(i - 1, StateToPrint);
  1360. state = SCE_HB_COMMENTLINE;
  1361. } else if (IsOperator(ch)) {
  1362. styler.ColourTo(i - 1, StateToPrint);
  1363. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1364. state = SCE_HB_DEFAULT;
  1365. } else if ((ch == ' ') || (ch == '\t')) {
  1366. if (state == SCE_HB_START) {
  1367. styler.ColourTo(i - 1, StateToPrint);
  1368. state = SCE_HB_DEFAULT;
  1369. }
  1370. }
  1371. break;
  1372. case SCE_HB_WORD:
  1373. if (!IsAWordChar(ch)) {
  1374. state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1375. if (state == SCE_HB_DEFAULT) {
  1376. if (ch == '\"') {
  1377. state = SCE_HB_STRING;
  1378. } else if (ch == '\'') {
  1379. state = SCE_HB_COMMENTLINE;
  1380. } else if (IsOperator(ch)) {
  1381. styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
  1382. state = SCE_HB_DEFAULT;
  1383. }
  1384. }
  1385. }
  1386. break;
  1387. case SCE_HB_STRING:
  1388. if (ch == '\"') {
  1389. styler.ColourTo(i, StateToPrint);
  1390. state = SCE_HB_DEFAULT;
  1391. } else if (ch == '\r' || ch == '\n') {
  1392. styler.ColourTo(i - 1, StateToPrint);
  1393. state = SCE_HB_STRINGEOL;
  1394. }
  1395. break;
  1396. case SCE_HB_COMMENTLINE:
  1397. if (ch == '\r' || ch == '\n') {
  1398. styler.ColourTo(i - 1, StateToPrint);
  1399. state = SCE_HB_DEFAULT;
  1400. }
  1401. break;
  1402. case SCE_HB_STRINGEOL:
  1403. if (!isLineEnd(ch)) {
  1404. styler.ColourTo(i - 1, StateToPrint);
  1405. state = SCE_HB_DEFAULT;
  1406. } else if (!isLineEnd(chNext)) {
  1407. styler.ColourTo(i, StateToPrint);
  1408. state = SCE_HB_DEFAULT;
  1409. }
  1410. break;
  1411. case SCE_HP_DEFAULT:
  1412. case SCE_HP_START:
  1413. if (IsAWordStart(ch)) {
  1414. styler.ColourTo(i - 1, StateToPrint);
  1415. state = SCE_HP_WORD;
  1416. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1417. styler.SafeGetCharAt(i + 3) == '-') {
  1418. styler.ColourTo(i - 1, StateToPrint);
  1419. state = SCE_HP_COMMENTLINE;
  1420. } else if (ch == '#') {
  1421. styler.ColourTo(i - 1, StateToPrint);
  1422. state = SCE_HP_COMMENTLINE;
  1423. } else if (ch == '\"') {
  1424. styler.ColourTo(i - 1, StateToPrint);
  1425. if (chNext == '\"' && chNext2 == '\"') {
  1426. i += 2;
  1427. state = SCE_HP_TRIPLEDOUBLE;
  1428. ch = ' ';
  1429. chPrev = ' ';
  1430. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1431. } else {
  1432. // state = statePrintForState(SCE_HP_STRING,inScriptType);
  1433. state = SCE_HP_STRING;
  1434. }
  1435. } else if (ch == '\'') {
  1436. styler.ColourTo(i - 1, StateToPrint);
  1437. if (chNext == '\'' && chNext2 == '\'') {
  1438. i += 2;
  1439. state = SCE_HP_TRIPLE;
  1440. ch = ' ';
  1441. chPrev = ' ';
  1442. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1443. } else {
  1444. state = SCE_HP_CHARACTER;
  1445. }
  1446. } else if (IsOperator(ch)) {
  1447. styler.ColourTo(i - 1, StateToPrint);
  1448. styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
  1449. } else if ((ch == ' ') || (ch == '\t')) {
  1450. if (state == SCE_HP_START) {
  1451. styler.ColourTo(i - 1, StateToPrint);
  1452. state = SCE_HP_DEFAULT;
  1453. }
  1454. }
  1455. break;
  1456. case SCE_HP_WORD:
  1457. if (!IsAWordChar(ch)) {
  1458. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  1459. state = SCE_HP_DEFAULT;
  1460. if (ch == '#') {
  1461. state = SCE_HP_COMMENTLINE;
  1462. } else if (ch == '\"') {
  1463. if (chNext == '\"' && chNext2 == '\"') {
  1464. i += 2;
  1465. state = SCE_HP_TRIPLEDOUBLE;
  1466. ch = ' ';
  1467. chPrev = ' ';
  1468. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1469. } else {
  1470. state = SCE_HP_STRING;
  1471. }
  1472. } else if (ch == '\'') {
  1473. if (chNext == '\'' && chNext2 == '\'') {
  1474. i += 2;
  1475. state = SCE_HP_TRIPLE;
  1476. ch = ' ';
  1477. chPrev = ' ';
  1478. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1479. } else {
  1480. state = SCE_HP_CHARACTER;
  1481. }
  1482. } else if (IsOperator(ch)) {
  1483. styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
  1484. }
  1485. }
  1486. break;
  1487. case SCE_HP_COMMENTLINE:
  1488. if (ch == '\r' || ch == '\n') {
  1489. styler.ColourTo(i - 1, StateToPrint);
  1490. state = SCE_HP_DEFAULT;
  1491. }
  1492. break;
  1493. case SCE_HP_STRING:
  1494. if (ch == '\\') {
  1495. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1496. i++;
  1497. ch = chNext;
  1498. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1499. }
  1500. } else if (ch == '\"') {
  1501. styler.ColourTo(i, StateToPrint);
  1502. state = SCE_HP_DEFAULT;
  1503. }
  1504. break;
  1505. case SCE_HP_CHARACTER:
  1506. if (ch == '\\') {
  1507. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1508. i++;
  1509. ch = chNext;
  1510. chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  1511. }
  1512. } else if (ch == '\'') {
  1513. styler.ColourTo(i, StateToPrint);
  1514. state = SCE_HP_DEFAULT;
  1515. }
  1516. break;
  1517. case SCE_HP_TRIPLE:
  1518. if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
  1519. styler.ColourTo(i, StateToPrint);
  1520. state = SCE_HP_DEFAULT;
  1521. }
  1522. break;
  1523. case SCE_HP_TRIPLEDOUBLE:
  1524. if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
  1525. styler.ColourTo(i, StateToPrint);
  1526. state = SCE_HP_DEFAULT;

Large files are truncated, but you can click here to view the full file