PageRenderTime 63ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/wxWidgets-2.9.1/src/stc/scintilla/src/LexHTML.cxx

http://gamekit.googlecode.com/
C++ | 2059 lines | 1842 code | 112 blank | 105 comment | 1348 complexity | 4b090a81249418ee317489fc81b82ed8 MD5 | raw file
Possible License(s): BSD-2-Clause, LGPL-2.0, AGPL-3.0, BSD-3-Clause, GPL-2.0, LGPL-3.0, MPL-2.0-no-copyleft-exception, LGPL-2.1, MIT

Large files are truncated, but you can click here to view the full file

  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <ctype.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include "Platform.h"
  13. #include "PropSet.h"
  14. #include "Accessor.h"
  15. #include "StyleContext.h"
  16. #include "KeyWords.h"
  17. #include "Scintilla.h"
  18. #include "SciLexer.h"
  19. #include "CharacterSet.h"
  20. #ifdef SCI_NAMESPACE
  21. using namespace Scintilla;
  22. #endif
  // Distance between each SCE_H?A_* style range and the matching SCE_H?_* range.
  // statePrintForState() adds these offsets and stateForPrintState() removes them.
  #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  // Kind of embedded content the lexer is currently inside.
  enum script_type {
      eScriptNone = 0,
      eScriptJS,
      eScriptVBS,
      eScriptPython,
      eScriptPHP,
      eScriptXML,
      eScriptSGML,
      eScriptSGMLblock,
      eScriptComment
  };

  // How the current content was entered; kept in the low two bits of the line state.
  enum script_mode {
      eHtml = 0,
      eNonHtmlScript,
      eNonHtmlPreProc,
      eNonHtmlScriptPreProc
  };
  // Reports whether ch may appear inside a word: an ASCII letter or digit, '.' or '_'.
  // Anything outside 0..0x7F is rejected before isalnum() is consulted. That includes
  // negative values (for example a sign-extended plain char holding a byte >= 0x80),
  // which must never be handed to the <ctype.h> classifiers.
  static inline bool IsAWordChar(const int ch) {
      if (ch < 0 || ch >= 0x80)
          return false;
      return isalnum(ch) || ch == '.' || ch == '_';
  }
  // Reports whether ch may begin a word: an ASCII letter or digit, or '_'.
  // Anything outside 0..0x7F is rejected before isalnum() is consulted. That includes
  // negative values (for example a sign-extended plain char holding a byte >= 0x80),
  // which must never be handed to the <ctype.h> classifiers.
  static inline bool IsAWordStart(const int ch) {
      if (ch < 0 || ch >= 0x80)
          return false;
      return isalnum(ch) || ch == '_';
  }
  // Returns true when ch is one of the punctuation characters treated as an operator.
  // ASCII letters and digits are never operators. Note that '.' IS part of the set.
  inline bool IsOperator(int ch) {
      const bool asciiAlnum = isascii(ch) && isalnum(ch);
      if (asciiAlnum)
          return false;
      switch (ch) {
      case '%': case '^': case '&': case '*':
      case '(': case ')': case '-': case '+':
      case '=': case '|': case '{': case '}':
      case '[': case ']': case ':': case ';':
      case '<': case '>': case ',': case '/':
      case '?': case '!': case '.': case '~':
          return true;
      default:
          return false;
      }
  }
  // Converts 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
  static inline int MakeLowerCase(int ch) {
      const bool isUpper = (ch >= 'A') && (ch <= 'Z');
      return isUpper ? (ch + ('a' - 'A')) : ch;
  }
  53. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  54. size_t i = 0;
  55. for (; (i < end - start + 1) && (i < len-1); i++) {
  56. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  57. }
  58. s[i] = '\0';
  59. }
  60. static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  61. size_t i = 0;
  62. for (; i < sLen-1; i++) {
  63. char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  64. if ((i == 0) && !IsAWordStart(ch))
  65. break;
  66. if ((i > 0) && !IsAWordChar(ch))
  67. break;
  68. s[i] = ch;
  69. }
  70. s[i] = '\0';
  71. return s;
  72. }
  73. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  74. char s[100];
  75. GetTextSegment(styler, start, end, s, sizeof(s));
  76. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  77. if (strstr(s, "src")) // External script
  78. return eScriptNone;
  79. if (strstr(s, "vbs"))
  80. return eScriptVBS;
  81. if (strstr(s, "pyth"))
  82. return eScriptPython;
  83. if (strstr(s, "javas"))
  84. return eScriptJS;
  85. if (strstr(s, "jscr"))
  86. return eScriptJS;
  87. if (strstr(s, "php"))
  88. return eScriptPHP;
  89. if (strstr(s, "xml")) {
  90. const char *xml = strstr(s, "xml");
  91. for (const char *t=s; t<xml; t++) {
  92. if (!IsASpace(*t)) {
  93. return prevValue;
  94. }
  95. }
  96. return eScriptXML;
  97. }
  98. return prevValue;
  99. }
  100. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  101. int iResult = 0;
  102. char s[100];
  103. GetTextSegment(styler, start, end, s, sizeof(s));
  104. if (0 == strncmp(s, "php", 3)) {
  105. iResult = 3;
  106. }
  107. return iResult;
  108. }
  109. static script_type ScriptOfState(int state) {
  110. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  111. return eScriptPython;
  112. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  113. return eScriptVBS;
  114. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  115. return eScriptJS;
  116. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  117. return eScriptPHP;
  118. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  119. return eScriptSGML;
  120. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  121. return eScriptSGMLblock;
  122. } else {
  123. return eScriptNone;
  124. }
  125. }
  126. static int statePrintForState(int state, script_mode inScriptType) {
  127. int StateToPrint = state;
  128. if (state >= SCE_HJ_START) {
  129. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  130. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  131. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  132. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  133. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  134. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  135. }
  136. }
  137. return StateToPrint;
  138. }
  139. static int stateForPrintState(int StateToPrint) {
  140. int state;
  141. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  142. state = StateToPrint - SCE_HA_PYTHON;
  143. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  144. state = StateToPrint - SCE_HA_VBS;
  145. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  146. state = StateToPrint - SCE_HA_JS;
  147. } else {
  148. state = StateToPrint;
  149. }
  150. return state;
  151. }
  152. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  153. return IsADigit(styler[start]) || (styler[start] == '.') ||
  154. (styler[start] == '-') || (styler[start] == '#');
  155. }
  156. static inline bool isStringState(int state) {
  157. bool bResult;
  158. switch (state) {
  159. case SCE_HJ_DOUBLESTRING:
  160. case SCE_HJ_SINGLESTRING:
  161. case SCE_HJA_DOUBLESTRING:
  162. case SCE_HJA_SINGLESTRING:
  163. case SCE_HB_STRING:
  164. case SCE_HBA_STRING:
  165. case SCE_HP_STRING:
  166. case SCE_HP_CHARACTER:
  167. case SCE_HP_TRIPLE:
  168. case SCE_HP_TRIPLEDOUBLE:
  169. case SCE_HPA_STRING:
  170. case SCE_HPA_CHARACTER:
  171. case SCE_HPA_TRIPLE:
  172. case SCE_HPA_TRIPLEDOUBLE:
  173. case SCE_HPHP_HSTRING:
  174. case SCE_HPHP_SIMPLESTRING:
  175. case SCE_HPHP_HSTRING_VARIABLE:
  176. case SCE_HPHP_COMPLEX_VARIABLE:
  177. bResult = true;
  178. break;
  179. default :
  180. bResult = false;
  181. break;
  182. }
  183. return bResult;
  184. }
  185. static inline bool stateAllowsTermination(int state) {
  186. bool allowTermination = !isStringState(state);
  187. if (allowTermination) {
  188. switch (state) {
  189. case SCE_HB_COMMENTLINE:
  190. case SCE_HPHP_COMMENT:
  191. case SCE_HP_COMMENTLINE:
  192. case SCE_HPA_COMMENTLINE:
  193. allowTermination = false;
  194. }
  195. }
  196. return allowTermination;
  197. }
  198. // not really well done, since it's only comments that should lex the %> and <%
  199. static inline bool isCommentASPState(int state) {
  200. bool bResult;
  201. switch (state) {
  202. case SCE_HJ_COMMENT:
  203. case SCE_HJ_COMMENTLINE:
  204. case SCE_HJ_COMMENTDOC:
  205. case SCE_HB_COMMENTLINE:
  206. case SCE_HP_COMMENTLINE:
  207. case SCE_HPHP_COMMENT:
  208. case SCE_HPHP_COMMENTLINE:
  209. bResult = true;
  210. break;
  211. default :
  212. bResult = false;
  213. break;
  214. }
  215. return bResult;
  216. }
  217. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  218. bool wordIsNumber = IsNumber(start, styler);
  219. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  220. if (wordIsNumber) {
  221. chAttr = SCE_H_NUMBER;
  222. } else {
  223. char s[100];
  224. GetTextSegment(styler, start, end, s, sizeof(s));
  225. if (keywords.InList(s))
  226. chAttr = SCE_H_ATTRIBUTE;
  227. }
  228. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  229. // No keywords -> all are known
  230. chAttr = SCE_H_ATTRIBUTE;
  231. styler.ColourTo(end, chAttr);
  232. }
  233. static int classifyTagHTML(unsigned int start, unsigned int end,
  234. WordList &keywords, Accessor &styler, bool &tagDontFold,
  235. bool caseSensitive, bool isXml, bool allowScripts) {
  236. char s[30 + 2];
  237. // Copy after the '<'
  238. unsigned int i = 0;
  239. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  240. char ch = styler[cPos];
  241. if ((ch != '<') && (ch != '/')) {
  242. s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  243. }
  244. }
  245. //The following is only a quick hack, to see if this whole thing would work
  246. //we first need the tagname with a trailing space...
  247. s[i] = ' ';
  248. s[i+1] = '\0';
  249. // if the current language is XML, I can fold any tag
  250. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  251. //...to find it in the list of no-container-tags
  252. tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
  253. //now we can remove the trailing space
  254. s[i] = '\0';
  255. // No keywords -> all are known
  256. char chAttr = SCE_H_TAGUNKNOWN;
  257. if (s[0] == '!') {
  258. chAttr = SCE_H_SGML_DEFAULT;
  259. } else if (!keywords || keywords.InList(s)) {
  260. chAttr = SCE_H_TAG;
  261. }
  262. styler.ColourTo(end, chAttr);
  263. if (chAttr == SCE_H_TAG) {
  264. if (allowScripts && 0 == strcmp(s, "script")) {
  265. // check to see if this is a self-closing tag by sniffing ahead
  266. bool isSelfClose = false;
  267. for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
  268. char ch = styler.SafeGetCharAt(cPos, '\0');
  269. if (ch == '\0' || ch == '>')
  270. break;
  271. else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
  272. isSelfClose = true;
  273. break;
  274. }
  275. }
  276. // do not enter a script state if the tag self-closed
  277. if (!isSelfClose)
  278. chAttr = SCE_H_SCRIPT;
  279. } else if (!isXml && 0 == strcmp(s, "comment")) {
  280. chAttr = SCE_H_COMMENT;
  281. }
  282. }
  283. return chAttr;
  284. }
  285. static void classifyWordHTJS(unsigned int start, unsigned int end,
  286. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  287. char chAttr = SCE_HJ_WORD;
  288. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  289. if (wordIsNumber)
  290. chAttr = SCE_HJ_NUMBER;
  291. else {
  292. char s[30 + 1];
  293. unsigned int i = 0;
  294. for (; i < end - start + 1 && i < 30; i++) {
  295. s[i] = styler[start + i];
  296. }
  297. s[i] = '\0';
  298. if (keywords.InList(s))
  299. chAttr = SCE_HJ_KEYWORD;
  300. }
  301. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  302. }
  303. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  304. char chAttr = SCE_HB_IDENTIFIER;
  305. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  306. if (wordIsNumber)
  307. chAttr = SCE_HB_NUMBER;
  308. else {
  309. char s[100];
  310. GetTextSegment(styler, start, end, s, sizeof(s));
  311. if (keywords.InList(s)) {
  312. chAttr = SCE_HB_WORD;
  313. if (strcmp(s, "rem") == 0)
  314. chAttr = SCE_HB_COMMENTLINE;
  315. }
  316. }
  317. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  318. if (chAttr == SCE_HB_COMMENTLINE)
  319. return SCE_HB_COMMENTLINE;
  320. else
  321. return SCE_HB_DEFAULT;
  322. }
  323. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
  324. bool wordIsNumber = IsADigit(styler[start]);
  325. char s[30 + 1];
  326. unsigned int i = 0;
  327. for (; i < end - start + 1 && i < 30; i++) {
  328. s[i] = styler[start + i];
  329. }
  330. s[i] = '\0';
  331. char chAttr = SCE_HP_IDENTIFIER;
  332. if (0 == strcmp(prevWord, "class"))
  333. chAttr = SCE_HP_CLASSNAME;
  334. else if (0 == strcmp(prevWord, "def"))
  335. chAttr = SCE_HP_DEFNAME;
  336. else if (wordIsNumber)
  337. chAttr = SCE_HP_NUMBER;
  338. else if (keywords.InList(s))
  339. chAttr = SCE_HP_WORD;
  340. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  341. strcpy(prevWord, s);
  342. }
  343. // Update the word colour to default or keyword
  344. // Called when in a PHP word
  345. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  346. char chAttr = SCE_HPHP_DEFAULT;
  347. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  348. if (wordIsNumber)
  349. chAttr = SCE_HPHP_NUMBER;
  350. else {
  351. char s[100];
  352. GetTextSegment(styler, start, end, s, sizeof(s));
  353. if (keywords.InList(s))
  354. chAttr = SCE_HPHP_WORD;
  355. }
  356. styler.ColourTo(end, chAttr);
  357. }
  358. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  359. char s[30 + 1];
  360. unsigned int i = 0;
  361. for (; i < end - start + 1 && i < 30; i++) {
  362. s[i] = styler[start + i];
  363. }
  364. s[i] = '\0';
  365. return keywords.InList(s);
  366. }
  367. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  368. char s[30 + 1];
  369. unsigned int i = 0;
  370. for (; i < end - start + 1 && i < 30; i++) {
  371. s[i] = styler[start + i];
  372. }
  373. s[i] = '\0';
  374. return (0 == strcmp(s, "[CDATA["));
  375. }
  376. // Return the first state to reach when entering a scripting language
  377. static int StateForScript(script_type scriptLanguage) {
  378. int Result;
  379. switch (scriptLanguage) {
  380. case eScriptVBS:
  381. Result = SCE_HB_START;
  382. break;
  383. case eScriptPython:
  384. Result = SCE_HP_START;
  385. break;
  386. case eScriptPHP:
  387. Result = SCE_HPHP_DEFAULT;
  388. break;
  389. case eScriptXML:
  390. Result = SCE_H_TAGUNKNOWN;
  391. break;
  392. case eScriptSGML:
  393. Result = SCE_H_SGML_DEFAULT;
  394. break;
  395. case eScriptComment:
  396. Result = SCE_H_COMMENT;
  397. break;
  398. default :
  399. Result = SCE_HJ_START;
  400. break;
  401. }
  402. return Result;
  403. }
  // True for any non-ASCII value, and for ASCII letters, digits and the
  // characters . - _ : ! #
  static inline bool ishtmlwordchar(int ch) {
      if (!isascii(ch))
          return true;
      switch (ch) {
      case '.': case '-': case '_': case ':': case '!': case '#':
          return true;
      default:
          return isalnum(ch) != 0;
      }
  }
  // True for any non-ASCII value, and for ASCII letters, digits and the
  // characters . _ : ! # [
  static inline bool issgmlwordchar(int ch) {
      if (!isascii(ch))
          return true;
      switch (ch) {
      case '.': case '_': case ':': case '!': case '#': case '[':
          return true;
      default:
          return isalnum(ch) != 0;
      }
  }
  // True when ch may begin a PHP identifier: an ASCII letter, '_', or any
  // value of 0x7f and above.
  static inline bool IsPhpWordStart(int ch) {
      if (ch >= 0x7f)
          return true;
      return isascii(ch) && (isalpha(ch) || (ch == '_'));
  }
  415. static inline bool IsPhpWordChar(int ch) {
  416. return IsADigit(ch) || IsPhpWordStart(ch);
  417. }
  418. static bool InTagState(int state) {
  419. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  420. state == SCE_H_SCRIPT ||
  421. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  422. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  423. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  424. }
  425. static bool IsCommentState(const int state) {
  426. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  427. }
  428. static bool IsScriptCommentState(const int state) {
  429. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  430. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  431. }
  // True when ch is a carriage return or a line feed.
  static bool isLineEnd(int ch) {
      switch (ch) {
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  // True when ch is '(', '=' or ','.
  static bool isOKBeforeRE(int ch) {
      switch (ch) {
      case '(':
      case '=':
      case ',':
          return true;
      default:
          return false;
      }
  }
  438. static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
  439. if (strlen(blockType) == 0) {
  440. return ((ch == '%') && (chNext == '>'));
  441. } else if ((0 == strcmp(blockType, "inherit")) ||
  442. (0 == strcmp(blockType, "namespace")) ||
  443. (0 == strcmp(blockType, "include")) ||
  444. (0 == strcmp(blockType, "page"))) {
  445. return ((ch == '/') && (chNext == '>'));
  446. } else if (0 == strcmp(blockType, "%")) {
  447. return isLineEnd(ch);
  448. } else if (0 == strcmp(blockType, "{")) {
  449. return ch == '}';
  450. } else {
  451. return (ch == '>');
  452. }
  453. }
  454. static bool isPHPStringState(int state) {
  455. return
  456. (state == SCE_HPHP_HSTRING) ||
  457. (state == SCE_HPHP_SIMPLESTRING) ||
  458. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  459. (state == SCE_HPHP_COMPLEX_VARIABLE);
  460. }
  461. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  462. int j;
  463. const int beginning = i - 1;
  464. bool isValidSimpleString = false;
  465. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  466. i++;
  467. char ch = styler.SafeGetCharAt(i);
  468. const char chNext = styler.SafeGetCharAt(i + 1);
  469. if (!IsPhpWordStart(ch)) {
  470. if (ch == '\'' && IsPhpWordStart(chNext)) {
  471. i++;
  472. ch = chNext;
  473. isSimpleString = true;
  474. } else {
  475. phpStringDelimiter[0] = '\0';
  476. return beginning;
  477. }
  478. }
  479. phpStringDelimiter[0] = ch;
  480. i++;
  481. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  482. if (!IsPhpWordChar(styler[j])) {
  483. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  484. isValidSimpleString = true;
  485. j++;
  486. break;
  487. } else {
  488. phpStringDelimiter[0] = '\0';
  489. return beginning;
  490. }
  491. }
  492. if (j - i < phpStringDelimiterSize - 2)
  493. phpStringDelimiter[j-i+1] = styler[j];
  494. else
  495. i++;
  496. }
  497. if (isSimpleString && !isValidSimpleString) {
  498. phpStringDelimiter[0] = '\0';
  499. return beginning;
  500. }
  501. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  502. return j - 1;
  503. }
  504. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  505. Accessor &styler, bool isXml) {
  506. WordList &keywords = *keywordlists[0];
  507. WordList &keywords2 = *keywordlists[1];
  508. WordList &keywords3 = *keywordlists[2];
  509. WordList &keywords4 = *keywordlists[3];
  510. WordList &keywords5 = *keywordlists[4];
  511. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  512. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  513. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  514. char prevWord[200];
  515. prevWord[0] = '\0';
  516. char nextWord[200];
  517. nextWord[0] = '\0';
  518. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  519. phpStringDelimiter[0] = '\0';
  520. int StateToPrint = initStyle;
  521. int state = stateForPrintState(StateToPrint);
  522. char makoBlockType[200];
  523. makoBlockType[0] = '\0';
  524. // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
  525. if (InTagState(state)) {
  526. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  527. startPos--;
  528. length++;
  529. }
  530. state = SCE_H_DEFAULT;
  531. }
  532. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  533. if (isPHPStringState(state)) {
  534. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  535. startPos--;
  536. length++;
  537. state = styler.StyleAt(startPos);
  538. }
  539. if (startPos == 0)
  540. state = SCE_H_DEFAULT;
  541. }
  542. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  543. int lineCurrent = styler.GetLine(startPos);
  544. int lineState;
  545. if (lineCurrent > 0) {
  546. lineState = styler.GetLineState(lineCurrent);
  547. } else {
  548. // Default client and ASP scripting language is JavaScript
  549. lineState = eScriptJS << 8;
  550. // property asp.default.language
  551. // Script in ASP code is initially assumed to be in JavaScript.
  552. // To change this to VBScript set asp.default.language to 2. Python is 3.
  553. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  554. }
  555. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  556. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  557. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  558. bool tagDontFold = false; //some HTML tags should not be folded
  559. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  560. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  561. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  562. script_type scriptLanguage = ScriptOfState(state);
  563. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  564. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  565. scriptLanguage = eScriptComment;
  566. }
  567. // property fold.html
  568. // Folding is turned on or off for HTML and XML files with this option.
  569. // The fold option must also be on for folding to occur.
  570. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  571. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  572. // property fold.html.preprocessor
  573. // Folding is turned on or off for scripts embedded in HTML files with this option.
  574. // The default is on.
  575. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  576. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  577. // property fold.hypertext.comment
  578. // Allow folding for comments in scripts embedded in HTML.
  579. // The default is off.
  580. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  581. // property fold.hypertext.heredoc
  582. // Allow folding for heredocs in scripts embedded in HTML.
  583. // The default is off.
  584. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  585. // property html.tags.case.sensitive
  586. // For XML and HTML, setting this property to 1 will make tags match in a case
  587. // sensitive way which is the expected behaviour for XML and XHTML.
  588. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  589. // property lexer.xml.allow.scripts
  590. // Set to 0 to disable scripts in XML.
  591. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  592. // property lexer.html.mako
  593. // Set to 1 to enable the mako template language.
  594. const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
  595. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  596. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  597. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  598. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  599. int levelCurrent = levelPrev;
  600. int visibleChars = 0;
  601. int lineStartVisibleChars = 0;
  602. int chPrev = ' ';
  603. int ch = ' ';
  604. int chPrevNonWhite = ' ';
  605. // look back to set chPrevNonWhite properly for better regex colouring
  606. if (scriptLanguage == eScriptJS && startPos > 0) {
  607. int back = startPos;
  608. int style = 0;
  609. while (--back) {
  610. style = styler.StyleAt(back);
  611. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  612. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  613. break;
  614. }
  615. if (style == SCE_HJ_SYMBOLS) {
  616. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  617. }
  618. }
  619. styler.StartSegment(startPos);
  620. const int lengthDoc = startPos + length;
  621. for (int i = startPos; i < lengthDoc; i++) {
  622. const int chPrev2 = chPrev;
  623. chPrev = ch;
  624. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  625. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  626. chPrevNonWhite = ch;
  627. ch = static_cast<unsigned char>(styler[i]);
  628. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  629. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  630. // Handle DBCS codepages
  631. if (styler.IsLeadByte(static_cast<char>(ch))) {
  632. chPrev = ' ';
  633. i += 1;
  634. continue;
  635. }
  636. if ((!IsASpace(ch) || !foldCompact) && fold)
  637. visibleChars++;
  638. if (!IsASpace(ch))
  639. lineStartVisibleChars++;
  640. // decide what is the current state to print (depending of the script tag)
  641. StateToPrint = statePrintForState(state, inScriptType);
  642. // handle script folding
  643. if (fold) {
  644. switch (scriptLanguage) {
  645. case eScriptJS:
  646. case eScriptPHP:
  647. //not currently supported case eScriptVBS:
  648. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  649. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  650. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  651. if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  652. levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
  653. }
  654. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  655. levelCurrent--;
  656. }
  657. break;
  658. case eScriptPython:
  659. if (state != SCE_HP_COMMENTLINE) {
  660. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  661. levelCurrent++;
  662. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  663. // check if the number of tabs is lower than the level
  664. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  665. for (int j = 0; Findlevel > 0; j++) {
  666. char chTmp = styler.SafeGetCharAt(i + j + 1);
  667. if (chTmp == '\t') {
  668. Findlevel -= 8;
  669. } else if (chTmp == ' ') {
  670. Findlevel--;
  671. } else {
  672. break;
  673. }
  674. }
  675. if (Findlevel > 0) {
  676. levelCurrent -= Findlevel / 8;
  677. if (Findlevel % 8)
  678. levelCurrent--;
  679. }
  680. }
  681. }
  682. break;
  683. default:
  684. break;
  685. }
  686. }
  687. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  688. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  689. // Avoid triggering two times on Dos/Win
  690. // New line -> record any line state onto /next/ line
  691. if (fold) {
  692. int lev = levelPrev;
  693. if (visibleChars == 0)
  694. lev |= SC_FOLDLEVELWHITEFLAG;
  695. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  696. lev |= SC_FOLDLEVELHEADERFLAG;
  697. styler.SetLevel(lineCurrent, lev);
  698. visibleChars = 0;
  699. levelPrev = levelCurrent;
  700. }
  701. lineCurrent++;
  702. lineStartVisibleChars = 0;
  703. styler.SetLineState(lineCurrent,
  704. ((inScriptType & 0x03) << 0) |
  705. ((tagOpened & 0x01) << 2) |
  706. ((tagClosing & 0x01) << 3) |
  707. ((aspScript & 0x0F) << 4) |
  708. ((clientScript & 0x0F) << 8) |
  709. ((beforePreProc & 0xFF) << 12));
  710. }
  711. // Allow falling through to mako handling code if newline is going to end a block
  712. if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
  713. (!isMako || (0 != strcmp(makoBlockType, "%")))) {
  714. }
  715. // generic end of script processing
  716. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  717. // Check if it's the end of the script tag (or any other HTML tag)
  718. switch (state) {
  719. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  720. case SCE_H_DOUBLESTRING:
  721. case SCE_H_SINGLESTRING:
  722. case SCE_HJ_COMMENT:
  723. case SCE_HJ_COMMENTDOC:
  724. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  725. // the end of script marker from some JS interpreters.
  726. case SCE_HB_COMMENTLINE:
  727. case SCE_HBA_COMMENTLINE:
  728. case SCE_HJ_DOUBLESTRING:
  729. case SCE_HJ_SINGLESTRING:
  730. case SCE_HJ_REGEX:
  731. case SCE_HB_STRING:
  732. case SCE_HBA_STRING:
  733. case SCE_HP_STRING:
  734. case SCE_HP_TRIPLE:
  735. case SCE_HP_TRIPLEDOUBLE:
  736. case SCE_HPHP_HSTRING:
  737. case SCE_HPHP_SIMPLESTRING:
  738. case SCE_HPHP_COMMENT:
  739. case SCE_HPHP_COMMENTLINE:
  740. break;
  741. default :
  742. // check if the closing tag is a script tag
  743. if (const char *tag =
  744. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  745. state == SCE_H_COMMENT ? "comment" : 0) {
  746. int j = i + 2;
  747. int chr;
  748. do {
  749. chr = static_cast<int>(*tag++);
  750. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  751. if (chr != 0) break;
  752. }
  753. // closing tag of the script (it's a closing HTML tag anyway)
  754. styler.ColourTo(i - 1, StateToPrint);
  755. state = SCE_H_TAGUNKNOWN;
  756. inScriptType = eHtml;
  757. scriptLanguage = eScriptNone;
  758. clientScript = eScriptJS;
  759. i += 2;
  760. visibleChars += 2;
  761. tagClosing = true;
  762. continue;
  763. }
  764. }
  765. /////////////////////////////////////
  766. // handle the start of PHP pre-processor = Non-HTML
  767. else if ((state != SCE_H_ASPAT) &&
  768. !isPHPStringState(state) &&
  769. (state != SCE_HPHP_COMMENT) &&
  770. (ch == '<') &&
  771. (chNext == '?') &&
  772. !IsScriptCommentState(state) ) {
  773. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
  774. if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
  775. styler.ColourTo(i - 1, StateToPrint);
  776. beforePreProc = state;
  777. i++;
  778. visibleChars++;
  779. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  780. if (scriptLanguage == eScriptXML)
  781. styler.ColourTo(i, SCE_H_XMLSTART);
  782. else
  783. styler.ColourTo(i, SCE_H_QUESTION);
  784. state = StateForScript(scriptLanguage);
  785. if (inScriptType == eNonHtmlScript)
  786. inScriptType = eNonHtmlScriptPreProc;
  787. else
  788. inScriptType = eNonHtmlPreProc;
  789. // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
  790. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  791. levelCurrent++;
  792. }
  793. // should be better
  794. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  795. continue;
  796. }
  797. // handle the start Mako template Python code
  798. else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
  799. (lineStartVisibleChars == 1 && ch == '%') ||
  800. (ch == '$' && chNext == '{') ||
  801. (ch == '<' && chNext == '/' && chNext2 == '%'))) {
  802. if (ch == '%')
  803. strcpy(makoBlockType, "%");
  804. else if (ch == '$')
  805. strcpy(makoBlockType, "{");
  806. else if (chNext == '/')
  807. GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
  808. else
  809. GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
  810. styler.ColourTo(i - 1, StateToPrint);
  811. beforePreProc = state;
  812. if (inScriptType == eNonHtmlScript)
  813. inScriptType = eNonHtmlScriptPreProc;
  814. else
  815. inScriptType = eNonHtmlPreProc;
  816. if (chNext == '/') {
  817. i += 2;
  818. visibleChars += 2;
  819. } else if (ch != '%') {
  820. i++;
  821. visibleChars++;
  822. }
  823. state = SCE_HP_START;
  824. scriptLanguage = eScriptPython;
  825. styler.ColourTo(i, SCE_H_ASP);
  826. if (foldHTMLPreprocessor && ch == '<')
  827. levelCurrent++;
  828. if (ch != '%' && ch != '$') {
  829. i += strlen(makoBlockType);
  830. visibleChars += strlen(makoBlockType);
  831. if (keywords4.InList(makoBlockType))
  832. styler.ColourTo(i, SCE_HP_WORD);
  833. else
  834. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  835. }
  836. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  837. continue;
  838. }
  839. // handle the start of ASP pre-processor = Non-HTML
  840. else if (!isMako && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  841. styler.ColourTo(i - 1, StateToPrint);
  842. beforePreProc = state;
  843. if (inScriptType == eNonHtmlScript)
  844. inScriptType = eNonHtmlScriptPreProc;
  845. else
  846. inScriptType = eNonHtmlPreProc;
  847. if (chNext2 == '@') {
  848. i += 2; // place as if it was the second next char treated
  849. visibleChars += 2;
  850. state = SCE_H_ASPAT;
  851. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  852. styler.ColourTo(i + 3, SCE_H_ASP);
  853. state = SCE_H_XCCOMMENT;
  854. scriptLanguage = eScriptVBS;
  855. continue;
  856. } else {
  857. if (chNext2 == '=') {
  858. i += 2; // place as if it was the second next char treated
  859. visibleChars += 2;
  860. } else {
  861. i++; // place as if it was the next char treated
  862. visibleChars++;
  863. }
  864. state = StateForScript(aspScript);
  865. }
  866. scriptLanguage = eScriptVBS;
  867. styler.ColourTo(i, SCE_H_ASP);
  868. // fold whole script
  869. if (foldHTMLPreprocessor)
  870. levelCurrent++;
  871. // should be better
  872. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  873. continue;
  874. }
  875. /////////////////////////////////////
  876. // handle the start of SGML language (DTD)
  877. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  878. (chPrev == '<') &&
  879. (ch == '!') &&
  880. (StateToPrint != SCE_H_CDATA) &&
  881. (!IsCommentState(StateToPrint)) &&
  882. (!IsScriptCommentState(StateToPrint)) ) {
  883. beforePreProc = state;
  884. styler.ColourTo(i - 2, StateToPrint);
  885. if ((chNext == '-') && (chNext2 == '-')) {
  886. state = SCE_H_COMMENT; // wait for a pending command
  887. styler.ColourTo(i + 2, SCE_H_COMMENT);
  888. i += 2; // follow styling after the --
  889. } else if (isWordCdata(i + 1, i + 7, styler)) {
  890. state = SCE_H_CDATA;
  891. } else {
  892. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  893. scriptLanguage = eScriptSGML;
  894. state = SCE_H_SGML_COMMAND; // wait for a pending command
  895. }
  896. // fold whole tag (-- when closing the tag)
  897. if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
  898. levelCurrent++;
  899. continue;
  900. }
  901. // handle the end of Mako Python code
  902. else if (isMako &&
  903. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  904. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  905. isMakoBlockEnd(ch, chNext, makoBlockType)) {
  906. if (state == SCE_H_ASPAT) {
  907. aspScript = segIsScriptingIndicator(styler,
  908. styler.GetStartSegment(), i - 1, aspScript);
  909. }
  910. if (state == SCE_HP_WORD) {
  911. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  912. } else {
  913. styler.ColourTo(i - 1, StateToPrint);
  914. }
  915. if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
  916. i++;
  917. visibleChars++;
  918. }
  919. if (0 != strcmp(makoBlockType, "%")) {
  920. styler.ColourTo(i, SCE_H_ASP);
  921. }
  922. state = beforePreProc;
  923. if (inScriptType == eNonHtmlScriptPreProc)
  924. inScriptType = eNonHtmlScript;
  925. else
  926. inScriptType = eHtml;
  927. if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
  928. levelCurrent--;
  929. }
  930. scriptLanguage = eScriptNone;
  931. continue;
  932. }
  933. // handle the end of a pre-processor = Non-HTML
  934. else if ((!isMako && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  935. (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
  936. (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
  937. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  938. if (state == SCE_H_ASPAT) {
  939. aspScript = segIsScriptingIndicator(styler,
  940. styler.GetStartSegment(), i - 1, aspScript);
  941. }
  942. // Bounce out of any ASP mode
  943. switch (state) {
  944. case SCE_HJ_WORD:
  945. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  946. break;
  947. case SCE_HB_WORD:
  948. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  949. break;
  950. case SCE_HP_WORD:
  951. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
  952. break;
  953. case SCE_HPHP_WORD:
  954. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  955. break;
  956. case SCE_H_XCCOMMENT:
  957. styler.ColourTo(i - 1, state);
  958. break;
  959. default :
  960. styler.ColourTo(i - 1, StateToPrint);
  961. break;
  962. }
  963. if (scriptLanguage != eScriptSGML) {
  964. i++;
  965. visibleChars++;
  966. }
  967. if (ch == '%')
  968. styler.ColourTo(i, SCE_H_ASP);
  969. else if (scriptLanguage == eScriptXML)
  970. styler.ColourTo(i, SCE_H_XMLEND);
  971. else if (scriptLanguage == eScriptSGML)
  972. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  973. else
  974. styler.ColourTo(i, SCE_H_QUESTION);
  975. state = beforePreProc;
  976. if (inScriptType == eNonHtmlScriptPreProc)
  977. inScriptType = eNonHtmlScript;
  978. else
  979. inScriptType = eHtml;
  980. // Unfold all scripting languages, except for XML tag
  981. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  982. levelCurrent--;
  983. }
  984. scriptLanguage = eScriptNone;
  985. continue;
  986. }
  987. /////////////////////////////////////
  988. switch (state) {
  989. case SCE_H_DEFAULT:
  990. if (ch == '<') {
  991. // in HTML, fold on tag open and unfold on tag close
  992. tagOpened = true;
  993. tagClosing = (chNext == '/');
  994. styler.ColourTo(i - 1, StateToPrint);
  995. if (chNext != '!')
  996. state = SCE_H_TAGUNKNOWN;
  997. } else if (ch == '&') {
  998. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  999. state = SCE_H_ENTITY;
  1000. }
  1001. break;
  1002. case SCE_H_SGML_DEFAULT:
  1003. case SCE_H_SGML_BLOCK_DEFAULT:
  1004. // if (scriptLanguage == eScriptSGMLblock)
  1005. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  1006. if (ch == '\"') {
  1007. styler.ColourTo(i - 1, StateToPrint);
  1008. state = SCE_H_SGML_DOUBLESTRING;
  1009. } else if (ch == '\'') {
  1010. styler.ColourTo(i - 1, StateToPrint);
  1011. state = SCE_H_SGML_SIMPLESTRING;
  1012. } else if ((ch == '-') && (chPrev == '-')) {
  1013. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  1014. styler.ColourTo(i - 2, StateToPrint);
  1015. }
  1016. state = SCE_H_SGML_COMMENT;
  1017. } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
  1018. styler.ColourTo(i - 2, StateToPrint);
  1019. state = SCE_H_SGML_ENTITY;
  1020. } else if (ch == '#') {
  1021. styler.ColourTo(i - 1, StateToPrint);
  1022. state = SCE_H_SGML_SPECIAL;
  1023. } else if (ch == '[') {
  1024. styler.ColourTo(i - 1, StateToPrint);
  1025. scriptLanguage = eScriptSGMLblock;
  1026. state = SCE_H_SGML_BLOCK_DEFAULT;
  1027. } else if (ch == ']') {
  1028. if (scriptLanguage == eScriptSGMLblock) {
  1029. styler.ColourTo(i, StateToPrint);
  1030. scriptLanguage = eScriptSGML;
  1031. } else {
  1032. styler.ColourTo(i - 1, StateToPrint);
  1033. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1034. }
  1035. state = SCE_H_SGML_DEFAULT;
  1036. } else if (scriptLanguage == eScriptSGMLblock) {
  1037. if ((ch == '!') && (chPrev == '<')) {
  1038. styler.ColourTo(i - 2, StateToPrint);
  1039. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1040. state = SCE_H_SGML_COMMAND;
  1041. } else if (ch == '>') {
  1042. styler.ColourTo(i - 1, StateToPrint);
  1043. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1044. }
  1045. }
  1046. break;
  1047. case SCE_H_SGML_COMMAND:
  1048. if ((ch == '-') && (chPrev == '-')) {
  1049. styler.ColourTo(i - 2, StateToPrint);
  1050. state = SCE_H_SGML_COMMENT;
  1051. } else if (!issgmlwordchar(ch)) {
  1052. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  1053. styler.ColourTo(i - 1, StateToPrint);
  1054. state = SCE_H_SGML_1ST_PARAM;
  1055. } else {
  1056. state = SCE_H_SGML_ERROR;
  1057. }
  1058. }
  1059. break;
  1060. case SCE_H_SGML_1ST_PARAM:
  1061. // wait for the beginning of the word
  1062. if ((ch == '-') && (chPrev == '-')) {
  1063. if (scriptLanguage == eScriptSGMLblock) {
  1064. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  1065. } else {
  1066. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  1067. }
  1068. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  1069. } else if (issgmlwordchar(ch)) {
  1070. if (scriptLanguage == eScriptSGMLblock) {
  1071. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  1072. } else {
  1073. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  1074. }
  1075. // find the length of the word
  1076. int size = 1;
  1077. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  1078. size++;
  1079. styler.ColourTo(i + size - 1, StateToPrint);
  1080. i += size - 1;
  1081. visibleChars += size - 1;
  1082. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  1083. if (scriptLanguage == eScriptSGMLblock) {
  1084. state = SCE_H_SGML_BLOCK_DEFAULT;
  1085. } else {
  1086. state = SCE_H_SGML_DEFAULT;
  1087. }
  1088. continue;
  1089. }
  1090. break;
  1091. case SCE_H_SGML_ERROR:
  1092. if ((ch == '-') && (chPrev == '-')) {
  1093. styler.ColourTo(i - 2, StateToPrint);
  1094. state = SCE_H_SGML_COMMENT;
  1095. }
  1096. case SCE_H_SGML_DOUBLESTRING:
  1097. if (ch == '\"') {
  1098. styler.ColourTo(i, StateToPrint);
  1099. state = SCE_H_SGML_DEFAULT;
  1100. }
  1101. break;
  1102. case SCE_H_SGML_SIMPLESTRING:
  1103. if (ch == '\'') {
  1104. styler.ColourTo(i, StateToPrint);
  1105. state = SCE_H_SGML_DEFAULT;
  1106. }
  1107. break;
  1108. case SCE_H_SGML_COMMENT:
  1109. if ((ch == '-') && (chPrev == '-')) {
  1110. styler.ColourTo(i, StateToPrint);
  1111. state = SCE_H_SGML_DEFAULT;
  1112. }
  1113. break;
  1114. case SCE_H_CDATA:
  1115. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  1116. styler.ColourTo(i, StateToPrint);
  1117. state = SCE_H_DEFAULT;
  1118. levelCurrent--;
  1119. }
  1120. break;
  1121. case SCE_H_COMMENT:
  1122. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  1123. styler.ColourTo(i, StateToPrint);
  1124. state = SCE_H_DEFAULT;
  1125. levelCurrent--;
  1126. }
  1127. break;
  1128. case SCE_H_SGML_1ST_PARAM_COMMENT:
  1129. if ((ch == '-') && (chPrev == '-')) {
  1130. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  1131. state = SCE_H_SGML_1ST_PARAM;
  1132. }
  1133. break;
  1134. case SCE_H_SGML_SPECIAL:
  1135. if (!(isascii(ch) && isupper(ch))) {
  1136. styler.ColourTo(i - 1, StateToPrint);
  1137. if (isalnum(ch)) {
  1138. state = SCE_H_SGML_ERROR;
  1139. } else {
  1140. state = SCE_H_SGML_DEFAULT;
  1141. }
  1142. }
  1143. break;
  1144. case SCE_H_SGML_ENTITY:
  1145. if (ch == ';') {
  1146. styler.ColourTo(i, StateToPrint);
  1147. state = SCE_H_SGML_DEFAULT;
  1148. } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1149. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1150. state = SCE_H_SGML_DEFAULT;
  1151. }
  1152. break;
  1153. case SCE_H_ENTITY:
  1154. if (ch == ';') {
  1155. styler.ColourTo(i, StateToPrint);
  1156. state = SCE_H_DEFAULT;
  1157. }
  1158. if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1159. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1160. if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
  1161. styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
  1162. else
  1163. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1164. state = SCE_H_DEFAULT;
  1165. }
  1166. break;
  1167. case SCE_H_TAGUNKNOWN:
  1168. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1169. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1170. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1171. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1172. if (!tagClosing) {
  1173. inScriptType = eNonHtmlScript;
  1174. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1175. } else {
  1176. scriptLanguage = eScriptNone;
  1177. }
  1178. eClass = SCE_H_TAG;
  1179. }
  1180. if (ch == '>') {
  1181. styler.ColourTo(i, eClass);
  1182. if (inScriptType == eNonHtmlScript) {
  1183. state = StateForScript(scriptLanguage);
  1184. } else {
  1185. state = SCE_H_DEFAULT;
  1186. }
  1187. tagOpened = false;
  1188. if (!tagDontFold) {
  1189. if (tagClosing) {
  1190. levelCurrent--;
  1191. } else {
  1192. levelCurrent++;
  1193. }
  1194. }
  1195. tagClosing = false;
  1196. } else if (ch == '/' && chNext == '>') {
  1197. if (eClass == SCE_H_TAGUNKNOWN) {
  1198. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1199. } else {
  1200. styler.ColourTo(i - 1, StateToPrint);
  1201. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1202. }
  1203. i++;
  1204. ch = chNext;
  1205. state = SCE_H_DEFAULT;
  1206. tagOpened = false;
  1207. } else {
  1208. if (eClass != SCE_H_TAGUNKNOWN) {
  1209. if (eClass == SCE_H_SGML_DEFAULT) {
  1210. state = SCE_H_SGML_DEFAULT;
  1211. } else {
  1212. state = SCE_H_OTHER;
  1213. }
  1214. }
  1215. }
  1216. }
  1217. break;
  1218. case SCE_H_ATTRIBUTE:
  1219. if (!setAttributeContinue.Contains(ch)) {
  1220. if (inScriptType == eNonHtmlScript) {
  1221. int scriptLanguagePrev = scriptLanguage;
  1222. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1223. scriptLanguage = clientScript;
  1224. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1225. inScriptType = eHtml;
  1226. }
  1227. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1228. if (ch == '>') {
  1229. styler.ColourTo(i, SCE_H_TAG);
  1230. if (inScriptType == eNonHtmlScript) {
  1231. state = StateForScript(scriptLanguage);
  1232. } else {
  1233. state = SCE_H_DEFAULT;
  1234. }
  1235. tagOpened = false;
  1236. if (!tagDontFold) {
  1237. if (tagClosing) {
  1238. levelCurrent--;
  1239. } else {
  1240. levelCurrent++;
  1241. }
  1242. }
  1243. tagClosing = false;
  1244. } else if (ch == '=') {
  1245. styler.ColourTo(i, SCE_H_OTHER);
  1246. state = SCE_H_VALUE;
  1247. } else {
  1248. state = SCE_H_OTHER;
  1249. }
  1250. }
  1251. break;
  1252. case SCE_H_OTHER:
  1253. if (ch == '>') {
  1254. styler.ColourTo(i - 1, StateToPrint);
  1255. styler.ColourTo(i, SCE_H_TAG);
  1256. if (inScriptType == eNonHtmlScript) {
  1257. state = StateForScript(scriptLanguage);
  1258. } else {
  1259. state = SCE_H_DEFAULT;
  1260. }
  1261. tagOpened = false;
  1262. if (!tagDontFold) {
  1263. if (tagClosing) {
  1264. levelCurrent--;
  1265. } else {
  1266. levelCurrent++;
  1267. }
  1268. }
  1269. tagClosing = false;
  1270. } else if (ch == '\"') {
  1271. styler.ColourTo(i - 1, StateToPrint);
  1272. state = SCE_H_DOUBLESTRING;
  1273. } else if (ch == '\'') {
  1274. styler.ColourTo(i - 1, StateToPrint);
  1275. state = SCE_H_SINGLESTRING;
  1276. } else if (ch == '=') {
  1277. styler.ColourTo(i, StateToPrint);
  1278. state = SCE_H_VALUE;
  1279. } else if (ch == '/' && chNext == '>') {
  1280. styler.ColourTo(i - 1, StateToPrint);
  1281. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1282. i++;
  1283. ch = chNext;
  1284. state = SCE_H_DEFAULT;
  1285. tagOpened = false;
  1286. } else if (ch == '?' && chNext == '>') {
  1287. styler.ColourTo(i - 1, StateToPrint);
  1288. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1289. i++;
  1290. ch = chNext;
  1291. state = SCE_H_DEFAULT;
  1292. } else if (setHTMLWord.Contains(ch)) {
  1293. styler.ColourTo(i - 1, StateToPrint);
  1294. state = SCE_H_ATTRIBUTE;
  1295. }
  1296. break;
  1297. case SCE_H_DOUBLESTRING:
  1298. if (ch == '\"') {
  1299. if (inScriptType == eNonHtmlScript) {
  1300. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1301. }
  1302. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1303. state = SCE_H_OTHER;
  1304. }
  1305. break;
  1306. case SCE_H_SINGLESTRING:
  1307. if (ch == '\'') {
  1308. if (inScriptType == eNonHtmlScript) {
  1309. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1310. }
  1311. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1312. state = SCE_H_OTHER;
  1313. }
  1314. break;
  1315. case SCE_H_VALUE:
  1316. if (!setHTMLWord.Contains(ch)) {
  1317. if (ch == '\"' && chPrev == '=') {
  1318. // Should really test for being first character
  1319. state = SCE_H_DOUBLESTRING;
  1320. } else if (ch == '\'' && chPrev == '=') {
  1321. state = SCE_H_SINGLESTRING;
  1322. } else {
  1323. if (IsNumber(styler.GetStartSegment(), styler)) {
  1324. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1325. } else {
  1326. styler.ColourTo(i - 1, StateToPrint);
  1327. }
  1328. if (ch == '>') {
  1329. styler.ColourTo(i, SCE_H_TAG);
  1330. if (inScriptType == eNonHtmlScript) {
  1331. state = StateForScript(scriptLanguage);
  1332. } else {
  1333. state = SCE_H_DEFAULT;
  1334. }
  1335. tagOpened = false;
  1336. if (!tagDontFold) {
  1337. if (tagClosing) {
  1338. levelCurrent--;
  1339. } else {
  1340. levelCurrent++;
  1341. }
  1342. }
  1343. tagClosing = false;
  1344. } else {
  1345. state = SCE_H_OTHER;
  1346. }
  1347. }
  1348. }
  1349. break;
  1350. case SCE_HJ_DEFAULT:
  1351. case SCE_HJ_START:
  1352. case SCE_HJ_SYMBOLS:
  1353. if (IsAWordStart(ch)) {
  1354. styler.ColourTo(i - 1, StateToPrint);
  1355. state = SCE_HJ_WORD;
  1356. } else if (ch == '/' && chNext == '*') {
  1357. styler.ColourTo(i - 1, StateToPrint);
  1358. if (chNext2 == '*')
  1359. state = SCE_HJ_COMMENTDOC;
  1360. else
  1361. state = SCE_HJ_COMMENT;
  1362. } else if (ch == '/' && chNext == '/') {
  1363. styler.ColourTo(i - 1, StateToPrint);
  1364. state = SCE_HJ_COMMENTLINE;
  1365. } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
  1366. styler.ColourTo(i - 1, StateToPrint);
  1367. state = SCE_HJ_REGEX;
  1368. } else if (ch == '\"') {
  1369. styler.ColourTo(i - 1, StateToPrint);
  1370. state = SCE_HJ_DOUBLESTRING;
  1371. } else if (ch == '\'') {
  1372. styler.ColourTo(i - 1, StateToPrint);
  1373. state = SCE_HJ_SINGLESTRING;
  1374. } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
  1375. styler.SafeGetCharAt(i + 3) == '-') {
  1376. styler.ColourTo(i - 1, StateToPrint);
  1377. state = SCE_HJ_COMMENTLINE;
  1378. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1379. styler.ColourTo(i - 1, StateToPrint);
  1380. state = SCE_HJ_COMMENTLINE;
  1381. i += 2;
  1382. } else if (IsOperator(ch)) {
  1383. styler.ColourTo(i - 1, StateToPrint);
  1384. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1385. state = SCE_HJ_DEFAULT;
  1386. } else if ((ch == ' ') || (ch == '\t')) {
  1387. if (state == SCE_HJ_START) {
  1388. styler.ColourTo(i - 1, StateToPrint);
  1389. state = SCE_HJ_DEFAULT;
  1390. }
  1391. }
  1392. break;
  1393. case SCE_HJ_WORD:
  1394. if (!IsAWordChar(ch)) {
  1395. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1396. //styler.ColourTo(i - 1, eHTJSKeyword);
  1397. state = SCE_HJ_DEFAULT;
  1398. if (ch == '/' && chNext == '*') {
  1399. if (chNext2 == '*')
  1400. state = SCE_HJ_COMMENTDOC;
  1401. else
  1402. state = SCE_HJ_COMMENT;
  1403. } else if (ch == '/' && chNext == '/') {
  1404. state = SCE_HJ_COMMENTLINE;
  1405. } else if (ch == '\"') {
  1406. state = SCE_HJ_DOUBLESTRING;
  1407. } else if (ch == '\'') {
  1408. state = SCE_HJ_SINGLESTRING;
  1409. } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
  1410. styler.ColourTo(i - 1, StateToPrint);
  1411. state = SCE_HJ_COMMENTLINE;
  1412. i += 2;
  1413. } else if (IsOperator(ch)) {
  1414. styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
  1415. state = SCE_HJ_DEFAULT;
  1416. }
  1417. }
  1418. break;
  1419. case SCE_HJ_COMMENT:
  1420. case SCE_HJ_COMMENTDOC:
  1421. if (ch == '/' && chPrev == '*') {
  1422. styler.ColourTo(i, StateToPrint);
  1423. state = SCE_HJ_DEFAULT;
  1424. ch = ' ';
  1425. }
  1426. break;
  1427. case SCE_HJ_COMMENTLINE:
  1428. if (ch == '\r' || ch == '\n') {
  1429. styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
  1430. state = SCE_HJ_DEFAULT;
  1431. ch = ' ';
  1432. }
  1433. break;
  1434. case SCE_HJ_DOUBLESTRING:
  1435. if (ch == '\\') {
  1436. if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
  1437. i++;
  1438. }
  1439. } else if (ch == '\"') {
  1440. styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
  1441. state = SCE_HJ_DEFAULT;
  1442. } else if ((inScriptType == eNonHtmlScript) && (ch

Large files are truncated, but you can click here to view the full file