PageRenderTime 55ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/scintilla/lexers/LexHTML.cxx

https://github.com/aihimel/geany
C++ | 2185 lines | 1952 code | 119 blank | 114 comment | 1444 complexity | df78cf613bd6590825456c9515667d28 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <stdio.h>
  10. #include <stdarg.h>
  11. #include <assert.h>
  12. #include <ctype.h>
  13. #include "ILexer.h"
  14. #include "Scintilla.h"
  15. #include "SciLexer.h"
  16. #include "StringCopy.h"
  17. #include "WordList.h"
  18. #include "LexAccessor.h"
  19. #include "Accessor.h"
  20. #include "StyleContext.h"
  21. #include "CharacterSet.h"
  22. #include "LexerModule.h"
  23. #ifdef SCI_NAMESPACE
  24. using namespace Scintilla;
  25. #endif
  26. #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  27. #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  28. #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  29. enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  30. enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  31. static inline bool IsAWordChar(const int ch) {
  32. return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  33. }
  34. static inline bool IsAWordStart(const int ch) {
  35. return (ch < 0x80) && (isalnum(ch) || ch == '_');
  36. }
  37. inline bool IsOperator(int ch) {
  38. if (IsASCII(ch) && isalnum(ch))
  39. return false;
  40. // '.' left out as it is used to make up numbers
  41. if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  42. ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  43. ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  44. ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  45. ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  46. ch == '?' || ch == '!' || ch == '.' || ch == '~')
  47. return true;
  48. return false;
  49. }
  50. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  51. unsigned int i = 0;
  52. for (; (i < end - start + 1) && (i < len-1); i++) {
  53. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  54. }
  55. s[i] = '\0';
  56. }
  57. static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  58. unsigned int i = 0;
  59. for (; i < sLen-1; i++) {
  60. char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  61. if ((i == 0) && !IsAWordStart(ch))
  62. break;
  63. if ((i > 0) && !IsAWordChar(ch))
  64. break;
  65. s[i] = ch;
  66. }
  67. s[i] = '\0';
  68. return s;
  69. }
  70. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  71. char s[100];
  72. GetTextSegment(styler, start, end, s, sizeof(s));
  73. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  74. if (strstr(s, "src")) // External script
  75. return eScriptNone;
  76. if (strstr(s, "vbs"))
  77. return eScriptVBS;
  78. if (strstr(s, "pyth"))
  79. return eScriptPython;
  80. if (strstr(s, "javas"))
  81. return eScriptJS;
  82. if (strstr(s, "jscr"))
  83. return eScriptJS;
  84. if (strstr(s, "php"))
  85. return eScriptPHP;
  86. if (strstr(s, "xml")) {
  87. const char *xml = strstr(s, "xml");
  88. for (const char *t=s; t<xml; t++) {
  89. if (!IsASpace(*t)) {
  90. return prevValue;
  91. }
  92. }
  93. return eScriptXML;
  94. }
  95. return prevValue;
  96. }
  97. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  98. int iResult = 0;
  99. char s[100];
  100. GetTextSegment(styler, start, end, s, sizeof(s));
  101. if (0 == strncmp(s, "php", 3)) {
  102. iResult = 3;
  103. }
  104. return iResult;
  105. }
  106. static script_type ScriptOfState(int state) {
  107. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  108. return eScriptPython;
  109. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  110. return eScriptVBS;
  111. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  112. return eScriptJS;
  113. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  114. return eScriptPHP;
  115. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  116. return eScriptSGML;
  117. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  118. return eScriptSGMLblock;
  119. } else {
  120. return eScriptNone;
  121. }
  122. }
  123. static int statePrintForState(int state, script_mode inScriptType) {
  124. int StateToPrint = state;
  125. if (state >= SCE_HJ_START) {
  126. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  127. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  128. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  129. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  130. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  131. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  132. }
  133. }
  134. return StateToPrint;
  135. }
  136. static int stateForPrintState(int StateToPrint) {
  137. int state;
  138. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  139. state = StateToPrint - SCE_HA_PYTHON;
  140. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  141. state = StateToPrint - SCE_HA_VBS;
  142. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  143. state = StateToPrint - SCE_HA_JS;
  144. } else {
  145. state = StateToPrint;
  146. }
  147. return state;
  148. }
  149. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  150. return IsADigit(styler[start]) || (styler[start] == '.') ||
  151. (styler[start] == '-') || (styler[start] == '#');
  152. }
  153. static inline bool isStringState(int state) {
  154. bool bResult;
  155. switch (state) {
  156. case SCE_HJ_DOUBLESTRING:
  157. case SCE_HJ_SINGLESTRING:
  158. case SCE_HJA_DOUBLESTRING:
  159. case SCE_HJA_SINGLESTRING:
  160. case SCE_HB_STRING:
  161. case SCE_HBA_STRING:
  162. case SCE_HP_STRING:
  163. case SCE_HP_CHARACTER:
  164. case SCE_HP_TRIPLE:
  165. case SCE_HP_TRIPLEDOUBLE:
  166. case SCE_HPA_STRING:
  167. case SCE_HPA_CHARACTER:
  168. case SCE_HPA_TRIPLE:
  169. case SCE_HPA_TRIPLEDOUBLE:
  170. case SCE_HPHP_HSTRING:
  171. case SCE_HPHP_SIMPLESTRING:
  172. case SCE_HPHP_HSTRING_VARIABLE:
  173. case SCE_HPHP_COMPLEX_VARIABLE:
  174. bResult = true;
  175. break;
  176. default :
  177. bResult = false;
  178. break;
  179. }
  180. return bResult;
  181. }
  182. static inline bool stateAllowsTermination(int state) {
  183. bool allowTermination = !isStringState(state);
  184. if (allowTermination) {
  185. switch (state) {
  186. case SCE_HB_COMMENTLINE:
  187. case SCE_HPHP_COMMENT:
  188. case SCE_HP_COMMENTLINE:
  189. case SCE_HPA_COMMENTLINE:
  190. allowTermination = false;
  191. }
  192. }
  193. return allowTermination;
  194. }
  195. // not really well done, since it's only comments that should lex the %> and <%
  196. static inline bool isCommentASPState(int state) {
  197. bool bResult;
  198. switch (state) {
  199. case SCE_HJ_COMMENT:
  200. case SCE_HJ_COMMENTLINE:
  201. case SCE_HJ_COMMENTDOC:
  202. case SCE_HB_COMMENTLINE:
  203. case SCE_HP_COMMENTLINE:
  204. case SCE_HPHP_COMMENT:
  205. case SCE_HPHP_COMMENTLINE:
  206. bResult = true;
  207. break;
  208. default :
  209. bResult = false;
  210. break;
  211. }
  212. return bResult;
  213. }
  214. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  215. bool wordIsNumber = IsNumber(start, styler);
  216. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  217. if (wordIsNumber) {
  218. chAttr = SCE_H_NUMBER;
  219. } else {
  220. char s[100];
  221. GetTextSegment(styler, start, end, s, sizeof(s));
  222. if (keywords.InList(s))
  223. chAttr = SCE_H_ATTRIBUTE;
  224. }
  225. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  226. // No keywords -> all are known
  227. chAttr = SCE_H_ATTRIBUTE;
  228. styler.ColourTo(end, chAttr);
  229. }
  230. static int classifyTagHTML(unsigned int start, unsigned int end,
  231. WordList &keywords, Accessor &styler, bool &tagDontFold,
  232. bool caseSensitive, bool isXml, bool allowScripts) {
  233. char withSpace[30 + 2] = " ";
  234. const char *s = withSpace + 1;
  235. // Copy after the '<'
  236. unsigned int i = 1;
  237. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  238. char ch = styler[cPos];
  239. if ((ch != '<') && (ch != '/')) {
  240. withSpace[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  241. }
  242. }
  243. //The following is only a quick hack, to see if this whole thing would work
  244. //we first need the tagname with a trailing space...
  245. withSpace[i] = ' ';
  246. withSpace[i+1] = '\0';
  247. // if the current language is XML, I can fold any tag
  248. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  249. //...to find it in the list of no-container-tags
  250. tagDontFold = (!isXml) && (NULL != strstr(" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr ", withSpace));
  251. //now we can remove the trailing space
  252. withSpace[i] = '\0';
  253. // No keywords -> all are known
  254. char chAttr = SCE_H_TAGUNKNOWN;
  255. if (s[0] == '!') {
  256. chAttr = SCE_H_SGML_DEFAULT;
  257. } else if (!keywords || keywords.InList(s)) {
  258. chAttr = SCE_H_TAG;
  259. }
  260. styler.ColourTo(end, chAttr);
  261. if (chAttr == SCE_H_TAG) {
  262. if (allowScripts && 0 == strcmp(s, "script")) {
  263. // check to see if this is a self-closing tag by sniffing ahead
  264. bool isSelfClose = false;
  265. for (unsigned int cPos = end; cPos <= end + 200; cPos++) {
  266. char ch = styler.SafeGetCharAt(cPos, '\0');
  267. if (ch == '\0' || ch == '>')
  268. break;
  269. else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
  270. isSelfClose = true;
  271. break;
  272. }
  273. }
  274. // do not enter a script state if the tag self-closed
  275. if (!isSelfClose)
  276. chAttr = SCE_H_SCRIPT;
  277. } else if (!isXml && 0 == strcmp(s, "comment")) {
  278. chAttr = SCE_H_COMMENT;
  279. }
  280. }
  281. return chAttr;
  282. }
  283. static void classifyWordHTJS(unsigned int start, unsigned int end,
  284. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  285. char s[30 + 1];
  286. unsigned int i = 0;
  287. for (; i < end - start + 1 && i < 30; i++) {
  288. s[i] = styler[start + i];
  289. }
  290. s[i] = '\0';
  291. char chAttr = SCE_HJ_WORD;
  292. bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
  293. if (wordIsNumber) {
  294. chAttr = SCE_HJ_NUMBER;
  295. } else if (keywords.InList(s)) {
  296. chAttr = SCE_HJ_KEYWORD;
  297. }
  298. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  299. }
  300. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  301. char chAttr = SCE_HB_IDENTIFIER;
  302. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  303. if (wordIsNumber) {
  304. chAttr = SCE_HB_NUMBER;
  305. } else {
  306. char s[100];
  307. GetTextSegment(styler, start, end, s, sizeof(s));
  308. if (keywords.InList(s)) {
  309. chAttr = SCE_HB_WORD;
  310. if (strcmp(s, "rem") == 0)
  311. chAttr = SCE_HB_COMMENTLINE;
  312. }
  313. }
  314. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  315. if (chAttr == SCE_HB_COMMENTLINE)
  316. return SCE_HB_COMMENTLINE;
  317. else
  318. return SCE_HB_DEFAULT;
  319. }
  320. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
  321. bool wordIsNumber = IsADigit(styler[start]);
  322. char s[30 + 1];
  323. unsigned int i = 0;
  324. for (; i < end - start + 1 && i < 30; i++) {
  325. s[i] = styler[start + i];
  326. }
  327. s[i] = '\0';
  328. char chAttr = SCE_HP_IDENTIFIER;
  329. if (0 == strcmp(prevWord, "class"))
  330. chAttr = SCE_HP_CLASSNAME;
  331. else if (0 == strcmp(prevWord, "def"))
  332. chAttr = SCE_HP_DEFNAME;
  333. else if (wordIsNumber)
  334. chAttr = SCE_HP_NUMBER;
  335. else if (keywords.InList(s))
  336. chAttr = SCE_HP_WORD;
  337. else if (isMako && 0 == strcmp(s, "block"))
  338. chAttr = SCE_HP_WORD;
  339. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  340. strcpy(prevWord, s);
  341. }
  342. // Update the word colour to default or keyword
  343. // Called when in a PHP word
  344. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  345. char chAttr = SCE_HPHP_DEFAULT;
  346. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  347. if (wordIsNumber) {
  348. chAttr = SCE_HPHP_NUMBER;
  349. } else {
  350. char s[100];
  351. GetTextSegment(styler, start, end, s, sizeof(s));
  352. if (keywords.InList(s))
  353. chAttr = SCE_HPHP_WORD;
  354. }
  355. styler.ColourTo(end, chAttr);
  356. }
  357. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  358. char s[30 + 1];
  359. unsigned int i = 0;
  360. for (; i < end - start + 1 && i < 30; i++) {
  361. s[i] = styler[start + i];
  362. }
  363. s[i] = '\0';
  364. return keywords.InList(s);
  365. }
  366. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  367. char s[30 + 1];
  368. unsigned int i = 0;
  369. for (; i < end - start + 1 && i < 30; i++) {
  370. s[i] = styler[start + i];
  371. }
  372. s[i] = '\0';
  373. return (0 == strcmp(s, "[CDATA["));
  374. }
  375. // Return the first state to reach when entering a scripting language
  376. static int StateForScript(script_type scriptLanguage) {
  377. int Result;
  378. switch (scriptLanguage) {
  379. case eScriptVBS:
  380. Result = SCE_HB_START;
  381. break;
  382. case eScriptPython:
  383. Result = SCE_HP_START;
  384. break;
  385. case eScriptPHP:
  386. Result = SCE_HPHP_DEFAULT;
  387. break;
  388. case eScriptXML:
  389. Result = SCE_H_TAGUNKNOWN;
  390. break;
  391. case eScriptSGML:
  392. Result = SCE_H_SGML_DEFAULT;
  393. break;
  394. case eScriptComment:
  395. Result = SCE_H_COMMENT;
  396. break;
  397. default :
  398. Result = SCE_HJ_START;
  399. break;
  400. }
  401. return Result;
  402. }
  403. static inline bool issgmlwordchar(int ch) {
  404. return !IsASCII(ch) ||
  405. (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
  406. }
  407. static inline bool IsPhpWordStart(int ch) {
  408. return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
  409. }
  410. static inline bool IsPhpWordChar(int ch) {
  411. return IsADigit(ch) || IsPhpWordStart(ch);
  412. }
  413. static bool InTagState(int state) {
  414. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  415. state == SCE_H_SCRIPT ||
  416. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  417. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  418. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  419. }
  420. static bool IsCommentState(const int state) {
  421. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  422. }
  423. static bool IsScriptCommentState(const int state) {
  424. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  425. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  426. }
  427. static bool isLineEnd(int ch) {
  428. return ch == '\r' || ch == '\n';
  429. }
  430. static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
  431. if (strlen(blockType) == 0) {
  432. return ((ch == '%') && (chNext == '>'));
  433. } else if ((0 == strcmp(blockType, "inherit")) ||
  434. (0 == strcmp(blockType, "namespace")) ||
  435. (0 == strcmp(blockType, "include")) ||
  436. (0 == strcmp(blockType, "page"))) {
  437. return ((ch == '/') && (chNext == '>'));
  438. } else if (0 == strcmp(blockType, "%")) {
  439. if (ch == '/' && isLineEnd(chNext))
  440. return 1;
  441. else
  442. return isLineEnd(ch);
  443. } else if (0 == strcmp(blockType, "{")) {
  444. return ch == '}';
  445. } else {
  446. return (ch == '>');
  447. }
  448. }
  449. static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
  450. if (strlen(blockType) == 0) {
  451. return 0;
  452. } else if (0 == strcmp(blockType, "%")) {
  453. return ((ch == '%') && (chNext == '}'));
  454. } else if (0 == strcmp(blockType, "{")) {
  455. return ((ch == '}') && (chNext == '}'));
  456. } else {
  457. return 0;
  458. }
  459. }
  460. static bool isPHPStringState(int state) {
  461. return
  462. (state == SCE_HPHP_HSTRING) ||
  463. (state == SCE_HPHP_SIMPLESTRING) ||
  464. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  465. (state == SCE_HPHP_COMPLEX_VARIABLE);
  466. }
  467. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  468. int j;
  469. const int beginning = i - 1;
  470. bool isValidSimpleString = false;
  471. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  472. i++;
  473. char ch = styler.SafeGetCharAt(i);
  474. const char chNext = styler.SafeGetCharAt(i + 1);
  475. if (!IsPhpWordStart(ch)) {
  476. if (ch == '\'' && IsPhpWordStart(chNext)) {
  477. i++;
  478. ch = chNext;
  479. isSimpleString = true;
  480. } else {
  481. phpStringDelimiter[0] = '\0';
  482. return beginning;
  483. }
  484. }
  485. phpStringDelimiter[0] = ch;
  486. i++;
  487. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  488. if (!IsPhpWordChar(styler[j])) {
  489. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  490. isValidSimpleString = true;
  491. j++;
  492. break;
  493. } else {
  494. phpStringDelimiter[0] = '\0';
  495. return beginning;
  496. }
  497. }
  498. if (j - i < phpStringDelimiterSize - 2)
  499. phpStringDelimiter[j-i+1] = styler[j];
  500. else
  501. i++;
  502. }
  503. if (isSimpleString && !isValidSimpleString) {
  504. phpStringDelimiter[0] = '\0';
  505. return beginning;
  506. }
  507. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  508. return j - 1;
  509. }
  510. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  511. Accessor &styler, bool isXml) {
  512. WordList &keywords = *keywordlists[0];
  513. WordList &keywords2 = *keywordlists[1];
  514. WordList &keywords3 = *keywordlists[2];
  515. WordList &keywords4 = *keywordlists[3];
  516. WordList &keywords5 = *keywordlists[4];
  517. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  518. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  519. styler.StartAt(startPos, static_cast<unsigned char>(STYLE_MAX));
  520. char prevWord[200];
  521. prevWord[0] = '\0';
  522. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  523. phpStringDelimiter[0] = '\0';
  524. int StateToPrint = initStyle;
  525. int state = stateForPrintState(StateToPrint);
  526. char makoBlockType[200];
  527. makoBlockType[0] = '\0';
  528. int makoComment = 0;
  529. char djangoBlockType[2];
  530. djangoBlockType[0] = '\0';
  531. // If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
  532. if (InTagState(state)) {
  533. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  534. int backLineStart = styler.LineStart(styler.GetLine(startPos-1));
  535. length += startPos - backLineStart;
  536. startPos = backLineStart;
  537. }
  538. state = SCE_H_DEFAULT;
  539. }
  540. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  541. if (isPHPStringState(state)) {
  542. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  543. startPos--;
  544. length++;
  545. state = styler.StyleAt(startPos);
  546. }
  547. if (startPos == 0)
  548. state = SCE_H_DEFAULT;
  549. }
  550. styler.StartAt(startPos, static_cast<unsigned char>(STYLE_MAX));
  551. int lineCurrent = styler.GetLine(startPos);
  552. int lineState;
  553. if (lineCurrent > 0) {
  554. lineState = styler.GetLineState(lineCurrent-1);
  555. } else {
  556. // Default client and ASP scripting language is JavaScript
  557. lineState = eScriptJS << 8;
  558. // property asp.default.language
  559. // Script in ASP code is initially assumed to be in JavaScript.
  560. // To change this to VBScript set asp.default.language to 2. Python is 3.
  561. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  562. }
  563. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  564. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  565. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  566. bool tagDontFold = false; //some HTML tags should not be folded
  567. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  568. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  569. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  570. script_type scriptLanguage = ScriptOfState(state);
  571. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  572. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  573. scriptLanguage = eScriptComment;
  574. }
  575. script_type beforeLanguage = ScriptOfState(beforePreProc);
  576. // property fold.html
  577. // Folding is turned on or off for HTML and XML files with this option.
  578. // The fold option must also be on for folding to occur.
  579. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  580. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  581. // property fold.html.preprocessor
  582. // Folding is turned on or off for scripts embedded in HTML files with this option.
  583. // The default is on.
  584. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  585. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  586. // property fold.hypertext.comment
  587. // Allow folding for comments in scripts embedded in HTML.
  588. // The default is off.
  589. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  590. // property fold.hypertext.heredoc
  591. // Allow folding for heredocs in scripts embedded in HTML.
  592. // The default is off.
  593. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  594. // property html.tags.case.sensitive
  595. // For XML and HTML, setting this property to 1 will make tags match in a case
  596. // sensitive way which is the expected behaviour for XML and XHTML.
  597. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  598. // property lexer.xml.allow.scripts
  599. // Set to 0 to disable scripts in XML.
  600. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  601. // property lexer.html.mako
  602. // Set to 1 to enable the mako template language.
  603. const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
  604. // property lexer.html.django
  605. // Set to 1 to enable the django template language.
  606. const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
  607. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  608. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  609. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  610. // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
  611. const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
  612. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  613. int levelCurrent = levelPrev;
  614. int visibleChars = 0;
  615. int lineStartVisibleChars = 0;
  616. int chPrev = ' ';
  617. int ch = ' ';
  618. int chPrevNonWhite = ' ';
  619. // look back to set chPrevNonWhite properly for better regex colouring
  620. if (scriptLanguage == eScriptJS && startPos > 0) {
  621. int back = startPos;
  622. int style = 0;
  623. while (--back) {
  624. style = styler.StyleAt(back);
  625. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  626. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  627. break;
  628. }
  629. if (style == SCE_HJ_SYMBOLS) {
  630. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  631. }
  632. }
  633. styler.StartSegment(startPos);
  634. const int lengthDoc = startPos + length;
  635. for (int i = startPos; i < lengthDoc; i++) {
  636. const int chPrev2 = chPrev;
  637. chPrev = ch;
  638. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  639. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  640. chPrevNonWhite = ch;
  641. ch = static_cast<unsigned char>(styler[i]);
  642. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  643. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  644. // Handle DBCS codepages
  645. if (styler.IsLeadByte(static_cast<char>(ch))) {
  646. chPrev = ' ';
  647. i += 1;
  648. continue;
  649. }
  650. if ((!IsASpace(ch) || !foldCompact) && fold)
  651. visibleChars++;
  652. if (!IsASpace(ch))
  653. lineStartVisibleChars++;
  654. // decide what is the current state to print (depending of the script tag)
  655. StateToPrint = statePrintForState(state, inScriptType);
  656. // handle script folding
  657. if (fold) {
  658. switch (scriptLanguage) {
  659. case eScriptJS:
  660. case eScriptPHP:
  661. //not currently supported case eScriptVBS:
  662. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  663. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  664. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  665. if (ch == '#') {
  666. int j = i + 1;
  667. while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
  668. j++;
  669. }
  670. if (styler.Match(j, "region") || styler.Match(j, "if")) {
  671. levelCurrent++;
  672. } else if (styler.Match(j, "end")) {
  673. levelCurrent--;
  674. }
  675. } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  676. levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
  677. }
  678. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  679. levelCurrent--;
  680. }
  681. break;
  682. case eScriptPython:
  683. if (state != SCE_HP_COMMENTLINE && !isMako) {
  684. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  685. levelCurrent++;
  686. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  687. // check if the number of tabs is lower than the level
  688. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  689. for (int j = 0; Findlevel > 0; j++) {
  690. char chTmp = styler.SafeGetCharAt(i + j + 1);
  691. if (chTmp == '\t') {
  692. Findlevel -= 8;
  693. } else if (chTmp == ' ') {
  694. Findlevel--;
  695. } else {
  696. break;
  697. }
  698. }
  699. if (Findlevel > 0) {
  700. levelCurrent -= Findlevel / 8;
  701. if (Findlevel % 8)
  702. levelCurrent--;
  703. }
  704. }
  705. }
  706. break;
  707. default:
  708. break;
  709. }
  710. }
  711. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  712. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  713. // Avoid triggering two times on Dos/Win
  714. // New line -> record any line state onto /next/ line
  715. if (fold) {
  716. int lev = levelPrev;
  717. if (visibleChars == 0)
  718. lev |= SC_FOLDLEVELWHITEFLAG;
  719. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  720. lev |= SC_FOLDLEVELHEADERFLAG;
  721. styler.SetLevel(lineCurrent, lev);
  722. visibleChars = 0;
  723. levelPrev = levelCurrent;
  724. }
  725. styler.SetLineState(lineCurrent,
  726. ((inScriptType & 0x03) << 0) |
  727. ((tagOpened ? 1 : 0) << 2) |
  728. ((tagClosing ? 1 : 0) << 3) |
  729. ((aspScript & 0x0F) << 4) |
  730. ((clientScript & 0x0F) << 8) |
  731. ((beforePreProc & 0xFF) << 12));
  732. lineCurrent++;
  733. lineStartVisibleChars = 0;
  734. }
  735. // handle start of Mako comment line
  736. if (isMako && ch == '#' && chNext == '#') {
  737. makoComment = 1;
  738. }
  739. // handle end of Mako comment line
  740. else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
  741. makoComment = 0;
  742. styler.ColourTo(i, SCE_HP_COMMENTLINE);
  743. state = SCE_HP_DEFAULT;
  744. }
  745. // Allow falling through to mako handling code if newline is going to end a block
  746. if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
  747. (!isMako || (0 != strcmp(makoBlockType, "%")))) {
  748. }
  749. // generic end of script processing
  750. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  751. // Check if it's the end of the script tag (or any other HTML tag)
  752. switch (state) {
  753. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  754. case SCE_H_DOUBLESTRING:
  755. case SCE_H_SINGLESTRING:
  756. case SCE_HJ_COMMENT:
  757. case SCE_HJ_COMMENTDOC:
  758. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  759. // the end of script marker from some JS interpreters.
  760. case SCE_HB_COMMENTLINE:
  761. case SCE_HBA_COMMENTLINE:
  762. case SCE_HJ_DOUBLESTRING:
  763. case SCE_HJ_SINGLESTRING:
  764. case SCE_HJ_REGEX:
  765. case SCE_HB_STRING:
  766. case SCE_HBA_STRING:
  767. case SCE_HP_STRING:
  768. case SCE_HP_TRIPLE:
  769. case SCE_HP_TRIPLEDOUBLE:
  770. case SCE_HPHP_HSTRING:
  771. case SCE_HPHP_SIMPLESTRING:
  772. case SCE_HPHP_COMMENT:
  773. case SCE_HPHP_COMMENTLINE:
  774. break;
  775. default :
  776. // check if the closing tag is a script tag
  777. if (const char *tag =
  778. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  779. state == SCE_H_COMMENT ? "comment" : 0) {
  780. int j = i + 2;
  781. int chr;
  782. do {
  783. chr = static_cast<int>(*tag++);
  784. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  785. if (chr != 0) break;
  786. }
  787. // closing tag of the script (it's a closing HTML tag anyway)
  788. styler.ColourTo(i - 1, StateToPrint);
  789. state = SCE_H_TAGUNKNOWN;
  790. inScriptType = eHtml;
  791. scriptLanguage = eScriptNone;
  792. clientScript = eScriptJS;
  793. i += 2;
  794. visibleChars += 2;
  795. tagClosing = true;
  796. continue;
  797. }
  798. }
  799. /////////////////////////////////////
  800. // handle the start of PHP pre-processor = Non-HTML
  801. else if ((state != SCE_H_ASPAT) &&
  802. !isPHPStringState(state) &&
  803. (state != SCE_HPHP_COMMENT) &&
  804. (state != SCE_HPHP_COMMENTLINE) &&
  805. (ch == '<') &&
  806. (chNext == '?') &&
  807. !IsScriptCommentState(state)) {
  808. beforeLanguage = scriptLanguage;
  809. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
  810. if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
  811. styler.ColourTo(i - 1, StateToPrint);
  812. beforePreProc = state;
  813. i++;
  814. visibleChars++;
  815. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  816. if (scriptLanguage == eScriptXML)
  817. styler.ColourTo(i, SCE_H_XMLSTART);
  818. else
  819. styler.ColourTo(i, SCE_H_QUESTION);
  820. state = StateForScript(scriptLanguage);
  821. if (inScriptType == eNonHtmlScript)
  822. inScriptType = eNonHtmlScriptPreProc;
  823. else
  824. inScriptType = eNonHtmlPreProc;
  825. // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
  826. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  827. levelCurrent++;
  828. }
  829. // should be better
  830. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  831. continue;
  832. }
  833. // handle the start Mako template Python code
  834. else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
  835. (lineStartVisibleChars == 1 && ch == '%') ||
  836. (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
  837. (ch == '$' && chNext == '{') ||
  838. (ch == '<' && chNext == '/' && chNext2 == '%'))) {
  839. if (ch == '%' || ch == '/')
  840. StringCopy(makoBlockType, "%");
  841. else if (ch == '$')
  842. StringCopy(makoBlockType, "{");
  843. else if (chNext == '/')
  844. GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
  845. else
  846. GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
  847. styler.ColourTo(i - 1, StateToPrint);
  848. beforePreProc = state;
  849. if (inScriptType == eNonHtmlScript)
  850. inScriptType = eNonHtmlScriptPreProc;
  851. else
  852. inScriptType = eNonHtmlPreProc;
  853. if (chNext == '/') {
  854. i += 2;
  855. visibleChars += 2;
  856. } else if (ch != '%') {
  857. i++;
  858. visibleChars++;
  859. }
  860. state = SCE_HP_START;
  861. scriptLanguage = eScriptPython;
  862. styler.ColourTo(i, SCE_H_ASP);
  863. if (ch != '%' && ch != '$' && ch != '/') {
  864. i += static_cast<int>(strlen(makoBlockType));
  865. visibleChars += static_cast<int>(strlen(makoBlockType));
  866. if (keywords4.InList(makoBlockType))
  867. styler.ColourTo(i, SCE_HP_WORD);
  868. else
  869. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  870. }
  871. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  872. continue;
  873. }
  874. // handle the start/end of Django comment
  875. else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
  876. styler.ColourTo(i - 1, StateToPrint);
  877. beforePreProc = state;
  878. beforeLanguage = scriptLanguage;
  879. if (inScriptType == eNonHtmlScript)
  880. inScriptType = eNonHtmlScriptPreProc;
  881. else
  882. inScriptType = eNonHtmlPreProc;
  883. i += 1;
  884. visibleChars += 1;
  885. scriptLanguage = eScriptComment;
  886. state = SCE_H_COMMENT;
  887. styler.ColourTo(i, SCE_H_ASP);
  888. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  889. continue;
  890. } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
  891. styler.ColourTo(i - 1, StateToPrint);
  892. i += 1;
  893. visibleChars += 1;
  894. styler.ColourTo(i, SCE_H_ASP);
  895. state = beforePreProc;
  896. if (inScriptType == eNonHtmlScriptPreProc)
  897. inScriptType = eNonHtmlScript;
  898. else
  899. inScriptType = eHtml;
  900. scriptLanguage = beforeLanguage;
  901. continue;
  902. }
  903. // handle the start Django template code
  904. else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
  905. if (chNext == '%')
  906. StringCopy(djangoBlockType, "%");
  907. else
  908. StringCopy(djangoBlockType, "{");
  909. styler.ColourTo(i - 1, StateToPrint);
  910. beforePreProc = state;
  911. if (inScriptType == eNonHtmlScript)
  912. inScriptType = eNonHtmlScriptPreProc;
  913. else
  914. inScriptType = eNonHtmlPreProc;
  915. i += 1;
  916. visibleChars += 1;
  917. state = SCE_HP_START;
  918. beforeLanguage = scriptLanguage;
  919. scriptLanguage = eScriptPython;
  920. styler.ColourTo(i, SCE_H_ASP);
  921. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  922. continue;
  923. }
  924. // handle the start of ASP pre-processor = Non-HTML
  925. else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  926. styler.ColourTo(i - 1, StateToPrint);
  927. beforePreProc = state;
  928. if (inScriptType == eNonHtmlScript)
  929. inScriptType = eNonHtmlScriptPreProc;
  930. else
  931. inScriptType = eNonHtmlPreProc;
  932. if (chNext2 == '@') {
  933. i += 2; // place as if it was the second next char treated
  934. visibleChars += 2;
  935. state = SCE_H_ASPAT;
  936. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  937. styler.ColourTo(i + 3, SCE_H_ASP);
  938. state = SCE_H_XCCOMMENT;
  939. scriptLanguage = eScriptVBS;
  940. continue;
  941. } else {
  942. if (chNext2 == '=') {
  943. i += 2; // place as if it was the second next char treated
  944. visibleChars += 2;
  945. } else {
  946. i++; // place as if it was the next char treated
  947. visibleChars++;
  948. }
  949. state = StateForScript(aspScript);
  950. }
  951. scriptLanguage = eScriptVBS;
  952. styler.ColourTo(i, SCE_H_ASP);
  953. // fold whole script
  954. if (foldHTMLPreprocessor)
  955. levelCurrent++;
  956. // should be better
  957. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  958. continue;
  959. }
  960. /////////////////////////////////////
  961. // handle the start of SGML language (DTD)
  962. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  963. (chPrev == '<') &&
  964. (ch == '!') &&
  965. (StateToPrint != SCE_H_CDATA) &&
  966. (!IsCommentState(StateToPrint)) &&
  967. (!IsScriptCommentState(StateToPrint))) {
  968. beforePreProc = state;
  969. styler.ColourTo(i - 2, StateToPrint);
  970. if ((chNext == '-') && (chNext2 == '-')) {
  971. state = SCE_H_COMMENT; // wait for a pending command
  972. styler.ColourTo(i + 2, SCE_H_COMMENT);
  973. i += 2; // follow styling after the --
  974. } else if (isWordCdata(i + 1, i + 7, styler)) {
  975. state = SCE_H_CDATA;
  976. } else {
  977. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  978. scriptLanguage = eScriptSGML;
  979. state = SCE_H_SGML_COMMAND; // wait for a pending command
  980. }
  981. // fold whole tag (-- when closing the tag)
  982. if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
  983. levelCurrent++;
  984. continue;
  985. }
  986. // handle the end of Mako Python code
  987. else if (isMako &&
  988. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  989. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  990. isMakoBlockEnd(ch, chNext, makoBlockType)) {
  991. if (state == SCE_H_ASPAT) {
  992. aspScript = segIsScriptingIndicator(styler,
  993. styler.GetStartSegment(), i - 1, aspScript);
  994. }
  995. if (state == SCE_HP_WORD) {
  996. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  997. } else {
  998. styler.ColourTo(i - 1, StateToPrint);
  999. }
  1000. if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
  1001. i++;
  1002. visibleChars++;
  1003. }
  1004. else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
  1005. i++;
  1006. visibleChars++;
  1007. }
  1008. if (0 != strcmp(makoBlockType, "%") || ch == '/') {
  1009. styler.ColourTo(i, SCE_H_ASP);
  1010. }
  1011. state = beforePreProc;
  1012. if (inScriptType == eNonHtmlScriptPreProc)
  1013. inScriptType = eNonHtmlScript;
  1014. else
  1015. inScriptType = eHtml;
  1016. scriptLanguage = eScriptNone;
  1017. continue;
  1018. }
  1019. // handle the end of Django template code
  1020. else if (isDjango &&
  1021. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  1022. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  1023. isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
  1024. if (state == SCE_H_ASPAT) {
  1025. aspScript = segIsScriptingIndicator(styler,
  1026. styler.GetStartSegment(), i - 1, aspScript);
  1027. }
  1028. if (state == SCE_HP_WORD) {
  1029. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  1030. } else {
  1031. styler.ColourTo(i - 1, StateToPrint);
  1032. }
  1033. i += 1;
  1034. visibleChars += 1;
  1035. styler.ColourTo(i, SCE_H_ASP);
  1036. state = beforePreProc;
  1037. if (inScriptType == eNonHtmlScriptPreProc)
  1038. inScriptType = eNonHtmlScript;
  1039. else
  1040. inScriptType = eHtml;
  1041. scriptLanguage = beforeLanguage;
  1042. continue;
  1043. }
  1044. // handle the end of a pre-processor = Non-HTML
  1045. else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  1046. (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
  1047. (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
  1048. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  1049. if (state == SCE_H_ASPAT) {
  1050. aspScript = segIsScriptingIndicator(styler,
  1051. styler.GetStartSegment(), i - 1, aspScript);
  1052. }
  1053. // Bounce out of any ASP mode
  1054. switch (state) {
  1055. case SCE_HJ_WORD:
  1056. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1057. break;
  1058. case SCE_HB_WORD:
  1059. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1060. break;
  1061. case SCE_HP_WORD:
  1062. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  1063. break;
  1064. case SCE_HPHP_WORD:
  1065. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  1066. break;
  1067. case SCE_H_XCCOMMENT:
  1068. styler.ColourTo(i - 1, state);
  1069. break;
  1070. default :
  1071. styler.ColourTo(i - 1, StateToPrint);
  1072. break;
  1073. }
  1074. if (scriptLanguage != eScriptSGML) {
  1075. i++;
  1076. visibleChars++;
  1077. }
  1078. if (ch == '%')
  1079. styler.ColourTo(i, SCE_H_ASP);
  1080. else if (scriptLanguage == eScriptXML)
  1081. styler.ColourTo(i, SCE_H_XMLEND);
  1082. else if (scriptLanguage == eScriptSGML)
  1083. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1084. else
  1085. styler.ColourTo(i, SCE_H_QUESTION);
  1086. state = beforePreProc;
  1087. if (inScriptType == eNonHtmlScriptPreProc)
  1088. inScriptType = eNonHtmlScript;
  1089. else
  1090. inScriptType = eHtml;
  1091. // Unfold all scripting languages, except for XML tag
  1092. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  1093. levelCurrent--;
  1094. }
  1095. scriptLanguage = beforeLanguage;
  1096. continue;
  1097. }
  1098. /////////////////////////////////////
  1099. switch (state) {
  1100. case SCE_H_DEFAULT:
  1101. if (ch == '<') {
  1102. // in HTML, fold on tag open and unfold on tag close
  1103. tagOpened = true;
  1104. tagClosing = (chNext == '/');
  1105. styler.ColourTo(i - 1, StateToPrint);
  1106. if (chNext != '!')
  1107. state = SCE_H_TAGUNKNOWN;
  1108. } else if (ch == '&') {
  1109. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  1110. state = SCE_H_ENTITY;
  1111. }
  1112. break;
  1113. case SCE_H_SGML_DEFAULT:
  1114. case SCE_H_SGML_BLOCK_DEFAULT:
  1115. // if (scriptLanguage == eScriptSGMLblock)
  1116. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  1117. if (ch == '\"') {
  1118. styler.ColourTo(i - 1, StateToPrint);
  1119. state = SCE_H_SGML_DOUBLESTRING;
  1120. } else if (ch == '\'') {
  1121. styler.ColourTo(i - 1, StateToPrint);
  1122. state = SCE_H_SGML_SIMPLESTRING;
  1123. } else if ((ch == '-') && (chPrev == '-')) {
  1124. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  1125. styler.ColourTo(i - 2, StateToPrint);
  1126. }
  1127. state = SCE_H_SGML_COMMENT;
  1128. } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
  1129. styler.ColourTo(i - 2, StateToPrint);
  1130. state = SCE_H_SGML_ENTITY;
  1131. } else if (ch == '#') {
  1132. styler.ColourTo(i - 1, StateToPrint);
  1133. state = SCE_H_SGML_SPECIAL;
  1134. } else if (ch == '[') {
  1135. styler.ColourTo(i - 1, StateToPrint);
  1136. scriptLanguage = eScriptSGMLblock;
  1137. state = SCE_H_SGML_BLOCK_DEFAULT;
  1138. } else if (ch == ']') {
  1139. if (scriptLanguage == eScriptSGMLblock) {
  1140. styler.ColourTo(i, StateToPrint);
  1141. scriptLanguage = eScriptSGML;
  1142. } else {
  1143. styler.ColourTo(i - 1, StateToPrint);
  1144. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1145. }
  1146. state = SCE_H_SGML_DEFAULT;
  1147. } else if (scriptLanguage == eScriptSGMLblock) {
  1148. if ((ch == '!') && (chPrev == '<')) {
  1149. styler.ColourTo(i - 2, StateToPrint);
  1150. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1151. state = SCE_H_SGML_COMMAND;
  1152. } else if (ch == '>') {
  1153. styler.ColourTo(i - 1, StateToPrint);
  1154. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1155. }
  1156. }
  1157. break;
  1158. case SCE_H_SGML_COMMAND:
  1159. if ((ch == '-') && (chPrev == '-')) {
  1160. styler.ColourTo(i - 2, StateToPrint);
  1161. state = SCE_H_SGML_COMMENT;
  1162. } else if (!issgmlwordchar(ch)) {
  1163. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  1164. styler.ColourTo(i - 1, StateToPrint);
  1165. state = SCE_H_SGML_1ST_PARAM;
  1166. } else {
  1167. state = SCE_H_SGML_ERROR;
  1168. }
  1169. }
  1170. break;
  1171. case SCE_H_SGML_1ST_PARAM:
  1172. // wait for the beginning of the word
  1173. if ((ch == '-') && (chPrev == '-')) {
  1174. if (scriptLanguage == eScriptSGMLblock) {
  1175. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  1176. } else {
  1177. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  1178. }
  1179. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  1180. } else if (issgmlwordchar(ch)) {
  1181. if (scriptLanguage == eScriptSGMLblock) {
  1182. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  1183. } else {
  1184. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  1185. }
  1186. // find the length of the word
  1187. int size = 1;
  1188. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  1189. size++;
  1190. styler.ColourTo(i + size - 1, StateToPrint);
  1191. i += size - 1;
  1192. visibleChars += size - 1;
  1193. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  1194. if (scriptLanguage == eScriptSGMLblock) {
  1195. state = SCE_H_SGML_BLOCK_DEFAULT;
  1196. } else {
  1197. state = SCE_H_SGML_DEFAULT;
  1198. }
  1199. continue;
  1200. }
  1201. break;
  1202. case SCE_H_SGML_ERROR:
  1203. if ((ch == '-') && (chPrev == '-')) {
  1204. styler.ColourTo(i - 2, StateToPrint);
  1205. state = SCE_H_SGML_COMMENT;
  1206. }
  1207. break;
  1208. case SCE_H_SGML_DOUBLESTRING:
  1209. if (ch == '\"') {
  1210. styler.ColourTo(i, StateToPrint);
  1211. state = SCE_H_SGML_DEFAULT;
  1212. }
  1213. break;
  1214. case SCE_H_SGML_SIMPLESTRING:
  1215. if (ch == '\'') {
  1216. styler.ColourTo(i, StateToPrint);
  1217. state = SCE_H_SGML_DEFAULT;
  1218. }
  1219. break;
  1220. case SCE_H_SGML_COMMENT:
  1221. if ((ch == '-') && (chPrev == '-')) {
  1222. styler.ColourTo(i, StateToPrint);
  1223. state = SCE_H_SGML_DEFAULT;
  1224. }
  1225. break;
  1226. case SCE_H_CDATA:
  1227. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  1228. styler.ColourTo(i, StateToPrint);
  1229. state = SCE_H_DEFAULT;
  1230. levelCurrent--;
  1231. }
  1232. break;
  1233. case SCE_H_COMMENT:
  1234. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  1235. styler.ColourTo(i, StateToPrint);
  1236. state = SCE_H_DEFAULT;
  1237. levelCurrent--;
  1238. }
  1239. break;
  1240. case SCE_H_SGML_1ST_PARAM_COMMENT:
  1241. if ((ch == '-') && (chPrev == '-')) {
  1242. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  1243. state = SCE_H_SGML_1ST_PARAM;
  1244. }
  1245. break;
  1246. case SCE_H_SGML_SPECIAL:
  1247. if (!(IsASCII(ch) && isupper(ch))) {
  1248. styler.ColourTo(i - 1, StateToPrint);
  1249. if (isalnum(ch)) {
  1250. state = SCE_H_SGML_ERROR;
  1251. } else {
  1252. state = SCE_H_SGML_DEFAULT;
  1253. }
  1254. }
  1255. break;
  1256. case SCE_H_SGML_ENTITY:
  1257. if (ch == ';') {
  1258. styler.ColourTo(i, StateToPrint);
  1259. state = SCE_H_SGML_DEFAULT;
  1260. } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1261. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1262. state = SCE_H_SGML_DEFAULT;
  1263. }
  1264. break;
  1265. case SCE_H_ENTITY:
  1266. if (ch == ';') {
  1267. styler.ColourTo(i, StateToPrint);
  1268. state = SCE_H_DEFAULT;
  1269. }
  1270. if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1271. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1272. if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
  1273. styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
  1274. else
  1275. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1276. state = SCE_H_DEFAULT;
  1277. }
  1278. break;
  1279. case SCE_H_TAGUNKNOWN:
  1280. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1281. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1282. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1283. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1284. if (!tagClosing) {
  1285. inScriptType = eNonHtmlScript;
  1286. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1287. } else {
  1288. scriptLanguage = eScriptNone;
  1289. }
  1290. eClass = SCE_H_TAG;
  1291. }
  1292. if (ch == '>') {
  1293. styler.ColourTo(i, eClass);
  1294. if (inScriptType == eNonHtmlScript) {
  1295. state = StateForScript(scriptLanguage);
  1296. } else {
  1297. state = SCE_H_DEFAULT;
  1298. }
  1299. tagOpened = false;
  1300. if (!tagDontFold) {
  1301. if (tagClosing) {
  1302. levelCurrent--;
  1303. } else {
  1304. levelCurrent++;
  1305. }
  1306. }
  1307. tagClosing = false;
  1308. } else if (ch == '/' && chNext == '>') {
  1309. if (eClass == SCE_H_TAGUNKNOWN) {
  1310. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1311. } else {
  1312. styler.ColourTo(i - 1, StateToPrint);
  1313. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1314. }
  1315. i++;
  1316. ch = chNext;
  1317. state = SCE_H_DEFAULT;
  1318. tagOpened = false;
  1319. } else {
  1320. if (eClass != SCE_H_TAGUNKNOWN) {
  1321. if (eClass == SCE_H_SGML_DEFAULT) {
  1322. state = SCE_H_SGML_DEFAULT;
  1323. } else {
  1324. state = SCE_H_OTHER;
  1325. }
  1326. }
  1327. }
  1328. }
  1329. break;
  1330. case SCE_H_ATTRIBUTE:
  1331. if (!setAttributeContinue.Contains(ch)) {
  1332. if (inScriptType == eNonHtmlScript) {
  1333. int scriptLanguagePrev = scriptLanguage;
  1334. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1335. scriptLanguage = clientScript;
  1336. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1337. inScriptType = eHtml;
  1338. }
  1339. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1340. if (ch == '>') {
  1341. styler.ColourTo(i, SCE_H_TAG);
  1342. if (inScriptType == eNonHtmlScript) {
  1343. state = StateForScript(scriptLanguage);
  1344. } else {
  1345. state = SCE_H_DEFAULT;
  1346. }
  1347. tagOpened = false;
  1348. if (!tagDontFold) {
  1349. if (tagClosing) {
  1350. levelCurrent--;
  1351. } else {
  1352. levelCurrent++;
  1353. }
  1354. }
  1355. tagClosing = false;
  1356. } else if (ch == '=') {
  1357. styler.ColourTo(i, SCE_H_OTHER);
  1358. state = SCE_H_VALUE;
  1359. } else {
  1360. state = SCE_H_OTHER;
  1361. }
  1362. }
  1363. break;
  1364. case SCE_H_OTHER:
  1365. if (ch == '>') {
  1366. styler.ColourTo(i - 1, StateToPrint);
  1367. styler.ColourTo(i, SCE_H_TAG);
  1368. if (inScriptType == eNonHtmlScript) {
  1369. state = StateForScript(scriptLanguage);
  1370. } else {
  1371. state = SCE_H_DEFAULT;
  1372. }
  1373. tagOpened = false;
  1374. if (!tagDontFold) {
  1375. if (tagClosing) {
  1376. levelCurrent--;
  1377. } else {
  1378. levelCurrent++;
  1379. }
  1380. }
  1381. tagClosing = false;
  1382. } else if (ch == '\"') {
  1383. styler.ColourTo(i - 1, StateToPrint);
  1384. state = SCE_H_DOUBLESTRING;
  1385. } else if (ch == '\'') {
  1386. styler.ColourTo(i - 1, StateToPrint);
  1387. state = SCE_H_SINGLESTRING;
  1388. } else if (ch == '=') {
  1389. styler.ColourTo(i, StateToPrint);
  1390. state = SCE_H_VALUE;
  1391. } else if (ch == '/' && chNext == '>') {
  1392. styler.ColourTo(i - 1, StateToPrint);
  1393. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1394. i++;
  1395. ch = chNext;
  1396. state = SCE_H_DEFAULT;
  1397. tagOpened = false;
  1398. } else if (ch == '?' && chNext == '>') {
  1399. styler.ColourTo(i - 1, StateToPrint);
  1400. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1401. i++;
  1402. ch = chNext;
  1403. state = SCE_H_DEFAULT;
  1404. } else if (setHTMLWord.Contains(ch)) {
  1405. styler.ColourTo(i - 1, StateToPrint);
  1406. state = SCE_H_ATTRIBUTE;
  1407. }
  1408. break;
  1409. case SCE_H_DOUBLESTRING:
  1410. if (ch == '\"') {
  1411. if (inScriptType == eNonHtmlScript) {
  1412. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1413. }
  1414. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1415. state = SCE_H_OTHER;
  1416. }
  1417. break;
  1418. case SCE_H_SINGLESTRING:
  1419. if (ch == '\'') {
  1420. if (inScriptType == eNonHtmlScript) {
  1421. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1422. }
  1423. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1424. state = SCE_H_OTHER;
  1425. }
  1426. break;
  1427. case SCE_H_VALUE:
  1428. if (!setHTMLWord.Contains(ch)) {
  1429. if (ch == '\"' && chPrev == '=') {
  1430. // Should really test for being first character
  1431. state = SCE_H_DOUBLESTRING;
  1432. } else if (ch == '\'' && chPrev == '=') {
  1433. state = SCE_H_SINGLESTRING;
  1434. } else {
  1435. if (IsNumber(styler.GetStartSegment(), styler)) {
  1436. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1437. } else {
  1438. styler.ColourTo(i - 1, StateToPrint);
  1439. }
  1440. if (ch == '>') {
  1441. styler.ColourTo(i, SCE_H_TAG);
  1442. if (inScriptType == eNonHtmlScript) {
  1443. state = StateForScript(scriptLanguage);
  1444. } else {
  1445. state = SCE_H_DEFAULT;
  1446. }
  1447. tagOpened = false;
  1448. if (!tagDontFold) {
  1449. if (tagClosing) {
  1450. levelCurrent--;
  1451. } else {
  1452. levelCurrent++;
  1453. }
  1454. }
  1455. tagClosing = false;
  1456. } else {
  1457. state = SCE_H_OTHER;
  1458. }
  1459. }
  1460. }
  1461. break;
  1462. case SCE_HJ_DEFAULT:
  1463. case SCE_HJ_START:
  1464. case SCE_HJ_SYMBOLS:
  1465. if (IsAWordStart(ch)) {
  1466. styler.ColourTo(i - 1, StateToPrint);
  1467. state = SCE_HJ_WORD;
  1468. } else if (ch == '/' && chNext == '*') {

Large files are truncated, but you can click here to view the full file