PageRenderTime 58ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/src/scintilla/lexers/LexHTML.cxx

https://code.google.com/p/scite-ru-hg/
C++ | 2076 lines | 1854 code | 113 blank | 109 comment | 1402 complexity | 6ab042c713f34433f762dbff61538bca MD5 | raw file
Possible License(s): 0BSD

Large files are truncated, but you can click here to view the full file

  1. // Scintilla source code edit control
  2. /** @file LexHTML.cxx
  3. ** Lexer for HTML.
  4. **/
  5. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <stdio.h>
  10. #include <stdarg.h>
  11. #include <assert.h>
  12. #include <ctype.h>
  13. #include "ILexer.h"
  14. #include "Scintilla.h"
  15. #include "SciLexer.h"
  16. #include "WordList.h"
  17. #include "LexAccessor.h"
  18. #include "Accessor.h"
  19. #include "StyleContext.h"
  20. #include "CharacterSet.h"
  21. #include "LexerModule.h"
  22. #ifdef SCI_NAMESPACE
  23. using namespace Scintilla;
  24. #endif
  25. #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  26. #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  27. #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  28. enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  29. enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  30. static inline bool IsAWordChar(const int ch) {
  31. return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  32. }
  33. static inline bool IsAWordStart(const int ch) {
  34. return (ch < 0x80) && (isalnum(ch) || ch == '_');
  35. }
  36. inline bool IsOperator(int ch) {
  37. if (isascii(ch) && isalnum(ch))
  38. return false;
  39. // '.' left out as it is used to make up numbers
  40. if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  41. ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  42. ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  43. ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  44. ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  45. ch == '?' || ch == '!' || ch == '.' || ch == '~')
  46. return true;
  47. return false;
  48. }
  49. static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  50. unsigned int i = 0;
  51. for (; (i < end - start + 1) && (i < len-1); i++) {
  52. s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  53. }
  54. s[i] = '\0';
  55. }
  56. static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  57. unsigned int i = 0;
  58. for (; i < sLen-1; i++) {
  59. char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  60. if ((i == 0) && !IsAWordStart(ch))
  61. break;
  62. if ((i > 0) && !IsAWordChar(ch))
  63. break;
  64. s[i] = ch;
  65. }
  66. s[i] = '\0';
  67. return s;
  68. }
  69. static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  70. char s[100];
  71. GetTextSegment(styler, start, end, s, sizeof(s));
  72. //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  73. if (strstr(s, "src")) // External script
  74. return eScriptNone;
  75. if (strstr(s, "vbs"))
  76. return eScriptVBS;
  77. if (strstr(s, "pyth"))
  78. return eScriptPython;
  79. if (strstr(s, "javas"))
  80. return eScriptJS;
  81. if (strstr(s, "jscr"))
  82. return eScriptJS;
  83. if (strstr(s, "php"))
  84. return eScriptPHP;
  85. if (strstr(s, "xml")) {
  86. const char *xml = strstr(s, "xml");
  87. for (const char *t=s; t<xml; t++) {
  88. if (!IsASpace(*t)) {
  89. return prevValue;
  90. }
  91. }
  92. return eScriptXML;
  93. }
  94. return prevValue;
  95. }
  96. static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  97. int iResult = 0;
  98. char s[100];
  99. GetTextSegment(styler, start, end, s, sizeof(s));
  100. if (0 == strncmp(s, "php", 3)) {
  101. iResult = 3;
  102. }
  103. return iResult;
  104. }
  105. static script_type ScriptOfState(int state) {
  106. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  107. return eScriptPython;
  108. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  109. return eScriptVBS;
  110. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  111. return eScriptJS;
  112. } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  113. return eScriptPHP;
  114. } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  115. return eScriptSGML;
  116. } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  117. return eScriptSGMLblock;
  118. } else {
  119. return eScriptNone;
  120. }
  121. }
  122. static int statePrintForState(int state, script_mode inScriptType) {
  123. int StateToPrint = state;
  124. if (state >= SCE_HJ_START) {
  125. if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  126. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
  127. } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  128. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
  129. } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  130. StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
  131. }
  132. }
  133. return StateToPrint;
  134. }
  135. static int stateForPrintState(int StateToPrint) {
  136. int state;
  137. if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
  138. state = StateToPrint - SCE_HA_PYTHON;
  139. } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
  140. state = StateToPrint - SCE_HA_VBS;
  141. } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
  142. state = StateToPrint - SCE_HA_JS;
  143. } else {
  144. state = StateToPrint;
  145. }
  146. return state;
  147. }
  148. static inline bool IsNumber(unsigned int start, Accessor &styler) {
  149. return IsADigit(styler[start]) || (styler[start] == '.') ||
  150. (styler[start] == '-') || (styler[start] == '#');
  151. }
  152. static inline bool isStringState(int state) {
  153. bool bResult;
  154. switch (state) {
  155. case SCE_HJ_DOUBLESTRING:
  156. case SCE_HJ_SINGLESTRING:
  157. case SCE_HJA_DOUBLESTRING:
  158. case SCE_HJA_SINGLESTRING:
  159. case SCE_HB_STRING:
  160. case SCE_HBA_STRING:
  161. case SCE_HP_STRING:
  162. case SCE_HP_CHARACTER:
  163. case SCE_HP_TRIPLE:
  164. case SCE_HP_TRIPLEDOUBLE:
  165. case SCE_HPA_STRING:
  166. case SCE_HPA_CHARACTER:
  167. case SCE_HPA_TRIPLE:
  168. case SCE_HPA_TRIPLEDOUBLE:
  169. case SCE_HPHP_HSTRING:
  170. case SCE_HPHP_SIMPLESTRING:
  171. case SCE_HPHP_HSTRING_VARIABLE:
  172. case SCE_HPHP_COMPLEX_VARIABLE:
  173. bResult = true;
  174. break;
  175. default :
  176. bResult = false;
  177. break;
  178. }
  179. return bResult;
  180. }
  181. static inline bool stateAllowsTermination(int state) {
  182. bool allowTermination = !isStringState(state);
  183. if (allowTermination) {
  184. switch (state) {
  185. case SCE_HB_COMMENTLINE:
  186. case SCE_HPHP_COMMENT:
  187. case SCE_HP_COMMENTLINE:
  188. case SCE_HPA_COMMENTLINE:
  189. allowTermination = false;
  190. }
  191. }
  192. return allowTermination;
  193. }
  194. // not really well done, since it's only comments that should lex the %> and <%
  195. static inline bool isCommentASPState(int state) {
  196. bool bResult;
  197. switch (state) {
  198. case SCE_HJ_COMMENT:
  199. case SCE_HJ_COMMENTLINE:
  200. case SCE_HJ_COMMENTDOC:
  201. case SCE_HB_COMMENTLINE:
  202. case SCE_HP_COMMENTLINE:
  203. case SCE_HPHP_COMMENT:
  204. case SCE_HPHP_COMMENTLINE:
  205. bResult = true;
  206. break;
  207. default :
  208. bResult = false;
  209. break;
  210. }
  211. return bResult;
  212. }
  213. static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  214. bool wordIsNumber = IsNumber(start, styler);
  215. char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
  216. if (wordIsNumber) {
  217. chAttr = SCE_H_NUMBER;
  218. } else {
  219. char s[100];
  220. GetTextSegment(styler, start, end, s, sizeof(s));
  221. if (keywords.InList(s))
  222. chAttr = SCE_H_ATTRIBUTE;
  223. }
  224. if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
  225. // No keywords -> all are known
  226. chAttr = SCE_H_ATTRIBUTE;
  227. styler.ColourTo(end, chAttr);
  228. }
  229. static int classifyTagHTML(unsigned int start, unsigned int end,
  230. WordList &keywords, Accessor &styler, bool &tagDontFold,
  231. bool caseSensitive, bool isXml, bool allowScripts) {
  232. char s[30 + 2];
  233. // Copy after the '<'
  234. unsigned int i = 0;
  235. for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
  236. char ch = styler[cPos];
  237. if ((ch != '<') && (ch != '/')) {
  238. s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
  239. }
  240. }
  241. //The following is only a quick hack, to see if this whole thing would work
  242. //we first need the tagname with a trailing space...
  243. s[i] = ' ';
  244. s[i+1] = '\0';
  245. // if the current language is XML, I can fold any tag
  246. // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
  247. //...to find it in the list of no-container-tags
  248. tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
  249. //now we can remove the trailing space
  250. s[i] = '\0';
  251. // No keywords -> all are known
  252. char chAttr = SCE_H_TAGUNKNOWN;
  253. if (s[0] == '!') {
  254. chAttr = SCE_H_SGML_DEFAULT;
  255. } else if (!keywords || keywords.InList(s)) {
  256. chAttr = SCE_H_TAG;
  257. }
  258. styler.ColourTo(end, chAttr);
  259. if (chAttr == SCE_H_TAG) {
  260. if (allowScripts && 0 == strcmp(s, "script")) {
  261. // check to see if this is a self-closing tag by sniffing ahead
  262. bool isSelfClose = false;
  263. for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
  264. char ch = styler.SafeGetCharAt(cPos, '\0');
  265. if (ch == '\0' || ch == '>')
  266. break;
  267. else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
  268. isSelfClose = true;
  269. break;
  270. }
  271. }
  272. // do not enter a script state if the tag self-closed
  273. if (!isSelfClose)
  274. chAttr = SCE_H_SCRIPT;
  275. } else if (!isXml && 0 == strcmp(s, "comment")) {
  276. chAttr = SCE_H_COMMENT;
  277. }
  278. }
  279. return chAttr;
  280. }
  281. static void classifyWordHTJS(unsigned int start, unsigned int end,
  282. WordList &keywords, Accessor &styler, script_mode inScriptType) {
  283. char s[30 + 1];
  284. unsigned int i = 0;
  285. for (; i < end - start + 1 && i < 30; i++) {
  286. s[i] = styler[start + i];
  287. }
  288. s[i] = '\0';
  289. char chAttr = SCE_HJ_WORD;
  290. bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
  291. if (wordIsNumber) {
  292. chAttr = SCE_HJ_NUMBER;
  293. } else if (keywords.InList(s)) {
  294. chAttr = SCE_HJ_KEYWORD;
  295. }
  296. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  297. }
  298. static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
  299. char chAttr = SCE_HB_IDENTIFIER;
  300. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
  301. if (wordIsNumber)
  302. chAttr = SCE_HB_NUMBER;
  303. else {
  304. char s[100];
  305. GetTextSegment(styler, start, end, s, sizeof(s));
  306. if (keywords.InList(s)) {
  307. chAttr = SCE_HB_WORD;
  308. if (strcmp(s, "rem") == 0)
  309. chAttr = SCE_HB_COMMENTLINE;
  310. }
  311. }
  312. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  313. if (chAttr == SCE_HB_COMMENTLINE)
  314. return SCE_HB_COMMENTLINE;
  315. else
  316. return SCE_HB_DEFAULT;
  317. }
  318. static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
  319. bool wordIsNumber = IsADigit(styler[start]);
  320. char s[30 + 1];
  321. unsigned int i = 0;
  322. for (; i < end - start + 1 && i < 30; i++) {
  323. s[i] = styler[start + i];
  324. }
  325. s[i] = '\0';
  326. char chAttr = SCE_HP_IDENTIFIER;
  327. if (0 == strcmp(prevWord, "class"))
  328. chAttr = SCE_HP_CLASSNAME;
  329. else if (0 == strcmp(prevWord, "def"))
  330. chAttr = SCE_HP_DEFNAME;
  331. else if (wordIsNumber)
  332. chAttr = SCE_HP_NUMBER;
  333. else if (keywords.InList(s))
  334. chAttr = SCE_HP_WORD;
  335. else if (isMako && 0 == strcmp(s, "block"))
  336. chAttr = SCE_HP_WORD;
  337. styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
  338. strcpy(prevWord, s);
  339. }
  340. // Update the word colour to default or keyword
  341. // Called when in a PHP word
  342. static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  343. char chAttr = SCE_HPHP_DEFAULT;
  344. bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
  345. if (wordIsNumber)
  346. chAttr = SCE_HPHP_NUMBER;
  347. else {
  348. char s[100];
  349. GetTextSegment(styler, start, end, s, sizeof(s));
  350. if (keywords.InList(s))
  351. chAttr = SCE_HPHP_WORD;
  352. }
  353. styler.ColourTo(end, chAttr);
  354. }
  355. static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
  356. char s[30 + 1];
  357. unsigned int i = 0;
  358. for (; i < end - start + 1 && i < 30; i++) {
  359. s[i] = styler[start + i];
  360. }
  361. s[i] = '\0';
  362. return keywords.InList(s);
  363. }
  364. static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
  365. char s[30 + 1];
  366. unsigned int i = 0;
  367. for (; i < end - start + 1 && i < 30; i++) {
  368. s[i] = styler[start + i];
  369. }
  370. s[i] = '\0';
  371. return (0 == strcmp(s, "[CDATA["));
  372. }
  373. // Return the first state to reach when entering a scripting language
  374. static int StateForScript(script_type scriptLanguage) {
  375. int Result;
  376. switch (scriptLanguage) {
  377. case eScriptVBS:
  378. Result = SCE_HB_START;
  379. break;
  380. case eScriptPython:
  381. Result = SCE_HP_START;
  382. break;
  383. case eScriptPHP:
  384. Result = SCE_HPHP_DEFAULT;
  385. break;
  386. case eScriptXML:
  387. Result = SCE_H_TAGUNKNOWN;
  388. break;
  389. case eScriptSGML:
  390. Result = SCE_H_SGML_DEFAULT;
  391. break;
  392. case eScriptComment:
  393. Result = SCE_H_COMMENT;
  394. break;
  395. default :
  396. Result = SCE_HJ_START;
  397. break;
  398. }
  399. return Result;
  400. }
  401. static inline bool ishtmlwordchar(int ch) {
  402. return !isascii(ch) ||
  403. (isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
  404. }
  405. static inline bool issgmlwordchar(int ch) {
  406. return !isascii(ch) ||
  407. (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
  408. }
  409. static inline bool IsPhpWordStart(int ch) {
  410. return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
  411. }
  412. static inline bool IsPhpWordChar(int ch) {
  413. return IsADigit(ch) || IsPhpWordStart(ch);
  414. }
  415. static bool InTagState(int state) {
  416. return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
  417. state == SCE_H_SCRIPT ||
  418. state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
  419. state == SCE_H_NUMBER || state == SCE_H_OTHER ||
  420. state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
  421. }
  422. static bool IsCommentState(const int state) {
  423. return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
  424. }
  425. static bool IsScriptCommentState(const int state) {
  426. return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
  427. state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
  428. }
  429. static bool isLineEnd(int ch) {
  430. return ch == '\r' || ch == '\n';
  431. }
  432. static bool isOKBeforeRE(int ch) {
  433. return (ch == '(') || (ch == '=') || (ch == ',');
  434. }
  435. static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
  436. if (strlen(blockType) == 0) {
  437. return ((ch == '%') && (chNext == '>'));
  438. } else if ((0 == strcmp(blockType, "inherit")) ||
  439. (0 == strcmp(blockType, "namespace")) ||
  440. (0 == strcmp(blockType, "include")) ||
  441. (0 == strcmp(blockType, "page"))) {
  442. return ((ch == '/') && (chNext == '>'));
  443. } else if (0 == strcmp(blockType, "%")) {
  444. if (ch == '/' && isLineEnd(chNext))
  445. return 1;
  446. else
  447. return isLineEnd(ch);
  448. } else if (0 == strcmp(blockType, "{")) {
  449. return ch == '}';
  450. } else {
  451. return (ch == '>');
  452. }
  453. }
  454. static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
  455. if (strlen(blockType) == 0) {
  456. return 0;
  457. } else if (0 == strcmp(blockType, "%")) {
  458. return ((ch == '%') && (chNext == '}'));
  459. } else if (0 == strcmp(blockType, "{")) {
  460. return ((ch == '}') && (chNext == '}'));
  461. } else {
  462. return 0;
  463. }
  464. }
  465. static bool isPHPStringState(int state) {
  466. return
  467. (state == SCE_HPHP_HSTRING) ||
  468. (state == SCE_HPHP_SIMPLESTRING) ||
  469. (state == SCE_HPHP_HSTRING_VARIABLE) ||
  470. (state == SCE_HPHP_COMPLEX_VARIABLE);
  471. }
  472. static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
  473. int j;
  474. const int beginning = i - 1;
  475. bool isValidSimpleString = false;
  476. while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
  477. i++;
  478. char ch = styler.SafeGetCharAt(i);
  479. const char chNext = styler.SafeGetCharAt(i + 1);
  480. if (!IsPhpWordStart(ch)) {
  481. if (ch == '\'' && IsPhpWordStart(chNext)) {
  482. i++;
  483. ch = chNext;
  484. isSimpleString = true;
  485. } else {
  486. phpStringDelimiter[0] = '\0';
  487. return beginning;
  488. }
  489. }
  490. phpStringDelimiter[0] = ch;
  491. i++;
  492. for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
  493. if (!IsPhpWordChar(styler[j])) {
  494. if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
  495. isValidSimpleString = true;
  496. j++;
  497. break;
  498. } else {
  499. phpStringDelimiter[0] = '\0';
  500. return beginning;
  501. }
  502. }
  503. if (j - i < phpStringDelimiterSize - 2)
  504. phpStringDelimiter[j-i+1] = styler[j];
  505. else
  506. i++;
  507. }
  508. if (isSimpleString && !isValidSimpleString) {
  509. phpStringDelimiter[0] = '\0';
  510. return beginning;
  511. }
  512. phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
  513. return j - 1;
  514. }
  515. static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
  516. Accessor &styler, bool isXml) {
  517. WordList &keywords = *keywordlists[0];
  518. WordList &keywords2 = *keywordlists[1];
  519. WordList &keywords3 = *keywordlists[2];
  520. WordList &keywords4 = *keywordlists[3];
  521. WordList &keywords5 = *keywordlists[4];
  522. WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
  523. // Lexer for HTML requires more lexical states (8 bits worth) than most lexers
  524. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  525. char prevWord[200];
  526. prevWord[0] = '\0';
  527. char phpStringDelimiter[200]; // PHP is not limited in length, we are
  528. phpStringDelimiter[0] = '\0';
  529. int StateToPrint = initStyle;
  530. int state = stateForPrintState(StateToPrint);
  531. char makoBlockType[200];
  532. makoBlockType[0] = '\0';
  533. int makoComment = 0;
  534. char djangoBlockType[2];
  535. djangoBlockType[0] = '\0';
  536. // If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
  537. if (InTagState(state)) {
  538. while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
  539. startPos--;
  540. length++;
  541. }
  542. state = SCE_H_DEFAULT;
  543. }
  544. // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
  545. if (isPHPStringState(state)) {
  546. while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
  547. startPos--;
  548. length++;
  549. state = styler.StyleAt(startPos);
  550. }
  551. if (startPos == 0)
  552. state = SCE_H_DEFAULT;
  553. }
  554. styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
  555. int lineCurrent = styler.GetLine(startPos);
  556. int lineState;
  557. if (lineCurrent > 0) {
  558. lineState = styler.GetLineState(lineCurrent-1);
  559. } else {
  560. // Default client and ASP scripting language is JavaScript
  561. lineState = eScriptJS << 8;
  562. // property asp.default.language
  563. // Script in ASP code is initially assumed to be in JavaScript.
  564. // To change this to VBScript set asp.default.language to 2. Python is 3.
  565. lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
  566. }
  567. script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
  568. bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
  569. bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
  570. bool tagDontFold = false; //some HTML tags should not be folded
  571. script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
  572. script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
  573. int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
  574. script_type scriptLanguage = ScriptOfState(state);
  575. // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
  576. if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
  577. scriptLanguage = eScriptComment;
  578. }
  579. script_type beforeLanguage = ScriptOfState(beforePreProc);
  580. // property fold.html
  581. // Folding is turned on or off for HTML and XML files with this option.
  582. // The fold option must also be on for folding to occur.
  583. const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
  584. const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
  585. // property fold.html.preprocessor
  586. // Folding is turned on or off for scripts embedded in HTML files with this option.
  587. // The default is on.
  588. const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
  589. const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
  590. // property fold.hypertext.comment
  591. // Allow folding for comments in scripts embedded in HTML.
  592. // The default is off.
  593. const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
  594. // property fold.hypertext.heredoc
  595. // Allow folding for heredocs in scripts embedded in HTML.
  596. // The default is off.
  597. const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
  598. // property html.tags.case.sensitive
  599. // For XML and HTML, setting this property to 1 will make tags match in a case
  600. // sensitive way which is the expected behaviour for XML and XHTML.
  601. const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
  602. // property lexer.xml.allow.scripts
  603. // Set to 0 to disable scripts in XML.
  604. const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
  605. // property lexer.html.mako
  606. // Set to 1 to enable the mako template language.
  607. const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
  608. // property lexer.html.django
  609. // Set to 1 to enable the django template language.
  610. const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
  611. const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
  612. const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
  613. const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
  614. int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
  615. int levelCurrent = levelPrev;
  616. int visibleChars = 0;
  617. int lineStartVisibleChars = 0;
  618. int chPrev = ' ';
  619. int ch = ' ';
  620. int chPrevNonWhite = ' ';
  621. // look back to set chPrevNonWhite properly for better regex colouring
  622. if (scriptLanguage == eScriptJS && startPos > 0) {
  623. int back = startPos;
  624. int style = 0;
  625. while (--back) {
  626. style = styler.StyleAt(back);
  627. if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
  628. // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
  629. break;
  630. }
  631. if (style == SCE_HJ_SYMBOLS) {
  632. chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
  633. }
  634. }
  635. styler.StartSegment(startPos);
  636. const int lengthDoc = startPos + length;
  637. for (int i = startPos; i < lengthDoc; i++) {
  638. const int chPrev2 = chPrev;
  639. chPrev = ch;
  640. if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
  641. state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
  642. chPrevNonWhite = ch;
  643. ch = static_cast<unsigned char>(styler[i]);
  644. int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
  645. const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
  646. // Handle DBCS codepages
  647. if (styler.IsLeadByte(static_cast<char>(ch))) {
  648. chPrev = ' ';
  649. i += 1;
  650. continue;
  651. }
  652. if ((!IsASpace(ch) || !foldCompact) && fold)
  653. visibleChars++;
  654. if (!IsASpace(ch))
  655. lineStartVisibleChars++;
  656. // decide what is the current state to print (depending of the script tag)
  657. StateToPrint = statePrintForState(state, inScriptType);
  658. // handle script folding
  659. if (fold) {
  660. switch (scriptLanguage) {
  661. case eScriptJS:
  662. case eScriptPHP:
  663. //not currently supported case eScriptVBS:
  664. if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
  665. //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
  666. //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
  667. if (ch == '#') {
  668. int j = i + 1;
  669. while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
  670. j++;
  671. }
  672. if (styler.Match(j, "region") || styler.Match(j, "if")) {
  673. levelCurrent++;
  674. } else if (styler.Match(j, "end")) {
  675. levelCurrent--;
  676. }
  677. } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
  678. levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
  679. }
  680. } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
  681. levelCurrent--;
  682. }
  683. break;
  684. case eScriptPython:
  685. if (state != SCE_HP_COMMENTLINE && !isMako) {
  686. if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
  687. levelCurrent++;
  688. } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
  689. // check if the number of tabs is lower than the level
  690. int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
  691. for (int j = 0; Findlevel > 0; j++) {
  692. char chTmp = styler.SafeGetCharAt(i + j + 1);
  693. if (chTmp == '\t') {
  694. Findlevel -= 8;
  695. } else if (chTmp == ' ') {
  696. Findlevel--;
  697. } else {
  698. break;
  699. }
  700. }
  701. if (Findlevel > 0) {
  702. levelCurrent -= Findlevel / 8;
  703. if (Findlevel % 8)
  704. levelCurrent--;
  705. }
  706. }
  707. }
  708. break;
  709. default:
  710. break;
  711. }
  712. }
  713. if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
  714. // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
  715. // Avoid triggering two times on Dos/Win
  716. // New line -> record any line state onto /next/ line
  717. if (fold) {
  718. int lev = levelPrev;
  719. if (visibleChars == 0)
  720. lev |= SC_FOLDLEVELWHITEFLAG;
  721. if ((levelCurrent > levelPrev) && (visibleChars > 0))
  722. lev |= SC_FOLDLEVELHEADERFLAG;
  723. styler.SetLevel(lineCurrent, lev);
  724. visibleChars = 0;
  725. levelPrev = levelCurrent;
  726. }
  727. styler.SetLineState(lineCurrent,
  728. ((inScriptType & 0x03) << 0) |
  729. ((tagOpened & 0x01) << 2) |
  730. ((tagClosing & 0x01) << 3) |
  731. ((aspScript & 0x0F) << 4) |
  732. ((clientScript & 0x0F) << 8) |
  733. ((beforePreProc & 0xFF) << 12));
  734. lineCurrent++;
  735. lineStartVisibleChars = 0;
  736. }
  737. // handle start of Mako comment line
  738. if (isMako && ch == '#' && chNext == '#') {
  739. makoComment = 1;
  740. }
  741. // handle end of Mako comment line
  742. else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
  743. makoComment = 0;
  744. styler.ColourTo(i, SCE_HP_COMMENTLINE);
  745. state = SCE_HP_DEFAULT;
  746. }
  747. // Allow falling through to mako handling code if newline is going to end a block
  748. if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
  749. (!isMako || (0 != strcmp(makoBlockType, "%")))) {
  750. }
  751. // generic end of script processing
  752. else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
  753. // Check if it's the end of the script tag (or any other HTML tag)
  754. switch (state) {
  755. // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
  756. case SCE_H_DOUBLESTRING:
  757. case SCE_H_SINGLESTRING:
  758. case SCE_HJ_COMMENT:
  759. case SCE_HJ_COMMENTDOC:
  760. //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
  761. // the end of script marker from some JS interpreters.
  762. case SCE_HB_COMMENTLINE:
  763. case SCE_HBA_COMMENTLINE:
  764. case SCE_HJ_DOUBLESTRING:
  765. case SCE_HJ_SINGLESTRING:
  766. case SCE_HJ_REGEX:
  767. case SCE_HB_STRING:
  768. case SCE_HBA_STRING:
  769. case SCE_HP_STRING:
  770. case SCE_HP_TRIPLE:
  771. case SCE_HP_TRIPLEDOUBLE:
  772. case SCE_HPHP_HSTRING:
  773. case SCE_HPHP_SIMPLESTRING:
  774. case SCE_HPHP_COMMENT:
  775. case SCE_HPHP_COMMENTLINE:
  776. break;
  777. default :
  778. // check if the closing tag is a script tag
  779. if (const char *tag =
  780. state == SCE_HJ_COMMENTLINE || isXml ? "script" :
  781. state == SCE_H_COMMENT ? "comment" : 0) {
  782. int j = i + 2;
  783. int chr;
  784. do {
  785. chr = static_cast<int>(*tag++);
  786. } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
  787. if (chr != 0) break;
  788. }
  789. // closing tag of the script (it's a closing HTML tag anyway)
  790. styler.ColourTo(i - 1, StateToPrint);
  791. state = SCE_H_TAGUNKNOWN;
  792. inScriptType = eHtml;
  793. scriptLanguage = eScriptNone;
  794. clientScript = eScriptJS;
  795. i += 2;
  796. visibleChars += 2;
  797. tagClosing = true;
  798. continue;
  799. }
  800. }
  801. /////////////////////////////////////
  802. // handle the start of PHP pre-processor = Non-HTML
  803. else if ((state != SCE_H_ASPAT) &&
  804. !isPHPStringState(state) &&
  805. (state != SCE_HPHP_COMMENT) &&
  806. (state != SCE_HPHP_COMMENTLINE) &&
  807. (ch == '<') &&
  808. (chNext == '?') &&
  809. !IsScriptCommentState(state)) {
  810. beforeLanguage = scriptLanguage;
  811. scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
  812. if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
  813. styler.ColourTo(i - 1, StateToPrint);
  814. beforePreProc = state;
  815. i++;
  816. visibleChars++;
  817. i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
  818. if (scriptLanguage == eScriptXML)
  819. styler.ColourTo(i, SCE_H_XMLSTART);
  820. else
  821. styler.ColourTo(i, SCE_H_QUESTION);
  822. state = StateForScript(scriptLanguage);
  823. if (inScriptType == eNonHtmlScript)
  824. inScriptType = eNonHtmlScriptPreProc;
  825. else
  826. inScriptType = eNonHtmlPreProc;
  827. // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
  828. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  829. levelCurrent++;
  830. }
  831. // should be better
  832. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  833. continue;
  834. }
  835. // handle the start Mako template Python code
  836. else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
  837. (lineStartVisibleChars == 1 && ch == '%') ||
  838. (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
  839. (ch == '$' && chNext == '{') ||
  840. (ch == '<' && chNext == '/' && chNext2 == '%'))) {
  841. if (ch == '%' || ch == '/')
  842. strcpy(makoBlockType, "%");
  843. else if (ch == '$')
  844. strcpy(makoBlockType, "{");
  845. else if (chNext == '/')
  846. GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
  847. else
  848. GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
  849. styler.ColourTo(i - 1, StateToPrint);
  850. beforePreProc = state;
  851. if (inScriptType == eNonHtmlScript)
  852. inScriptType = eNonHtmlScriptPreProc;
  853. else
  854. inScriptType = eNonHtmlPreProc;
  855. if (chNext == '/') {
  856. i += 2;
  857. visibleChars += 2;
  858. } else if (ch != '%') {
  859. i++;
  860. visibleChars++;
  861. }
  862. state = SCE_HP_START;
  863. scriptLanguage = eScriptPython;
  864. styler.ColourTo(i, SCE_H_ASP);
  865. if (ch != '%' && ch != '$' && ch != '/') {
  866. i += static_cast<int>(strlen(makoBlockType));
  867. visibleChars += static_cast<int>(strlen(makoBlockType));
  868. if (keywords4.InList(makoBlockType))
  869. styler.ColourTo(i, SCE_HP_WORD);
  870. else
  871. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  872. }
  873. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  874. continue;
  875. }
  876. // handle the start/end of Django comment
  877. else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
  878. styler.ColourTo(i - 1, StateToPrint);
  879. beforePreProc = state;
  880. beforeLanguage = scriptLanguage;
  881. if (inScriptType == eNonHtmlScript)
  882. inScriptType = eNonHtmlScriptPreProc;
  883. else
  884. inScriptType = eNonHtmlPreProc;
  885. i += 1;
  886. visibleChars += 1;
  887. scriptLanguage = eScriptComment;
  888. state = SCE_H_COMMENT;
  889. styler.ColourTo(i, SCE_H_ASP);
  890. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  891. continue;
  892. } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
  893. styler.ColourTo(i - 1, StateToPrint);
  894. i += 1;
  895. visibleChars += 1;
  896. styler.ColourTo(i, SCE_H_ASP);
  897. state = beforePreProc;
  898. if (inScriptType == eNonHtmlScriptPreProc)
  899. inScriptType = eNonHtmlScript;
  900. else
  901. inScriptType = eHtml;
  902. scriptLanguage = beforeLanguage;
  903. continue;
  904. }
  905. // handle the start Django template code
  906. else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' || chNext == '{'))) {
  907. if (chNext == '%')
  908. strcpy(djangoBlockType, "%");
  909. else
  910. strcpy(djangoBlockType, "{");
  911. styler.ColourTo(i - 1, StateToPrint);
  912. beforePreProc = state;
  913. if (inScriptType == eNonHtmlScript)
  914. inScriptType = eNonHtmlScriptPreProc;
  915. else
  916. inScriptType = eNonHtmlPreProc;
  917. i += 1;
  918. visibleChars += 1;
  919. state = SCE_HP_START;
  920. beforeLanguage = scriptLanguage;
  921. scriptLanguage = eScriptPython;
  922. styler.ColourTo(i, SCE_H_ASP);
  923. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  924. continue;
  925. }
  926. // handle the start of ASP pre-processor = Non-HTML
  927. else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
  928. styler.ColourTo(i - 1, StateToPrint);
  929. beforePreProc = state;
  930. if (inScriptType == eNonHtmlScript)
  931. inScriptType = eNonHtmlScriptPreProc;
  932. else
  933. inScriptType = eNonHtmlPreProc;
  934. if (chNext2 == '@') {
  935. i += 2; // place as if it was the second next char treated
  936. visibleChars += 2;
  937. state = SCE_H_ASPAT;
  938. } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
  939. styler.ColourTo(i + 3, SCE_H_ASP);
  940. state = SCE_H_XCCOMMENT;
  941. scriptLanguage = eScriptVBS;
  942. continue;
  943. } else {
  944. if (chNext2 == '=') {
  945. i += 2; // place as if it was the second next char treated
  946. visibleChars += 2;
  947. } else {
  948. i++; // place as if it was the next char treated
  949. visibleChars++;
  950. }
  951. state = StateForScript(aspScript);
  952. }
  953. scriptLanguage = eScriptVBS;
  954. styler.ColourTo(i, SCE_H_ASP);
  955. // fold whole script
  956. if (foldHTMLPreprocessor)
  957. levelCurrent++;
  958. // should be better
  959. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  960. continue;
  961. }
  962. /////////////////////////////////////
  963. // handle the start of SGML language (DTD)
  964. else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
  965. (chPrev == '<') &&
  966. (ch == '!') &&
  967. (StateToPrint != SCE_H_CDATA) &&
  968. (!IsCommentState(StateToPrint)) &&
  969. (!IsScriptCommentState(StateToPrint))) {
  970. beforePreProc = state;
  971. styler.ColourTo(i - 2, StateToPrint);
  972. if ((chNext == '-') && (chNext2 == '-')) {
  973. state = SCE_H_COMMENT; // wait for a pending command
  974. styler.ColourTo(i + 2, SCE_H_COMMENT);
  975. i += 2; // follow styling after the --
  976. } else if (isWordCdata(i + 1, i + 7, styler)) {
  977. state = SCE_H_CDATA;
  978. } else {
  979. styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
  980. scriptLanguage = eScriptSGML;
  981. state = SCE_H_SGML_COMMAND; // wait for a pending command
  982. }
  983. // fold whole tag (-- when closing the tag)
  984. if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
  985. levelCurrent++;
  986. continue;
  987. }
  988. // handle the end of Mako Python code
  989. else if (isMako &&
  990. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  991. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  992. isMakoBlockEnd(ch, chNext, makoBlockType)) {
  993. if (state == SCE_H_ASPAT) {
  994. aspScript = segIsScriptingIndicator(styler,
  995. styler.GetStartSegment(), i - 1, aspScript);
  996. }
  997. if (state == SCE_HP_WORD) {
  998. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  999. } else {
  1000. styler.ColourTo(i - 1, StateToPrint);
  1001. }
  1002. if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
  1003. i++;
  1004. visibleChars++;
  1005. }
  1006. else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
  1007. i++;
  1008. visibleChars++;
  1009. }
  1010. if (0 != strcmp(makoBlockType, "%") || ch == '/') {
  1011. styler.ColourTo(i, SCE_H_ASP);
  1012. }
  1013. state = beforePreProc;
  1014. if (inScriptType == eNonHtmlScriptPreProc)
  1015. inScriptType = eNonHtmlScript;
  1016. else
  1017. inScriptType = eHtml;
  1018. scriptLanguage = eScriptNone;
  1019. continue;
  1020. }
  1021. // handle the end of Django template code
  1022. else if (isDjango &&
  1023. ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  1024. (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
  1025. isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
  1026. if (state == SCE_H_ASPAT) {
  1027. aspScript = segIsScriptingIndicator(styler,
  1028. styler.GetStartSegment(), i - 1, aspScript);
  1029. }
  1030. if (state == SCE_HP_WORD) {
  1031. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  1032. } else {
  1033. styler.ColourTo(i - 1, StateToPrint);
  1034. }
  1035. i += 1;
  1036. visibleChars += 1;
  1037. styler.ColourTo(i, SCE_H_ASP);
  1038. state = beforePreProc;
  1039. if (inScriptType == eNonHtmlScriptPreProc)
  1040. inScriptType = eNonHtmlScript;
  1041. else
  1042. inScriptType = eHtml;
  1043. scriptLanguage = beforeLanguage;
  1044. continue;
  1045. }
  1046. // handle the end of a pre-processor = Non-HTML
  1047. else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
  1048. (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
  1049. (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
  1050. ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
  1051. if (state == SCE_H_ASPAT) {
  1052. aspScript = segIsScriptingIndicator(styler,
  1053. styler.GetStartSegment(), i - 1, aspScript);
  1054. }
  1055. // Bounce out of any ASP mode
  1056. switch (state) {
  1057. case SCE_HJ_WORD:
  1058. classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
  1059. break;
  1060. case SCE_HB_WORD:
  1061. classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
  1062. break;
  1063. case SCE_HP_WORD:
  1064. classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
  1065. break;
  1066. case SCE_HPHP_WORD:
  1067. classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
  1068. break;
  1069. case SCE_H_XCCOMMENT:
  1070. styler.ColourTo(i - 1, state);
  1071. break;
  1072. default :
  1073. styler.ColourTo(i - 1, StateToPrint);
  1074. break;
  1075. }
  1076. if (scriptLanguage != eScriptSGML) {
  1077. i++;
  1078. visibleChars++;
  1079. }
  1080. if (ch == '%')
  1081. styler.ColourTo(i, SCE_H_ASP);
  1082. else if (scriptLanguage == eScriptXML)
  1083. styler.ColourTo(i, SCE_H_XMLEND);
  1084. else if (scriptLanguage == eScriptSGML)
  1085. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1086. else
  1087. styler.ColourTo(i, SCE_H_QUESTION);
  1088. state = beforePreProc;
  1089. if (inScriptType == eNonHtmlScriptPreProc)
  1090. inScriptType = eNonHtmlScript;
  1091. else
  1092. inScriptType = eHtml;
  1093. // Unfold all scripting languages, except for XML tag
  1094. if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
  1095. levelCurrent--;
  1096. }
  1097. scriptLanguage = beforeLanguage;
  1098. continue;
  1099. }
  1100. /////////////////////////////////////
  1101. switch (state) {
  1102. case SCE_H_DEFAULT:
  1103. if (ch == '<') {
  1104. // in HTML, fold on tag open and unfold on tag close
  1105. tagOpened = true;
  1106. tagClosing = (chNext == '/');
  1107. styler.ColourTo(i - 1, StateToPrint);
  1108. if (chNext != '!')
  1109. state = SCE_H_TAGUNKNOWN;
  1110. } else if (ch == '&') {
  1111. styler.ColourTo(i - 1, SCE_H_DEFAULT);
  1112. state = SCE_H_ENTITY;
  1113. }
  1114. break;
  1115. case SCE_H_SGML_DEFAULT:
  1116. case SCE_H_SGML_BLOCK_DEFAULT:
  1117. // if (scriptLanguage == eScriptSGMLblock)
  1118. // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
  1119. if (ch == '\"') {
  1120. styler.ColourTo(i - 1, StateToPrint);
  1121. state = SCE_H_SGML_DOUBLESTRING;
  1122. } else if (ch == '\'') {
  1123. styler.ColourTo(i - 1, StateToPrint);
  1124. state = SCE_H_SGML_SIMPLESTRING;
  1125. } else if ((ch == '-') && (chPrev == '-')) {
  1126. if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
  1127. styler.ColourTo(i - 2, StateToPrint);
  1128. }
  1129. state = SCE_H_SGML_COMMENT;
  1130. } else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
  1131. styler.ColourTo(i - 2, StateToPrint);
  1132. state = SCE_H_SGML_ENTITY;
  1133. } else if (ch == '#') {
  1134. styler.ColourTo(i - 1, StateToPrint);
  1135. state = SCE_H_SGML_SPECIAL;
  1136. } else if (ch == '[') {
  1137. styler.ColourTo(i - 1, StateToPrint);
  1138. scriptLanguage = eScriptSGMLblock;
  1139. state = SCE_H_SGML_BLOCK_DEFAULT;
  1140. } else if (ch == ']') {
  1141. if (scriptLanguage == eScriptSGMLblock) {
  1142. styler.ColourTo(i, StateToPrint);
  1143. scriptLanguage = eScriptSGML;
  1144. } else {
  1145. styler.ColourTo(i - 1, StateToPrint);
  1146. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1147. }
  1148. state = SCE_H_SGML_DEFAULT;
  1149. } else if (scriptLanguage == eScriptSGMLblock) {
  1150. if ((ch == '!') && (chPrev == '<')) {
  1151. styler.ColourTo(i - 2, StateToPrint);
  1152. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1153. state = SCE_H_SGML_COMMAND;
  1154. } else if (ch == '>') {
  1155. styler.ColourTo(i - 1, StateToPrint);
  1156. styler.ColourTo(i, SCE_H_SGML_DEFAULT);
  1157. }
  1158. }
  1159. break;
  1160. case SCE_H_SGML_COMMAND:
  1161. if ((ch == '-') && (chPrev == '-')) {
  1162. styler.ColourTo(i - 2, StateToPrint);
  1163. state = SCE_H_SGML_COMMENT;
  1164. } else if (!issgmlwordchar(ch)) {
  1165. if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
  1166. styler.ColourTo(i - 1, StateToPrint);
  1167. state = SCE_H_SGML_1ST_PARAM;
  1168. } else {
  1169. state = SCE_H_SGML_ERROR;
  1170. }
  1171. }
  1172. break;
  1173. case SCE_H_SGML_1ST_PARAM:
  1174. // wait for the beginning of the word
  1175. if ((ch == '-') && (chPrev == '-')) {
  1176. if (scriptLanguage == eScriptSGMLblock) {
  1177. styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
  1178. } else {
  1179. styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
  1180. }
  1181. state = SCE_H_SGML_1ST_PARAM_COMMENT;
  1182. } else if (issgmlwordchar(ch)) {
  1183. if (scriptLanguage == eScriptSGMLblock) {
  1184. styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
  1185. } else {
  1186. styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
  1187. }
  1188. // find the length of the word
  1189. int size = 1;
  1190. while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
  1191. size++;
  1192. styler.ColourTo(i + size - 1, StateToPrint);
  1193. i += size - 1;
  1194. visibleChars += size - 1;
  1195. ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
  1196. if (scriptLanguage == eScriptSGMLblock) {
  1197. state = SCE_H_SGML_BLOCK_DEFAULT;
  1198. } else {
  1199. state = SCE_H_SGML_DEFAULT;
  1200. }
  1201. continue;
  1202. }
  1203. break;
  1204. case SCE_H_SGML_ERROR:
  1205. if ((ch == '-') && (chPrev == '-')) {
  1206. styler.ColourTo(i - 2, StateToPrint);
  1207. state = SCE_H_SGML_COMMENT;
  1208. }
  1209. case SCE_H_SGML_DOUBLESTRING:
  1210. if (ch == '\"') {
  1211. styler.ColourTo(i, StateToPrint);
  1212. state = SCE_H_SGML_DEFAULT;
  1213. }
  1214. break;
  1215. case SCE_H_SGML_SIMPLESTRING:
  1216. if (ch == '\'') {
  1217. styler.ColourTo(i, StateToPrint);
  1218. state = SCE_H_SGML_DEFAULT;
  1219. }
  1220. break;
  1221. case SCE_H_SGML_COMMENT:
  1222. if ((ch == '-') && (chPrev == '-')) {
  1223. styler.ColourTo(i, StateToPrint);
  1224. state = SCE_H_SGML_DEFAULT;
  1225. }
  1226. break;
  1227. case SCE_H_CDATA:
  1228. if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
  1229. styler.ColourTo(i, StateToPrint);
  1230. state = SCE_H_DEFAULT;
  1231. levelCurrent--;
  1232. }
  1233. break;
  1234. case SCE_H_COMMENT:
  1235. if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
  1236. styler.ColourTo(i, StateToPrint);
  1237. state = SCE_H_DEFAULT;
  1238. levelCurrent--;
  1239. }
  1240. break;
  1241. case SCE_H_SGML_1ST_PARAM_COMMENT:
  1242. if ((ch == '-') && (chPrev == '-')) {
  1243. styler.ColourTo(i, SCE_H_SGML_COMMENT);
  1244. state = SCE_H_SGML_1ST_PARAM;
  1245. }
  1246. break;
  1247. case SCE_H_SGML_SPECIAL:
  1248. if (!(isascii(ch) && isupper(ch))) {
  1249. styler.ColourTo(i - 1, StateToPrint);
  1250. if (isalnum(ch)) {
  1251. state = SCE_H_SGML_ERROR;
  1252. } else {
  1253. state = SCE_H_SGML_DEFAULT;
  1254. }
  1255. }
  1256. break;
  1257. case SCE_H_SGML_ENTITY:
  1258. if (ch == ';') {
  1259. styler.ColourTo(i, StateToPrint);
  1260. state = SCE_H_SGML_DEFAULT;
  1261. } else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
  1262. styler.ColourTo(i, SCE_H_SGML_ERROR);
  1263. state = SCE_H_SGML_DEFAULT;
  1264. }
  1265. break;
  1266. case SCE_H_ENTITY:
  1267. if (ch == ';') {
  1268. styler.ColourTo(i, StateToPrint);
  1269. state = SCE_H_DEFAULT;
  1270. }
  1271. if (ch != '#' && !(isascii(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
  1272. && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
  1273. if (!isascii(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
  1274. styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
  1275. else
  1276. styler.ColourTo(i, SCE_H_TAGUNKNOWN);
  1277. state = SCE_H_DEFAULT;
  1278. }
  1279. break;
  1280. case SCE_H_TAGUNKNOWN:
  1281. if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
  1282. int eClass = classifyTagHTML(styler.GetStartSegment(),
  1283. i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
  1284. if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
  1285. if (!tagClosing) {
  1286. inScriptType = eNonHtmlScript;
  1287. scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
  1288. } else {
  1289. scriptLanguage = eScriptNone;
  1290. }
  1291. eClass = SCE_H_TAG;
  1292. }
  1293. if (ch == '>') {
  1294. styler.ColourTo(i, eClass);
  1295. if (inScriptType == eNonHtmlScript) {
  1296. state = StateForScript(scriptLanguage);
  1297. } else {
  1298. state = SCE_H_DEFAULT;
  1299. }
  1300. tagOpened = false;
  1301. if (!tagDontFold) {
  1302. if (tagClosing) {
  1303. levelCurrent--;
  1304. } else {
  1305. levelCurrent++;
  1306. }
  1307. }
  1308. tagClosing = false;
  1309. } else if (ch == '/' && chNext == '>') {
  1310. if (eClass == SCE_H_TAGUNKNOWN) {
  1311. styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
  1312. } else {
  1313. styler.ColourTo(i - 1, StateToPrint);
  1314. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1315. }
  1316. i++;
  1317. ch = chNext;
  1318. state = SCE_H_DEFAULT;
  1319. tagOpened = false;
  1320. } else {
  1321. if (eClass != SCE_H_TAGUNKNOWN) {
  1322. if (eClass == SCE_H_SGML_DEFAULT) {
  1323. state = SCE_H_SGML_DEFAULT;
  1324. } else {
  1325. state = SCE_H_OTHER;
  1326. }
  1327. }
  1328. }
  1329. }
  1330. break;
  1331. case SCE_H_ATTRIBUTE:
  1332. if (!setAttributeContinue.Contains(ch)) {
  1333. if (inScriptType == eNonHtmlScript) {
  1334. int scriptLanguagePrev = scriptLanguage;
  1335. clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
  1336. scriptLanguage = clientScript;
  1337. if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
  1338. inScriptType = eHtml;
  1339. }
  1340. classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
  1341. if (ch == '>') {
  1342. styler.ColourTo(i, SCE_H_TAG);
  1343. if (inScriptType == eNonHtmlScript) {
  1344. state = StateForScript(scriptLanguage);
  1345. } else {
  1346. state = SCE_H_DEFAULT;
  1347. }
  1348. tagOpened = false;
  1349. if (!tagDontFold) {
  1350. if (tagClosing) {
  1351. levelCurrent--;
  1352. } else {
  1353. levelCurrent++;
  1354. }
  1355. }
  1356. tagClosing = false;
  1357. } else if (ch == '=') {
  1358. styler.ColourTo(i, SCE_H_OTHER);
  1359. state = SCE_H_VALUE;
  1360. } else {
  1361. state = SCE_H_OTHER;
  1362. }
  1363. }
  1364. break;
  1365. case SCE_H_OTHER:
  1366. if (ch == '>') {
  1367. styler.ColourTo(i - 1, StateToPrint);
  1368. styler.ColourTo(i, SCE_H_TAG);
  1369. if (inScriptType == eNonHtmlScript) {
  1370. state = StateForScript(scriptLanguage);
  1371. } else {
  1372. state = SCE_H_DEFAULT;
  1373. }
  1374. tagOpened = false;
  1375. if (!tagDontFold) {
  1376. if (tagClosing) {
  1377. levelCurrent--;
  1378. } else {
  1379. levelCurrent++;
  1380. }
  1381. }
  1382. tagClosing = false;
  1383. } else if (ch == '\"') {
  1384. styler.ColourTo(i - 1, StateToPrint);
  1385. state = SCE_H_DOUBLESTRING;
  1386. } else if (ch == '\'') {
  1387. styler.ColourTo(i - 1, StateToPrint);
  1388. state = SCE_H_SINGLESTRING;
  1389. } else if (ch == '=') {
  1390. styler.ColourTo(i, StateToPrint);
  1391. state = SCE_H_VALUE;
  1392. } else if (ch == '/' && chNext == '>') {
  1393. styler.ColourTo(i - 1, StateToPrint);
  1394. styler.ColourTo(i + 1, SCE_H_TAGEND);
  1395. i++;
  1396. ch = chNext;
  1397. state = SCE_H_DEFAULT;
  1398. tagOpened = false;
  1399. } else if (ch == '?' && chNext == '>') {
  1400. styler.ColourTo(i - 1, StateToPrint);
  1401. styler.ColourTo(i + 1, SCE_H_XMLEND);
  1402. i++;
  1403. ch = chNext;
  1404. state = SCE_H_DEFAULT;
  1405. } else if (setHTMLWord.Contains(ch)) {
  1406. styler.ColourTo(i - 1, StateToPrint);
  1407. state = SCE_H_ATTRIBUTE;
  1408. }
  1409. break;
  1410. case SCE_H_DOUBLESTRING:
  1411. if (ch == '\"') {
  1412. if (inScriptType == eNonHtmlScript) {
  1413. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1414. }
  1415. styler.ColourTo(i, SCE_H_DOUBLESTRING);
  1416. state = SCE_H_OTHER;
  1417. }
  1418. break;
  1419. case SCE_H_SINGLESTRING:
  1420. if (ch == '\'') {
  1421. if (inScriptType == eNonHtmlScript) {
  1422. scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
  1423. }
  1424. styler.ColourTo(i, SCE_H_SINGLESTRING);
  1425. state = SCE_H_OTHER;
  1426. }
  1427. break;
  1428. case SCE_H_VALUE:
  1429. if (!setHTMLWord.Contains(ch)) {
  1430. if (ch == '\"' && chPrev == '=') {
  1431. // Should really test for being first character
  1432. state = SCE_H_DOUBLESTRING;
  1433. } else if (ch == '\'' && chPrev == '=') {
  1434. state = SCE_H_SINGLESTRING;
  1435. } else {
  1436. if (IsNumber(styler.GetStartSegment(), styler)) {
  1437. styler.ColourTo(i - 1, SCE_H_NUMBER);
  1438. } else {
  1439. styler.ColourTo(i - 1, StateToPrint);
  1440. }
  1441. if (ch == '>') {
  1442. styler.ColourTo(i, SCE_H_TAG);
  1443. if (inScriptType == eNonHtmlScript) {
  1444. state = StateForScript(scriptLanguage);
  1445. } else {
  1446. state = SCE_H_DEFAULT;
  1447. }
  1448. tagOpened = false;
  1449. if (!tagDontFold) {
  1450. if (tagClosing) {
  1451. levelCurrent--;
  1452. } else {
  1453. levelCurrent++;
  1454. }
  1455. }
  1456. tagClosing = false;
  1457. } else {
  1458. state = SCE_H_OTHER;
  1459. }
  1460. }
  1461. }
  1462. break;
  1463. case SCE_HJ_DEFAULT:
  1464. case SCE_HJ_START:
  1465. case SCE_HJ_SYMBOLS:
  1466. if (IsAWordStart(ch)) {
  1467. styler.ColourTo(i - 1, StateToPrint);
  1468. state = SCE_HJ_WORD;
  1469. } else if (ch == '/' && chNext == '*') {
  1470. styler.ColourTo(i - 1, StateToPrint);
  1471. if (chNext2 == '*')
  1472. state = SCE_HJ_COMMENTDOC;
  1473. else
  1474. state = SCE_HJ_COMMENT;
  1475. } else if (ch == '/' && chNext == '/') {
  1476. styler.ColourTo(i - 1, StateToPrint);
  1477. state = SCE_HJ_COMMENTLINE;
  1478. } else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
  1479. styler.ColourTo(i - 1, StateToPrint);

Large files are truncated, but you can click here to view the full file