PageRenderTime 272ms CodeModel.GetById 30ms app.highlight 202ms RepoModel.GetById 19ms app.codeStats 2ms

/scintilla/lexers/LexHTML.cxx

https://github.com/cezary12/notepad2-mod
C++ | 2184 lines | 1952 code | 119 blank | 113 comment | 1444 complexity | 88d00d6c6888a757dc4386a8b09e7a8a MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>

   9#include <string.h>

  10#include <stdio.h>

  11#include <stdarg.h>

  12#include <assert.h>

  13#include <ctype.h>

  14
  15#include "ILexer.h"

  16#include "Scintilla.h"

  17#include "SciLexer.h"

  18
  19#include "StringCopy.h"

  20#include "WordList.h"

  21#include "LexAccessor.h"

  22#include "Accessor.h"

  23#include "StyleContext.h"

  24#include "CharacterSet.h"

  25#include "LexerModule.h"

  26
  27#ifdef SCI_NAMESPACE

  28using namespace Scintilla;
  29#endif

  30
  31#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)

  32#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)

  33#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  34
  35enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  36enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  37
  38static inline bool IsAWordChar(const int ch) {
  39	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  40}
  41
  42static inline bool IsAWordStart(const int ch) {
  43	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  44}
  45
  46inline bool IsOperator(int ch) {
  47	if (IsASCII(ch) && isalnum(ch))
  48		return false;
  49	// '.' left out as it is used to make up numbers
  50	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  51	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  52	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  53	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  54	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  55	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
  56		return true;
  57	return false;
  58}
  59
  60static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  61	unsigned int i = 0;
  62	for (; (i < end - start + 1) && (i < len-1); i++) {
  63		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  64	}
  65	s[i] = '\0';
  66}
  67
  68static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  69
  70	unsigned int i = 0;
  71	for (; i < sLen-1; i++) {
  72		char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  73		if ((i == 0) && !IsAWordStart(ch))
  74			break;
  75		if ((i > 0) && !IsAWordChar(ch))
  76			break;
  77		s[i] = ch;
  78	}
  79	s[i] = '\0';
  80
  81	return s;
  82}
  83
  84static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  85	char s[100];
  86	GetTextSegment(styler, start, end, s, sizeof(s));
  87	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  88	if (strstr(s, "src"))	// External script
  89		return eScriptNone;
  90	if (strstr(s, "vbs"))
  91		return eScriptVBS;
  92	if (strstr(s, "pyth"))
  93		return eScriptPython;
  94	if (strstr(s, "javas"))
  95		return eScriptJS;
  96	if (strstr(s, "jscr"))
  97		return eScriptJS;
  98	if (strstr(s, "php"))
  99		return eScriptPHP;
 100	if (strstr(s, "xml")) {
 101		const char *xml = strstr(s, "xml");
 102		for (const char *t=s; t<xml; t++) {
 103			if (!IsASpace(*t)) {
 104				return prevValue;
 105			}
 106		}
 107		return eScriptXML;
 108	}
 109
 110	return prevValue;
 111}
 112
 113static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 114	int iResult = 0;
 115	char s[100];
 116	GetTextSegment(styler, start, end, s, sizeof(s));
 117	if (0 == strncmp(s, "php", 3)) {
 118		iResult = 3;
 119	}
 120
 121	return iResult;
 122}
 123
 124static script_type ScriptOfState(int state) {
 125	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 126		return eScriptPython;
 127	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 128		return eScriptVBS;
 129	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 130		return eScriptJS;
 131	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 132		return eScriptPHP;
 133	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 134		return eScriptSGML;
 135	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 136		return eScriptSGMLblock;
 137	} else {
 138		return eScriptNone;
 139	}
 140}
 141
 142static int statePrintForState(int state, script_mode inScriptType) {
 143	int StateToPrint = state;
 144
 145	if (state >= SCE_HJ_START) {
 146		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 147			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 148		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 149			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 150		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 151			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 152		}
 153	}
 154
 155	return StateToPrint;
 156}
 157
 158static int stateForPrintState(int StateToPrint) {
 159	int state;
 160
 161	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 162		state = StateToPrint - SCE_HA_PYTHON;
 163	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 164		state = StateToPrint - SCE_HA_VBS;
 165	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 166		state = StateToPrint - SCE_HA_JS;
 167	} else {
 168		state = StateToPrint;
 169	}
 170
 171	return state;
 172}
 173
 174static inline bool IsNumber(unsigned int start, Accessor &styler) {
 175	return IsADigit(styler[start]) || (styler[start] == '.') ||
 176	       (styler[start] == '-') || (styler[start] == '#');
 177}
 178
 179static inline bool isStringState(int state) {
 180	bool bResult;
 181
 182	switch (state) {
 183	case SCE_HJ_DOUBLESTRING:
 184	case SCE_HJ_SINGLESTRING:
 185	case SCE_HJA_DOUBLESTRING:
 186	case SCE_HJA_SINGLESTRING:
 187	case SCE_HB_STRING:
 188	case SCE_HBA_STRING:
 189	case SCE_HP_STRING:
 190	case SCE_HP_CHARACTER:
 191	case SCE_HP_TRIPLE:
 192	case SCE_HP_TRIPLEDOUBLE:
 193	case SCE_HPA_STRING:
 194	case SCE_HPA_CHARACTER:
 195	case SCE_HPA_TRIPLE:
 196	case SCE_HPA_TRIPLEDOUBLE:
 197	case SCE_HPHP_HSTRING:
 198	case SCE_HPHP_SIMPLESTRING:
 199	case SCE_HPHP_HSTRING_VARIABLE:
 200	case SCE_HPHP_COMPLEX_VARIABLE:
 201		bResult = true;
 202		break;
 203	default :
 204		bResult = false;
 205		break;
 206	}
 207	return bResult;
 208}
 209
 210static inline bool stateAllowsTermination(int state) {
 211	bool allowTermination = !isStringState(state);
 212	if (allowTermination) {
 213		switch (state) {
 214		case SCE_HB_COMMENTLINE:
 215		case SCE_HPHP_COMMENT:
 216		case SCE_HP_COMMENTLINE:
 217		case SCE_HPA_COMMENTLINE:
 218			allowTermination = false;
 219		}
 220	}
 221	return allowTermination;
 222}
 223
 224// not really well done, since it's only comments that should lex the %> and <%
 225static inline bool isCommentASPState(int state) {
 226	bool bResult;
 227
 228	switch (state) {
 229	case SCE_HJ_COMMENT:
 230	case SCE_HJ_COMMENTLINE:
 231	case SCE_HJ_COMMENTDOC:
 232	case SCE_HB_COMMENTLINE:
 233	case SCE_HP_COMMENTLINE:
 234	case SCE_HPHP_COMMENT:
 235	case SCE_HPHP_COMMENTLINE:
 236		bResult = true;
 237		break;
 238	default :
 239		bResult = false;
 240		break;
 241	}
 242	return bResult;
 243}
 244
 245static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 246	bool wordIsNumber = IsNumber(start, styler);
 247	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 248	if (wordIsNumber) {
 249		chAttr = SCE_H_NUMBER;
 250	} else {
 251		char s[100];
 252		GetTextSegment(styler, start, end, s, sizeof(s));
 253		if (keywords.InList(s))
 254			chAttr = SCE_H_ATTRIBUTE;
 255	}
 256	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 257		// No keywords -> all are known
 258		chAttr = SCE_H_ATTRIBUTE;
 259	styler.ColourTo(end, chAttr);
 260}
 261
 262static int classifyTagHTML(unsigned int start, unsigned int end,
 263                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 264			   bool caseSensitive, bool isXml, bool allowScripts) {
 265	char withSpace[30 + 2] = " ";
 266	const char *s = withSpace + 1;
 267	// Copy after the '<'
 268	unsigned int i = 1;
 269	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 270		char ch = styler[cPos];
 271		if ((ch != '<') && (ch != '/')) {
 272			withSpace[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 273		}
 274	}
 275
 276	//The following is only a quick hack, to see if this whole thing would work
 277	//we first need the tagname with a trailing space...
 278	withSpace[i] = ' ';
 279	withSpace[i+1] = '\0';
 280
 281	// if the current language is XML, I can fold any tag
 282	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 283	//...to find it in the list of no-container-tags
 284	tagDontFold = (!isXml) && (NULL != strstr(" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr ", withSpace));
 285
 286	//now we can remove the trailing space
 287	withSpace[i] = '\0';
 288
 289	// No keywords -> all are known
 290	char chAttr = SCE_H_TAGUNKNOWN;
 291	if (s[0] == '!') {
 292		chAttr = SCE_H_SGML_DEFAULT;
 293	} else if (!keywords || keywords.InList(s)) {
 294		chAttr = SCE_H_TAG;
 295	}
 296	styler.ColourTo(end, chAttr);
 297	if (chAttr == SCE_H_TAG) {
 298		if (allowScripts && 0 == strcmp(s, "script")) {
 299			// check to see if this is a self-closing tag by sniffing ahead
 300			bool isSelfClose = false;
 301			for (unsigned int cPos = end; cPos <= end + 200; cPos++) {
 302				char ch = styler.SafeGetCharAt(cPos, '\0');
 303				if (ch == '\0' || ch == '>')
 304					break;
 305				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
 306					isSelfClose = true;
 307					break;
 308				}
 309			}
 310
 311			// do not enter a script state if the tag self-closed
 312			if (!isSelfClose)
 313				chAttr = SCE_H_SCRIPT;
 314		} else if (!isXml && 0 == strcmp(s, "comment")) {
 315			chAttr = SCE_H_COMMENT;
 316		}
 317	}
 318	return chAttr;
 319}
 320
 321static void classifyWordHTJS(unsigned int start, unsigned int end,
 322                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 323	char s[30 + 1];
 324	unsigned int i = 0;
 325	for (; i < end - start + 1 && i < 30; i++) {
 326		s[i] = styler[start + i];
 327	}
 328	s[i] = '\0';
 329
 330	char chAttr = SCE_HJ_WORD;
 331	bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
 332	if (wordIsNumber) {
 333		chAttr = SCE_HJ_NUMBER;
 334	} else if (keywords.InList(s)) {
 335		chAttr = SCE_HJ_KEYWORD;
 336	}
 337	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 338}
 339
 340static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 341	char chAttr = SCE_HB_IDENTIFIER;
 342	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 343	if (wordIsNumber) {
 344		chAttr = SCE_HB_NUMBER;
 345	} else {
 346		char s[100];
 347		GetTextSegment(styler, start, end, s, sizeof(s));
 348		if (keywords.InList(s)) {
 349			chAttr = SCE_HB_WORD;
 350			if (strcmp(s, "rem") == 0)
 351				chAttr = SCE_HB_COMMENTLINE;
 352		}
 353	}
 354	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 355	if (chAttr == SCE_HB_COMMENTLINE)
 356		return SCE_HB_COMMENTLINE;
 357	else
 358		return SCE_HB_DEFAULT;
 359}
 360
 361static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
 362	bool wordIsNumber = IsADigit(styler[start]);
 363	char s[30 + 1];
 364	unsigned int i = 0;
 365	for (; i < end - start + 1 && i < 30; i++) {
 366		s[i] = styler[start + i];
 367	}
 368	s[i] = '\0';
 369	char chAttr = SCE_HP_IDENTIFIER;
 370	if (0 == strcmp(prevWord, "class"))
 371		chAttr = SCE_HP_CLASSNAME;
 372	else if (0 == strcmp(prevWord, "def"))
 373		chAttr = SCE_HP_DEFNAME;
 374	else if (wordIsNumber)
 375		chAttr = SCE_HP_NUMBER;
 376	else if (keywords.InList(s))
 377		chAttr = SCE_HP_WORD;
 378	else if (isMako && 0 == strcmp(s, "block"))
 379		chAttr = SCE_HP_WORD;
 380	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 381	strcpy(prevWord, s);
 382}
 383
 384// Update the word colour to default or keyword
 385// Called when in a PHP word
 386static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 387	char chAttr = SCE_HPHP_DEFAULT;
 388	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 389	if (wordIsNumber) {
 390		chAttr = SCE_HPHP_NUMBER;
 391	} else {
 392		char s[100];
 393		GetTextSegment(styler, start, end, s, sizeof(s));
 394		if (keywords.InList(s))
 395			chAttr = SCE_HPHP_WORD;
 396	}
 397	styler.ColourTo(end, chAttr);
 398}
 399
 400static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 401	char s[30 + 1];
 402	unsigned int i = 0;
 403	for (; i < end - start + 1 && i < 30; i++) {
 404		s[i] = styler[start + i];
 405	}
 406	s[i] = '\0';
 407	return keywords.InList(s);
 408}
 409
 410static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 411	char s[30 + 1];
 412	unsigned int i = 0;
 413	for (; i < end - start + 1 && i < 30; i++) {
 414		s[i] = styler[start + i];
 415	}
 416	s[i] = '\0';
 417	return (0 == strcmp(s, "[CDATA["));
 418}
 419
 420// Return the first state to reach when entering a scripting language
 421static int StateForScript(script_type scriptLanguage) {
 422	int Result;
 423	switch (scriptLanguage) {
 424	case eScriptVBS:
 425		Result = SCE_HB_START;
 426		break;
 427	case eScriptPython:
 428		Result = SCE_HP_START;
 429		break;
 430	case eScriptPHP:
 431		Result = SCE_HPHP_DEFAULT;
 432		break;
 433	case eScriptXML:
 434		Result = SCE_H_TAGUNKNOWN;
 435		break;
 436	case eScriptSGML:
 437		Result = SCE_H_SGML_DEFAULT;
 438		break;
 439	case eScriptComment:
 440		Result = SCE_H_COMMENT;
 441		break;
 442	default :
 443		Result = SCE_HJ_START;
 444		break;
 445	}
 446	return Result;
 447}
 448
 449static inline bool issgmlwordchar(int ch) {
 450	return !IsASCII(ch) ||
 451		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 452}
 453
 454static inline bool IsPhpWordStart(int ch) {
 455	return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 456}
 457
 458static inline bool IsPhpWordChar(int ch) {
 459	return IsADigit(ch) || IsPhpWordStart(ch);
 460}
 461
 462static bool InTagState(int state) {
 463	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 464	       state == SCE_H_SCRIPT ||
 465	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 466	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 467	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 468}
 469
 470static bool IsCommentState(const int state) {
 471	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 472}
 473
 474static bool IsScriptCommentState(const int state) {
 475	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 476		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 477}
 478
 479static bool isLineEnd(int ch) {
 480	return ch == '\r' || ch == '\n';
 481}
 482
 483static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
 484	if (strlen(blockType) == 0) {
 485		return ((ch == '%') && (chNext == '>'));
 486	} else if ((0 == strcmp(blockType, "inherit")) ||
 487			   (0 == strcmp(blockType, "namespace")) ||
 488			   (0 == strcmp(blockType, "include")) ||
 489			   (0 == strcmp(blockType, "page"))) {
 490		return ((ch == '/') && (chNext == '>'));
 491	} else if (0 == strcmp(blockType, "%")) {
 492		if (ch == '/' && isLineEnd(chNext))
 493			return 1;
 494		else
 495		    return isLineEnd(ch);
 496	} else if (0 == strcmp(blockType, "{")) {
 497		return ch == '}';
 498	} else {
 499		return (ch == '>');
 500	}
 501}
 502
 503static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
 504	if (strlen(blockType) == 0) {
 505		return 0;
 506	} else if (0 == strcmp(blockType, "%")) {
 507		return ((ch == '%') && (chNext == '}'));
 508	} else if (0 == strcmp(blockType, "{")) {
 509		return ((ch == '}') && (chNext == '}'));
 510	} else {
 511		return 0;
 512	}
 513}
 514
 515static bool isPHPStringState(int state) {
 516	return
 517	    (state == SCE_HPHP_HSTRING) ||
 518	    (state == SCE_HPHP_SIMPLESTRING) ||
 519	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 520	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 521}
 522
 523static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
 524	int j;
 525	const int beginning = i - 1;
 526	bool isValidSimpleString = false;
 527
 528	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 529		i++;
 530
 531	char ch = styler.SafeGetCharAt(i);
 532	const char chNext = styler.SafeGetCharAt(i + 1);
 533	if (!IsPhpWordStart(ch)) {
 534		if (ch == '\'' && IsPhpWordStart(chNext)) {
 535			i++;
 536			ch = chNext;
 537			isSimpleString = true;
 538		} else {
 539			phpStringDelimiter[0] = '\0';
 540			return beginning;
 541		}
 542	}
 543	phpStringDelimiter[0] = ch;
 544	i++;
 545
 546	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
 547		if (!IsPhpWordChar(styler[j])) {
 548			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
 549				isValidSimpleString = true;
 550				j++;
 551				break;
 552			} else {
 553				phpStringDelimiter[0] = '\0';
 554				return beginning;
 555			}
 556		}
 557		if (j - i < phpStringDelimiterSize - 2)
 558			phpStringDelimiter[j-i+1] = styler[j];
 559		else
 560			i++;
 561	}
 562	if (isSimpleString && !isValidSimpleString) {
 563		phpStringDelimiter[0] = '\0';
 564		return beginning;
 565	}
 566	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
 567	return j - 1;
 568}
 569
 570static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 571                                  Accessor &styler, bool isXml) {
 572	WordList &keywords = *keywordlists[0];
 573	WordList &keywords2 = *keywordlists[1];
 574	WordList &keywords3 = *keywordlists[2];
 575	WordList &keywords4 = *keywordlists[3];
 576	WordList &keywords5 = *keywordlists[4];
 577	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 578
 579	styler.StartAt(startPos);
 580	char prevWord[200];
 581	prevWord[0] = '\0';
 582	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 583	phpStringDelimiter[0] = '\0';
 584	int StateToPrint = initStyle;
 585	int state = stateForPrintState(StateToPrint);
 586	char makoBlockType[200];
 587	makoBlockType[0] = '\0';
 588	int makoComment = 0;
 589	char djangoBlockType[2];
 590	djangoBlockType[0] = '\0';
 591
 592	// If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
 593	if (InTagState(state)) {
 594		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 595			int backLineStart = styler.LineStart(styler.GetLine(startPos-1));
 596			length += startPos - backLineStart;
 597			startPos = backLineStart;
 598		}
 599		state = SCE_H_DEFAULT;
 600	}
 601	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 602	if (isPHPStringState(state)) {
 603		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 604			startPos--;
 605			length++;
 606			state = styler.StyleAt(startPos);
 607		}
 608		if (startPos == 0)
 609			state = SCE_H_DEFAULT;
 610	}
 611	styler.StartAt(startPos);
 612
 613	int lineCurrent = styler.GetLine(startPos);
 614	int lineState;
 615	if (lineCurrent > 0) {
 616		lineState = styler.GetLineState(lineCurrent-1);
 617	} else {
 618		// Default client and ASP scripting language is JavaScript
 619		lineState = eScriptJS << 8;
 620
 621		// property asp.default.language
 622		//	Script in ASP code is initially assumed to be in JavaScript.
 623		//	To change this to VBScript set asp.default.language to 2. Python is 3.
 624		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 625	}
 626	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 627	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 628	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 629	bool tagDontFold = false; //some HTML tags should not be folded
 630	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 631	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 632	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 633
 634	script_type scriptLanguage = ScriptOfState(state);
 635	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 636	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 637		scriptLanguage = eScriptComment;
 638	}
 639	script_type beforeLanguage = ScriptOfState(beforePreProc);
 640
 641	// property fold.html
 642	//	Folding is turned on or off for HTML and XML files with this option.
 643	//	The fold option must also be on for folding to occur.
 644	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 645
 646	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 647
 648	// property fold.html.preprocessor
 649	//	Folding is turned on or off for scripts embedded in HTML files with this option.
 650	//	The default is on.
 651	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 652
 653	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 654
 655	// property fold.hypertext.comment
 656	//	Allow folding for comments in scripts embedded in HTML.
 657	//	The default is off.
 658	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 659
 660	// property fold.hypertext.heredoc
 661	//	Allow folding for heredocs in scripts embedded in HTML.
 662	//	The default is off.
 663	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 664
 665	// property html.tags.case.sensitive
 666	//	For XML and HTML, setting this property to 1 will make tags match in a case
 667	//	sensitive way which is the expected behaviour for XML and XHTML.
 668	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 669
 670	// property lexer.xml.allow.scripts
 671	//	Set to 0 to disable scripts in XML.
 672	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 673
 674	// property lexer.html.mako
 675	//	Set to 1 to enable the mako template language.
 676	const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
 677
 678	// property lexer.html.django
 679	//	Set to 1 to enable the django template language.
 680	const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
 681
 682	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 683	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 684	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 685	// TODO: also handle + and - (except if they're part of ++ or --) and return keywords
 686	const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
 687
 688	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 689	int levelCurrent = levelPrev;
 690	int visibleChars = 0;
 691	int lineStartVisibleChars = 0;
 692
 693	int chPrev = ' ';
 694	int ch = ' ';
 695	int chPrevNonWhite = ' ';
 696	// look back to set chPrevNonWhite properly for better regex colouring
 697	if (scriptLanguage == eScriptJS && startPos > 0) {
 698		int back = startPos;
 699		int style = 0;
 700		while (--back) {
 701			style = styler.StyleAt(back);
 702			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 703				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 704				break;
 705		}
 706		if (style == SCE_HJ_SYMBOLS) {
 707			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 708		}
 709	}
 710
 711	styler.StartSegment(startPos);
 712	const int lengthDoc = startPos + length;
 713	for (int i = startPos; i < lengthDoc; i++) {
 714		const int chPrev2 = chPrev;
 715		chPrev = ch;
 716		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 717			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 718			chPrevNonWhite = ch;
 719		ch = static_cast<unsigned char>(styler[i]);
 720		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 721		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 722
 723		// Handle DBCS codepages
 724		if (styler.IsLeadByte(static_cast<char>(ch))) {
 725			chPrev = ' ';
 726			i += 1;
 727			continue;
 728		}
 729
 730		if ((!IsASpace(ch) || !foldCompact) && fold)
 731			visibleChars++;
 732		if (!IsASpace(ch))
 733			lineStartVisibleChars++;
 734
 735		// decide what is the current state to print (depending of the script tag)
 736		StateToPrint = statePrintForState(state, inScriptType);
 737
 738		// handle script folding
 739		if (fold) {
 740			switch (scriptLanguage) {
 741			case eScriptJS:
 742			case eScriptPHP:
 743				//not currently supported				case eScriptVBS:
 744
 745				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 746				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 747				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 748					if (ch == '#') {
 749						int j = i + 1;
 750						while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
 751							j++;
 752						}
 753						if (styler.Match(j, "region") || styler.Match(j, "if")) {
 754							levelCurrent++;
 755						} else if (styler.Match(j, "end")) {
 756							levelCurrent--;
 757						}
 758					} else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 759						levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
 760					}
 761				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 762					levelCurrent--;
 763				}
 764				break;
 765			case eScriptPython:
 766				if (state != SCE_HP_COMMENTLINE && !isMako) {
 767					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 768						levelCurrent++;
 769					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 770						// check if the number of tabs is lower than the level
 771						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 772						for (int j = 0; Findlevel > 0; j++) {
 773							char chTmp = styler.SafeGetCharAt(i + j + 1);
 774							if (chTmp == '\t') {
 775								Findlevel -= 8;
 776							} else if (chTmp == ' ') {
 777								Findlevel--;
 778							} else {
 779								break;
 780							}
 781						}
 782
 783						if (Findlevel > 0) {
 784							levelCurrent -= Findlevel / 8;
 785							if (Findlevel % 8)
 786								levelCurrent--;
 787						}
 788					}
 789				}
 790				break;
 791			default:
 792				break;
 793			}
 794		}
 795
 796		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 797			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 798			// Avoid triggering two times on Dos/Win
 799			// New line -> record any line state onto /next/ line
 800			if (fold) {
 801				int lev = levelPrev;
 802				if (visibleChars == 0)
 803					lev |= SC_FOLDLEVELWHITEFLAG;
 804				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 805					lev |= SC_FOLDLEVELHEADERFLAG;
 806
 807				styler.SetLevel(lineCurrent, lev);
 808				visibleChars = 0;
 809				levelPrev = levelCurrent;
 810			}
 811			styler.SetLineState(lineCurrent,
 812			                    ((inScriptType & 0x03) << 0) |
 813			                    ((tagOpened ? 1 : 0) << 2) |
 814			                    ((tagClosing ? 1 : 0) << 3) |
 815			                    ((aspScript & 0x0F) << 4) |
 816			                    ((clientScript & 0x0F) << 8) |
 817			                    ((beforePreProc & 0xFF) << 12));
 818			lineCurrent++;
 819			lineStartVisibleChars = 0;
 820		}
 821
 822		// handle start of Mako comment line
 823		if (isMako && ch == '#' && chNext == '#') {
 824			makoComment = 1;
 825		}
 826
 827		// handle end of Mako comment line
 828		else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
 829			makoComment = 0;
 830			styler.ColourTo(i, SCE_HP_COMMENTLINE);
 831			state = SCE_HP_DEFAULT;
 832		}
 833
 834		// Allow falling through to mako handling code if newline is going to end a block
 835		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
 836			(!isMako || (0 != strcmp(makoBlockType, "%")))) {
 837		}
 838
 839		// generic end of script processing
 840		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 841			// Check if it's the end of the script tag (or any other HTML tag)
 842			switch (state) {
 843				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 844			case SCE_H_DOUBLESTRING:
 845			case SCE_H_SINGLESTRING:
 846			case SCE_HJ_COMMENT:
 847			case SCE_HJ_COMMENTDOC:
 848			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 849			// the end of script marker from some JS interpreters.
 850			case SCE_HB_COMMENTLINE:
 851			case SCE_HBA_COMMENTLINE:
 852			case SCE_HJ_DOUBLESTRING:
 853			case SCE_HJ_SINGLESTRING:
 854			case SCE_HJ_REGEX:
 855			case SCE_HB_STRING:
 856			case SCE_HBA_STRING:
 857			case SCE_HP_STRING:
 858			case SCE_HP_TRIPLE:
 859			case SCE_HP_TRIPLEDOUBLE:
 860			case SCE_HPHP_HSTRING:
 861			case SCE_HPHP_SIMPLESTRING:
 862			case SCE_HPHP_COMMENT:
 863			case SCE_HPHP_COMMENTLINE:
 864				break;
 865			default :
 866				// check if the closing tag is a script tag
 867				if (const char *tag =
 868						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 869						state == SCE_H_COMMENT ? "comment" : 0) {
 870					int j = i + 2;
 871					int chr;
 872					do {
 873						chr = static_cast<int>(*tag++);
 874					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 875					if (chr != 0) break;
 876				}
 877				// closing tag of the script (it's a closing HTML tag anyway)
 878				styler.ColourTo(i - 1, StateToPrint);
 879				state = SCE_H_TAGUNKNOWN;
 880				inScriptType = eHtml;
 881				scriptLanguage = eScriptNone;
 882				clientScript = eScriptJS;
 883				i += 2;
 884				visibleChars += 2;
 885				tagClosing = true;
 886				continue;
 887			}
 888		}
 889
 890		/////////////////////////////////////
 891		// handle the start of PHP pre-processor = Non-HTML
 892		else if ((state != SCE_H_ASPAT) &&
 893		         !isPHPStringState(state) &&
 894		         (state != SCE_HPHP_COMMENT) &&
 895		         (state != SCE_HPHP_COMMENTLINE) &&
 896		         (ch == '<') &&
 897		         (chNext == '?') &&
 898				 !IsScriptCommentState(state)) {
 899 			beforeLanguage = scriptLanguage;
 900			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
 901			if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
 902			styler.ColourTo(i - 1, StateToPrint);
 903			beforePreProc = state;
 904			i++;
 905			visibleChars++;
 906			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 907			if (scriptLanguage == eScriptXML)
 908				styler.ColourTo(i, SCE_H_XMLSTART);
 909			else
 910				styler.ColourTo(i, SCE_H_QUESTION);
 911			state = StateForScript(scriptLanguage);
 912			if (inScriptType == eNonHtmlScript)
 913				inScriptType = eNonHtmlScriptPreProc;
 914			else
 915				inScriptType = eNonHtmlPreProc;
 916			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 917			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 918				levelCurrent++;
 919			}
 920			// should be better
 921			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 922			continue;
 923		}
 924
 925		// handle the start Mako template Python code
 926		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
 927															 (lineStartVisibleChars == 1 && ch == '%') ||
 928															 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
 929															 (ch == '$' && chNext == '{') ||
 930															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
 931			if (ch == '%' || ch == '/')
 932				StringCopy(makoBlockType, "%");
 933			else if (ch == '$')
 934				StringCopy(makoBlockType, "{");
 935			else if (chNext == '/')
 936				GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
 937			else
 938				GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
 939			styler.ColourTo(i - 1, StateToPrint);
 940			beforePreProc = state;
 941			if (inScriptType == eNonHtmlScript)
 942				inScriptType = eNonHtmlScriptPreProc;
 943			else
 944				inScriptType = eNonHtmlPreProc;
 945
 946			if (chNext == '/') {
 947				i += 2;
 948				visibleChars += 2;
 949			} else if (ch != '%') {
 950				i++;
 951				visibleChars++;
 952			}
 953			state = SCE_HP_START;
 954			scriptLanguage = eScriptPython;
 955			styler.ColourTo(i, SCE_H_ASP);
 956
 957			if (ch != '%' && ch != '$' && ch != '/') {
 958				i += static_cast<int>(strlen(makoBlockType));
 959				visibleChars += static_cast<int>(strlen(makoBlockType));
 960				if (keywords4.InList(makoBlockType))
 961					styler.ColourTo(i, SCE_HP_WORD);
 962				else
 963					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 964			}
 965
 966			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 967			continue;
 968		}
 969
 970		// handle the start/end of Django comment
 971		else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
 972			styler.ColourTo(i - 1, StateToPrint);
 973			beforePreProc = state;
 974			beforeLanguage = scriptLanguage;
 975			if (inScriptType == eNonHtmlScript)
 976				inScriptType = eNonHtmlScriptPreProc;
 977			else
 978				inScriptType = eNonHtmlPreProc;
 979			i += 1;
 980			visibleChars += 1;
 981			scriptLanguage = eScriptComment;
 982			state = SCE_H_COMMENT;
 983			styler.ColourTo(i, SCE_H_ASP);
 984			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 985			continue;
 986		} else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
 987			styler.ColourTo(i - 1, StateToPrint);
 988			i += 1;
 989			visibleChars += 1;
 990			styler.ColourTo(i, SCE_H_ASP);
 991			state = beforePreProc;
 992			if (inScriptType == eNonHtmlScriptPreProc)
 993				inScriptType = eNonHtmlScript;
 994			else
 995				inScriptType = eHtml;
 996			scriptLanguage = beforeLanguage;
 997			continue;
 998		}
 999
1000		// handle the start Django template code
1001		else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
1002			if (chNext == '%')
1003				StringCopy(djangoBlockType, "%");
1004			else
1005				StringCopy(djangoBlockType, "{");
1006			styler.ColourTo(i - 1, StateToPrint);
1007			beforePreProc = state;
1008			if (inScriptType == eNonHtmlScript)
1009				inScriptType = eNonHtmlScriptPreProc;
1010			else
1011				inScriptType = eNonHtmlPreProc;
1012
1013			i += 1;
1014			visibleChars += 1;
1015			state = SCE_HP_START;
1016			beforeLanguage = scriptLanguage;
1017			scriptLanguage = eScriptPython;
1018			styler.ColourTo(i, SCE_H_ASP);
1019
1020			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1021			continue;
1022		}
1023
1024		// handle the start of ASP pre-processor = Non-HTML
1025		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1026			styler.ColourTo(i - 1, StateToPrint);
1027			beforePreProc = state;
1028			if (inScriptType == eNonHtmlScript)
1029				inScriptType = eNonHtmlScriptPreProc;
1030			else
1031				inScriptType = eNonHtmlPreProc;
1032
1033			if (chNext2 == '@') {
1034				i += 2; // place as if it was the second next char treated
1035				visibleChars += 2;
1036				state = SCE_H_ASPAT;
1037			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1038				styler.ColourTo(i + 3, SCE_H_ASP);
1039				state = SCE_H_XCCOMMENT;
1040				scriptLanguage = eScriptVBS;
1041				continue;
1042			} else {
1043				if (chNext2 == '=') {
1044					i += 2; // place as if it was the second next char treated
1045					visibleChars += 2;
1046				} else {
1047					i++; // place as if it was the next char treated
1048					visibleChars++;
1049				}
1050
1051				state = StateForScript(aspScript);
1052			}
1053			scriptLanguage = eScriptVBS;
1054			styler.ColourTo(i, SCE_H_ASP);
1055			// fold whole script
1056			if (foldHTMLPreprocessor)
1057				levelCurrent++;
1058			// should be better
1059			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1060			continue;
1061		}
1062
1063		/////////////////////////////////////
1064		// handle the start of SGML language (DTD)
1065		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1066				 (chPrev == '<') &&
1067				 (ch == '!') &&
1068				 (StateToPrint != SCE_H_CDATA) &&
1069				 (!IsCommentState(StateToPrint)) &&
1070				 (!IsScriptCommentState(StateToPrint))) {
1071			beforePreProc = state;
1072			styler.ColourTo(i - 2, StateToPrint);
1073			if ((chNext == '-') && (chNext2 == '-')) {
1074				state = SCE_H_COMMENT; // wait for a pending command
1075				styler.ColourTo(i + 2, SCE_H_COMMENT);
1076				i += 2; // follow styling after the --
1077			} else if (isWordCdata(i + 1, i + 7, styler)) {
1078				state = SCE_H_CDATA;
1079			} else {
1080				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1081				scriptLanguage = eScriptSGML;
1082				state = SCE_H_SGML_COMMAND; // wait for a pending command
1083			}
1084			// fold whole tag (-- when closing the tag)
1085			if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1086				levelCurrent++;
1087			continue;
1088		}
1089
1090		// handle the end of Mako Python code
1091		else if (isMako &&
1092			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1093				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1094				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1095			if (state == SCE_H_ASPAT) {
1096				aspScript = segIsScriptingIndicator(styler,
1097				                                    styler.GetStartSegment(), i - 1, aspScript);
1098			}
1099			if (state == SCE_HP_WORD) {
1100				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1101			} else {
1102				styler.ColourTo(i - 1, StateToPrint);
1103			}
1104			if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1105				i++;
1106				visibleChars++;
1107		    }
1108			else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
1109				i++;
1110				visibleChars++;
1111			}
1112			if (0 != strcmp(makoBlockType, "%") || ch == '/') {
1113				styler.ColourTo(i, SCE_H_ASP);
1114			}
1115			state = beforePreProc;
1116			if (inScriptType == eNonHtmlScriptPreProc)
1117				inScriptType = eNonHtmlScript;
1118			else
1119				inScriptType = eHtml;
1120			scriptLanguage = eScriptNone;
1121			continue;
1122		}
1123
1124		// handle the end of Django template code
1125		else if (isDjango &&
1126			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1127				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1128				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1129			if (state == SCE_H_ASPAT) {
1130				aspScript = segIsScriptingIndicator(styler,
1131				                                    styler.GetStartSegment(), i - 1, aspScript);
1132			}
1133			if (state == SCE_HP_WORD) {
1134				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1135			} else {
1136				styler.ColourTo(i - 1, StateToPrint);
1137			}
1138			i += 1;
1139			visibleChars += 1;
1140			styler.ColourTo(i, SCE_H_ASP);
1141			state = beforePreProc;
1142			if (inScriptType == eNonHtmlScriptPreProc)
1143				inScriptType = eNonHtmlScript;
1144			else
1145				inScriptType = eHtml;
1146			scriptLanguage = beforeLanguage;
1147			continue;
1148		}
1149
1150		// handle the end of a pre-processor = Non-HTML
1151		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1152				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1153				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1154		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1155			if (state == SCE_H_ASPAT) {
1156				aspScript = segIsScriptingIndicator(styler,
1157				                                    styler.GetStartSegment(), i - 1, aspScript);
1158			}
1159			// Bounce out of any ASP mode
1160			switch (state) {
1161			case SCE_HJ_WORD:
1162				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1163				break;
1164			case SCE_HB_WORD:
1165				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1166				break;
1167			case SCE_HP_WORD:
1168				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1169				break;
1170			case SCE_HPHP_WORD:
1171				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1172				break;
1173			case SCE_H_XCCOMMENT:
1174				styler.ColourTo(i - 1, state);
1175				break;
1176			default :
1177				styler.ColourTo(i - 1, StateToPrint);
1178				break;
1179			}
1180			if (scriptLanguage != eScriptSGML) {
1181				i++;
1182				visibleChars++;
1183			}
1184			if (ch == '%')
1185				styler.ColourTo(i, SCE_H_ASP);
1186			else if (scriptLanguage == eScriptXML)
1187				styler.ColourTo(i, SCE_H_XMLEND);
1188			else if (scriptLanguage == eScriptSGML)
1189				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1190			else
1191				styler.ColourTo(i, SCE_H_QUESTION);
1192			state = beforePreProc;
1193			if (inScriptType == eNonHtmlScriptPreProc)
1194				inScriptType = eNonHtmlScript;
1195			else
1196				inScriptType = eHtml;
1197			// Unfold all scripting languages, except for XML tag
1198			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1199				levelCurrent--;
1200			}
1201			scriptLanguage = beforeLanguage;
1202			continue;
1203		}
1204		/////////////////////////////////////
1205
1206		switch (state) {
1207		case SCE_H_DEFAULT:
1208			if (ch == '<') {
1209				// in HTML, fold on tag open and unfold on tag close
1210				tagOpened = true;
1211				tagClosing = (chNext == '/');
1212				styler.ColourTo(i - 1, StateToPrint);
1213				if (chNext != '!')
1214					state = SCE_H_TAGUNKNOWN;
1215			} else if (ch == '&') {
1216				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1217				state = SCE_H_ENTITY;
1218			}
1219			break;
1220		case SCE_H_SGML_DEFAULT:
1221		case SCE_H_SGML_BLOCK_DEFAULT:
1222//			if (scriptLanguage == eScriptSGMLblock)
1223//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1224
1225			if (ch == '\"') {
1226				styler.ColourTo(i - 1, StateToPrint);
1227				state = SCE_H_SGML_DOUBLESTRING;
1228			} else if (ch == '\'') {
1229				styler.ColourTo(i - 1, StateToPrint);
1230				state = SCE_H_SGML_SIMPLESTRING;
1231			} else if ((ch == '-') && (chPrev == '-')) {
1232				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1233					styler.ColourTo(i - 2, StateToPrint);
1234				}
1235				state = SCE_H_SGML_COMMENT;
1236			} else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1237				styler.ColourTo(i - 2, StateToPrint);
1238				state = SCE_H_SGML_ENTITY;
1239			} else if (ch == '#') {
1240				styler.ColourTo(i - 1, StateToPrint);
1241				state = SCE_H_SGML_SPECIAL;
1242			} else if (ch == '[') {
1243				styler.ColourTo(i - 1, StateToPrint);
1244				scriptLanguage = eScriptSGMLblock;
1245				state = SCE_H_SGML_BLOCK_DEFAULT;
1246			} else if (ch == ']') {
1247				if (scriptLanguage == eScriptSGMLblock) {
1248					styler.ColourTo(i, StateToPrint);
1249					scriptLanguage = eScriptSGML;
1250				} else {
1251					styler.ColourTo(i - 1, StateToPrint);
1252					styler.ColourTo(i, SCE_H_SGML_ERROR);
1253				}
1254				state = SCE_H_SGML_DEFAULT;
1255			} else if (scriptLanguage == eScriptSGMLblock) {
1256				if ((ch == '!') && (chPrev == '<')) {
1257					styler.ColourTo(i - 2, StateToPrint);
1258					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1259					state = SCE_H_SGML_COMMAND;
1260				} else if (ch == '>') {
1261					styler.ColourTo(i - 1, StateToPrint);
1262					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1263				}
1264			}
1265			break;
1266		case SCE_H_SGML_COMMAND:
1267			if ((ch == '-') && (chPrev == '-')) {
1268				styler.ColourTo(i - 2, StateToPrint);
1269				state = SCE_H_SGML_COMMENT;
1270			} else if (!issgmlwordchar(ch)) {
1271				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1272					styler.ColourTo(i - 1, StateToPrint);
1273					state = SCE_H_SGML_1ST_PARAM;
1274				} else {
1275					state = SCE_H_SGML_ERROR;
1276				}
1277			}
1278			break;
1279		case SCE_H_SGML_1ST_PARAM:
1280			// wait for the beginning of the word
1281			if ((ch == '-') && (chPrev == '-')) {
1282				if (scriptLanguage == eScriptSGMLblock) {
1283					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1284				} else {
1285					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1286				}
1287				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1288			} else if (issgmlwordchar(ch)) {
1289				if (scriptLanguage == eScriptSGMLblock) {
1290					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1291				} else {
1292					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1293				}
1294				// find the length of the word
1295				int size = 1;
1296				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1297					size++;
1298				styler.ColourTo(i + size - 1, StateToPrint);
1299				i += size - 1;
1300				visibleChars += size - 1;
1301				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1302				if (scriptLanguage == eScriptSGMLblock) {
1303					state = SCE_H_SGML_BLOCK_DEFAULT;
1304				} else {
1305					state = SCE_H_SGML_DEFAULT;
1306				}
1307				continue;
1308			}
1309			break;
1310		case SCE_H_SGML_ERROR:
1311			if ((ch == '-') && (chPrev == '-')) {
1312				styler.ColourTo(i - 2, StateToPrint);
1313				state = SCE_H_SGML_COMMENT;
1314			}
1315			break;
1316		case SCE_H_SGML_DOUBLESTRING:
1317			if (ch == '\"') {
1318				styler.ColourTo(i, StateToPrint);
1319				state = SCE_H_SGML_DEFAULT;
1320			}
1321			break;
1322		case SCE_H_SGML_SIMPLESTRING:
1323			if (ch == '\'') {
1324				styler.ColourTo(i, StateToPrint);
1325				state = SCE_H_SGML_DEFAULT;
1326			}
1327			break;
1328		case SCE_H_SGML_COMMENT:
1329			if ((ch == '-') && (chPrev == '-')) {
1330				styler.ColourTo(i, StateToPrint);
1331				state = SCE_H_SGML_DEFAULT;
1332			}
1333			break;
1334		case SCE_H_CDATA:
1335			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1336				styler.ColourTo(i, StateToPrint);
1337				state = SCE_H_DEFAULT;
1338				levelCurrent--;
1339			}
1340			break;
1341		case SCE_H_COMMENT:
1342			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1343				styler.ColourTo(i, StateToPrint);
1344				state = SCE_H_DEFAULT;
1345				levelCurrent--;
1346			}
1347			break;
1348		case SCE_H_SGML_1ST_PARAM_COMMENT:
1349			if ((ch == '-') && (chPrev == '-')) {
1350				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1351				state = SCE_H_SGML_1ST_PARAM;
1352			}
1353			break;
1354		case SCE_H_SGML_SPECIAL:
1355			if (!(IsASCII(ch) && isupper(ch))) {
1356				styler.ColourTo(i - 1, StateToPrint);
1357				if (isalnum(ch)) {
1358					state = SCE_H_SGML_ERROR;
1359				} else {
1360					state = SCE_H_SGML_DEFAULT;
1361				}
1362			}
1363			break;
1364		case SCE_H_SGML_ENTITY:
1365			if (ch == ';') {
1366				styler.ColourTo(i, StateToPrint);
1367				state = SCE_H_SGML_DEFAULT;
1368			} else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1369				styler.ColourTo(i, SCE_H_SGML_ERROR);
1370				state = SCE_H_SGML_DEFAULT;
1371			}
1372			break;
1373		case SCE_H_ENTITY:
1374			if (ch == ';') {
1375				styler.ColourTo(i, StateToPrint);
1376				state = SCE_H_DEFAULT;
1377			}
1378			if (ch != '#' && !(IsASCII(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1379				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1380				if (!IsASCII(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1381					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1382				else
1383					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1384				state = SCE_H_DEFAULT;
1385			}
1386			break;
1387		case SCE_H_TAGUNKNOWN:
1388			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1389				int eClass = classifyTagHTML(styler.GetStartSegment(),
1390					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1391				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1392					if (!tagClosing) {
1393						inScriptType = eNonHtmlScript;
1394						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1395					} else {
1396						scriptLanguage = eScriptNone;
1397					}
1398					eClass = SCE_H_TAG;
1399				}
1400				if (ch == '>') {
1401					styler.ColourTo(i, eClass);
1402					if (inScriptType == eNonHtmlScript) {
1403						state = StateForScript(scriptLanguage);
1404					} else {
1405						state = SCE_H_DEFAULT;
1406					}
1407					tagOpened = false;
1408					if (!tagDontFold) {
1409						if (tagClosing) {
1410							levelCurrent--;
1411						} else {
1412							levelCurrent++;
1413						}
1414					}
1415					tagClosing = false;
1416				} else if (ch == '/' && chNext == '>') {
1417					if (eClass == SCE_H_TAGUNKNOWN) {
1418						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1419					} else {
1420						styler.ColourTo(i - 1, StateToPrint);
1421						styler.ColourTo(i + 1, SCE_H_TAGEND);
1422					}
1423					i++;
1424					ch = chNext;
1425					state = SCE_H_DEFAULT;
1426					tagOpened = false;
1427				} else {
1428					if (eClass != SCE_H_TAGUNKNOWN) {
1429						if (eClass == SCE_H_SGML_DEFAULT) {
1430							state = SCE_H_SGML_DEFAULT;
1431						} else {
1432							state = SCE_H_OTHER;
1433						}
1434					}
1435				}
1436			}
1437			break;
1438		case SCE_H_ATTRIBUTE:
1439			if (!setAttributeContinue.Contains(ch)) {
1440				if (inScriptType == eNonHtmlScript) {
1441					int scriptLanguagePrev = scriptLanguage;
1442					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1443					scriptLanguage = clientScript;
1444					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1445						inScriptType = eHtml;
1446				}
1447				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1448				if (ch == '>') {
1449					styler.ColourTo(i, SCE_H_TAG);
1450					if (inScriptType == eNonHtmlScript) {
1451						state = StateForScript(scriptLanguage);
1452					} else {
1453						state = SCE_H_DEFAULT;
1454					}
1455					tagOpened = false;
1456					if (!tagDontFold) {
1457						if (tagClosing) {
1458							levelCurrent--;
1459						} else {
1460							levelCurrent++;
1461						}
1462					}
1463					tagClosing = false;
1464				} else if (ch == '=') {
1465					styler.ColourTo(i, SCE_H_OTHER);
1466					state = SCE_H_VALUE;
1467				} else {
1468					state = SCE_H_OTHER;
1469				}
1470			}
1471			break;
1472		case SCE_H_OTHER:
1473			if (ch == '>') {
1474				styler.ColourTo(i - 1, StateToPrint);
1475				styler.ColourTo(i, SCE_H_TAG);
1476				if (inScriptType == eNonHtmlScript) {
1477					state = StateForScript(scriptLanguage);
1478				} else {
1479					state = SCE_H_DEFAULT;
1480				}
1481				tagOpened = false;
1482				if (!tagDontFold) {
1483					if (tagClosing) {
1484						levelCurrent--;
1485					} else {
1486						levelCurrent++;
1487					}
1488				}
1489				tagClosing = false;
1490			} else if (ch == '\"') {
1491				styler.ColourTo(i - 1, StateToPrint);
1492				state = SCE_H_DOUBLESTRING;
1493			} else if (ch == '\'') {
1494				styler.ColourTo(i - 1, StateToPrint);
1495				state = SCE_H_SINGLESTRING;
1496			} else if (ch == '=') {
1497				styler.ColourTo(i, StateToPrint);
1498				state = SCE_H_VALUE;
1499			} else if (ch == '/' && chNext == '>') {
1500				styler.ColourTo(i - 1, StateToPrint);
1501				styler.ColourTo(i + 1, SCE_H_TAGEND);
1502				i++;
1503				ch = chNext;
1504				state = SCE_H_DEFAULT;
1505				tagOpened = false;
1506			} else if (ch == '?' && chNext == '>') {
1507				styler.ColourTo(i - 1, StateToPrint);
1508				styler.ColourTo(i + 1, SCE_H_XMLEND);
1509				i++;
1510				ch = chNext;
1511				state = SCE_H_DEFAULT;
1512			} else if (setHTMLWord.Contains(ch)) {
1513				styler.ColourTo(i - 1, StateToPrint);
1514				state = SCE_H_ATTRIBUTE;
1515			}
1516			break;
1517		case SCE_H_DOUBLESTRING:
1518			if (ch == '\"') {
1519				if (inScriptType == eNonHtmlScript) {
1520					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1521				}
1522				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1523				state = SCE_H_OTHER;
1524			}
1525			break;
1526		case SCE_H_

Large files are truncated, but you can click here to view the full file