PageRenderTime 260ms CodeModel.GetById 16ms app.highlight 221ms RepoModel.GetById 1ms app.codeStats 1ms

/ext/scintilla/lexers/LexHTML.cxx

http://github.com/jwu/exlibs
C++ | 2077 lines | 1860 code | 111 blank | 106 comment | 1390 complexity | 1de90cd72c96e72f5aa6c5c44c95fedf MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>
   9#include <string.h>
  10#include <stdio.h>
  11#include <stdarg.h>
  12#include <assert.h>
  13#include <ctype.h>
  14
  15#include "ILexer.h"
  16#include "Scintilla.h"
  17#include "SciLexer.h"
  18
  19#include "PropSetSimple.h"
  20#include "WordList.h"
  21#include "LexAccessor.h"
  22#include "Accessor.h"
  23#include "StyleContext.h"
  24#include "CharacterSet.h"
  25#include "LexerModule.h"
  26
  27#ifdef SCI_NAMESPACE
  28using namespace Scintilla;
  29#endif
  30
// Distance between the start of a script language's base style range (SCE_HJ_*,
// SCE_HB_*, SCE_HP_*) and the start of its alternate "A" range (SCE_HJA_*,
// SCE_HBA_*, SCE_HPA_*). statePrintForState() adds the offset and
// stateForPrintState() subtracts it again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Kind of embedded content being lexed. Values of this type are packed into
// 4-bit fields of the per-line state by ColouriseHyperTextDoc().
enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
// Current lexing mode; packed into the low 2 bits of the per-line state by
// ColouriseHyperTextDoc().
enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  37
  38static inline bool IsAWordChar(const int ch) {
  39	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  40}
  41
  42static inline bool IsAWordStart(const int ch) {
  43	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  44}
  45
  46inline bool IsOperator(int ch) {
  47	if (isascii(ch) && isalnum(ch))
  48		return false;
  49	// '.' left out as it is used to make up numbers
  50	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  51	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  52	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  53	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  54	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  55	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
  56		return true;
  57	return false;
  58}
  59
  60static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  61	size_t i = 0;
  62	for (; (i < end - start + 1) && (i < len-1); i++) {
  63		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  64	}
  65	s[i] = '\0';
  66}
  67
  68static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  69
  70	size_t i = 0;
  71	for (; i < sLen-1; i++) {
  72		char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  73		if ((i == 0) && !IsAWordStart(ch))
  74			break;
  75		if ((i > 0) && !IsAWordChar(ch))
  76			break;
  77		s[i] = ch;
  78	}
  79	s[i] = '\0';
  80
  81	return s;
  82}
  83
  84static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  85	char s[100];
  86	GetTextSegment(styler, start, end, s, sizeof(s));
  87	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  88	if (strstr(s, "src"))	// External script
  89		return eScriptNone;
  90	if (strstr(s, "vbs"))
  91		return eScriptVBS;
  92	if (strstr(s, "pyth"))
  93		return eScriptPython;
  94	if (strstr(s, "javas"))
  95		return eScriptJS;
  96	if (strstr(s, "jscr"))
  97		return eScriptJS;
  98	if (strstr(s, "php"))
  99		return eScriptPHP;
 100	if (strstr(s, "xml")) {
 101		const char *xml = strstr(s, "xml");
 102		for (const char *t=s; t<xml; t++) {
 103			if (!IsASpace(*t)) {
 104				return prevValue;
 105			}
 106		}
 107		return eScriptXML;
 108	}
 109
 110	return prevValue;
 111}
 112
 113static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 114	int iResult = 0;
 115	char s[100];
 116	GetTextSegment(styler, start, end, s, sizeof(s));
 117	if (0 == strncmp(s, "php", 3)) {
 118		iResult = 3;
 119	}
 120
 121	return iResult;
 122}
 123
 124static script_type ScriptOfState(int state) {
 125	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 126		return eScriptPython;
 127	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 128		return eScriptVBS;
 129	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 130		return eScriptJS;
 131	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 132		return eScriptPHP;
 133	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 134		return eScriptSGML;
 135	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 136		return eScriptSGMLblock;
 137	} else {
 138		return eScriptNone;
 139	}
 140}
 141
 142static int statePrintForState(int state, script_mode inScriptType) {
 143	int StateToPrint = state;
 144
 145	if (state >= SCE_HJ_START) {
 146		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 147			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 148		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 149			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 150		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 151			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 152		}
 153	}
 154
 155	return StateToPrint;
 156}
 157
 158static int stateForPrintState(int StateToPrint) {
 159	int state;
 160
 161	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 162		state = StateToPrint - SCE_HA_PYTHON;
 163	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 164		state = StateToPrint - SCE_HA_VBS;
 165	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 166		state = StateToPrint - SCE_HA_JS;
 167	} else {
 168		state = StateToPrint;
 169	}
 170
 171	return state;
 172}
 173
 174static inline bool IsNumber(unsigned int start, Accessor &styler) {
 175	return IsADigit(styler[start]) || (styler[start] == '.') ||
 176	       (styler[start] == '-') || (styler[start] == '#');
 177}
 178
 179static inline bool isStringState(int state) {
 180	bool bResult;
 181
 182	switch (state) {
 183	case SCE_HJ_DOUBLESTRING:
 184	case SCE_HJ_SINGLESTRING:
 185	case SCE_HJA_DOUBLESTRING:
 186	case SCE_HJA_SINGLESTRING:
 187	case SCE_HB_STRING:
 188	case SCE_HBA_STRING:
 189	case SCE_HP_STRING:
 190	case SCE_HP_CHARACTER:
 191	case SCE_HP_TRIPLE:
 192	case SCE_HP_TRIPLEDOUBLE:
 193	case SCE_HPA_STRING:
 194	case SCE_HPA_CHARACTER:
 195	case SCE_HPA_TRIPLE:
 196	case SCE_HPA_TRIPLEDOUBLE:
 197	case SCE_HPHP_HSTRING:
 198	case SCE_HPHP_SIMPLESTRING:
 199	case SCE_HPHP_HSTRING_VARIABLE:
 200	case SCE_HPHP_COMPLEX_VARIABLE:
 201		bResult = true;
 202		break;
 203	default :
 204		bResult = false;
 205		break;
 206	}
 207	return bResult;
 208}
 209
 210static inline bool stateAllowsTermination(int state) {
 211	bool allowTermination = !isStringState(state);
 212	if (allowTermination) {
 213		switch (state) {
 214		case SCE_HB_COMMENTLINE:
 215		case SCE_HPHP_COMMENT:
 216		case SCE_HP_COMMENTLINE:
 217		case SCE_HPA_COMMENTLINE:
 218			allowTermination = false;
 219		}
 220	}
 221	return allowTermination;
 222}
 223
 224// not really well done, since it's only comments that should lex the %> and <%
 225static inline bool isCommentASPState(int state) {
 226	bool bResult;
 227
 228	switch (state) {
 229	case SCE_HJ_COMMENT:
 230	case SCE_HJ_COMMENTLINE:
 231	case SCE_HJ_COMMENTDOC:
 232	case SCE_HB_COMMENTLINE:
 233	case SCE_HP_COMMENTLINE:
 234	case SCE_HPHP_COMMENT:
 235	case SCE_HPHP_COMMENTLINE:
 236		bResult = true;
 237		break;
 238	default :
 239		bResult = false;
 240		break;
 241	}
 242	return bResult;
 243}
 244
 245static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 246	bool wordIsNumber = IsNumber(start, styler);
 247	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 248	if (wordIsNumber) {
 249		chAttr = SCE_H_NUMBER;
 250	} else {
 251		char s[100];
 252		GetTextSegment(styler, start, end, s, sizeof(s));
 253		if (keywords.InList(s))
 254			chAttr = SCE_H_ATTRIBUTE;
 255	}
 256	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 257		// No keywords -> all are known
 258		chAttr = SCE_H_ATTRIBUTE;
 259	styler.ColourTo(end, chAttr);
 260}
 261
 262static int classifyTagHTML(unsigned int start, unsigned int end,
 263                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 264			   bool caseSensitive, bool isXml, bool allowScripts) {
 265	char s[30 + 2];
 266	// Copy after the '<'
 267	unsigned int i = 0;
 268	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 269		char ch = styler[cPos];
 270		if ((ch != '<') && (ch != '/')) {
 271			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 272		}
 273	}
 274
 275	//The following is only a quick hack, to see if this whole thing would work
 276	//we first need the tagname with a trailing space...
 277	s[i] = ' ';
 278	s[i+1] = '\0';
 279
 280	// if the current language is XML, I can fold any tag
 281	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 282	//...to find it in the list of no-container-tags
 283	tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
 284
 285	//now we can remove the trailing space
 286	s[i] = '\0';
 287
 288	// No keywords -> all are known
 289	char chAttr = SCE_H_TAGUNKNOWN;
 290	if (s[0] == '!') {
 291		chAttr = SCE_H_SGML_DEFAULT;
 292	} else if (!keywords || keywords.InList(s)) {
 293		chAttr = SCE_H_TAG;
 294	}
 295	styler.ColourTo(end, chAttr);
 296	if (chAttr == SCE_H_TAG) {
 297		if (allowScripts && 0 == strcmp(s, "script")) {
 298			// check to see if this is a self-closing tag by sniffing ahead
 299			bool isSelfClose = false;
 300			for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
 301				char ch = styler.SafeGetCharAt(cPos, '\0');
 302				if (ch == '\0' || ch == '>')
 303					break;
 304				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
 305					isSelfClose = true;
 306					break;
 307				}
 308			}
 309
 310			// do not enter a script state if the tag self-closed
 311			if (!isSelfClose)
 312				chAttr = SCE_H_SCRIPT;
 313		} else if (!isXml && 0 == strcmp(s, "comment")) {
 314			chAttr = SCE_H_COMMENT;
 315		}
 316	}
 317	return chAttr;
 318}
 319
 320static void classifyWordHTJS(unsigned int start, unsigned int end,
 321                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 322	char chAttr = SCE_HJ_WORD;
 323	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 324	if (wordIsNumber)
 325		chAttr = SCE_HJ_NUMBER;
 326	else {
 327		char s[30 + 1];
 328		unsigned int i = 0;
 329		for (; i < end - start + 1 && i < 30; i++) {
 330			s[i] = styler[start + i];
 331		}
 332		s[i] = '\0';
 333		if (keywords.InList(s))
 334			chAttr = SCE_HJ_KEYWORD;
 335	}
 336	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 337}
 338
 339static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 340	char chAttr = SCE_HB_IDENTIFIER;
 341	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 342	if (wordIsNumber)
 343		chAttr = SCE_HB_NUMBER;
 344	else {
 345		char s[100];
 346		GetTextSegment(styler, start, end, s, sizeof(s));
 347		if (keywords.InList(s)) {
 348			chAttr = SCE_HB_WORD;
 349			if (strcmp(s, "rem") == 0)
 350				chAttr = SCE_HB_COMMENTLINE;
 351		}
 352	}
 353	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 354	if (chAttr == SCE_HB_COMMENTLINE)
 355		return SCE_HB_COMMENTLINE;
 356	else
 357		return SCE_HB_DEFAULT;
 358}
 359
 360static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 361	bool wordIsNumber = IsADigit(styler[start]);
 362	char s[30 + 1];
 363	unsigned int i = 0;
 364	for (; i < end - start + 1 && i < 30; i++) {
 365		s[i] = styler[start + i];
 366	}
 367	s[i] = '\0';
 368	char chAttr = SCE_HP_IDENTIFIER;
 369	if (0 == strcmp(prevWord, "class"))
 370		chAttr = SCE_HP_CLASSNAME;
 371	else if (0 == strcmp(prevWord, "def"))
 372		chAttr = SCE_HP_DEFNAME;
 373	else if (wordIsNumber)
 374		chAttr = SCE_HP_NUMBER;
 375	else if (keywords.InList(s))
 376		chAttr = SCE_HP_WORD;
 377	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 378	strcpy(prevWord, s);
 379}
 380
 381// Update the word colour to default or keyword
 382// Called when in a PHP word
 383static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 384	char chAttr = SCE_HPHP_DEFAULT;
 385	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 386	if (wordIsNumber)
 387		chAttr = SCE_HPHP_NUMBER;
 388	else {
 389		char s[100];
 390		GetTextSegment(styler, start, end, s, sizeof(s));
 391		if (keywords.InList(s))
 392			chAttr = SCE_HPHP_WORD;
 393	}
 394	styler.ColourTo(end, chAttr);
 395}
 396
 397static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 398	char s[30 + 1];
 399	unsigned int i = 0;
 400	for (; i < end - start + 1 && i < 30; i++) {
 401		s[i] = styler[start + i];
 402	}
 403	s[i] = '\0';
 404	return keywords.InList(s);
 405}
 406
 407static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 408	char s[30 + 1];
 409	unsigned int i = 0;
 410	for (; i < end - start + 1 && i < 30; i++) {
 411		s[i] = styler[start + i];
 412	}
 413	s[i] = '\0';
 414	return (0 == strcmp(s, "[CDATA["));
 415}
 416
 417// Return the first state to reach when entering a scripting language
 418static int StateForScript(script_type scriptLanguage) {
 419	int Result;
 420	switch (scriptLanguage) {
 421	case eScriptVBS:
 422		Result = SCE_HB_START;
 423		break;
 424	case eScriptPython:
 425		Result = SCE_HP_START;
 426		break;
 427	case eScriptPHP:
 428		Result = SCE_HPHP_DEFAULT;
 429		break;
 430	case eScriptXML:
 431		Result = SCE_H_TAGUNKNOWN;
 432		break;
 433	case eScriptSGML:
 434		Result = SCE_H_SGML_DEFAULT;
 435		break;
 436	case eScriptComment:
 437		Result = SCE_H_COMMENT;
 438		break;
 439	default :
 440		Result = SCE_HJ_START;
 441		break;
 442	}
 443	return Result;
 444}
 445
 446static inline bool ishtmlwordchar(int ch) {
 447	return !isascii(ch) ||
 448		(isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
 449}
 450
 451static inline bool issgmlwordchar(int ch) {
 452	return !isascii(ch) ||
 453		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 454}
 455
 456static inline bool IsPhpWordStart(int ch) {
 457	return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 458}
 459
 460static inline bool IsPhpWordChar(int ch) {
 461	return IsADigit(ch) || IsPhpWordStart(ch);
 462}
 463
 464static bool InTagState(int state) {
 465	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 466	       state == SCE_H_SCRIPT ||
 467	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 468	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 469	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 470}
 471
 472static bool IsCommentState(const int state) {
 473	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 474}
 475
 476static bool IsScriptCommentState(const int state) {
 477	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 478		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 479}
 480
 481static bool isLineEnd(int ch) {
 482	return ch == '\r' || ch == '\n';
 483}
 484
 485static bool isOKBeforeRE(int ch) {
 486	return (ch == '(') || (ch == '=') || (ch == ',');
 487}
 488
 489static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
 490	if (strlen(blockType) == 0) {
 491		return ((ch == '%') && (chNext == '>'));
 492	} else if ((0 == strcmp(blockType, "inherit")) ||
 493			   (0 == strcmp(blockType, "namespace")) ||
 494			   (0 == strcmp(blockType, "include")) ||
 495			   (0 == strcmp(blockType, "page"))) {
 496		return ((ch == '/') && (chNext == '>'));
 497	} else if (0 == strcmp(blockType, "%")) {
 498		return isLineEnd(ch);
 499	} else if (0 == strcmp(blockType, "{")) {
 500		return ch == '}';
 501	} else {
 502		return (ch == '>');
 503	}
 504}
 505
 506static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
 507	if (strlen(blockType) == 0) {
 508		return 0;
 509	} else if (0 == strcmp(blockType, "%")) {
 510		return ((ch == '%') && (chNext == '}'));
 511	} else if (0 == strcmp(blockType, "{")) {
 512		return ((ch == '}') && (chNext == '}'));
 513	} else {
 514		return 0;
 515	}
 516}
 517
 518static bool isPHPStringState(int state) {
 519	return
 520	    (state == SCE_HPHP_HSTRING) ||
 521	    (state == SCE_HPHP_SIMPLESTRING) ||
 522	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 523	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 524}
 525
// Read the delimiter name of a PHP heredoc-style string starting at document
// position i and store it, NUL-terminated, in phpStringDelimiter.
//
// Leading spaces and tabs are skipped. The name must start with a character
// accepted by IsPhpWordStart(), optionally preceded by a single quote; in the
// quoted form isSimpleString is set to true and the name must be followed by a
// closing quote that is itself followed by a line end character (presumably
// the nowdoc syntax -- confirm against the PHP manual). The name runs to the
// end of the line and may only contain characters accepted by IsPhpWordChar().
//
//   phpStringDelimiter      receives the name; set to "" on failure.
//   phpStringDelimiterSize  capacity of phpStringDelimiter; longer names are
//                           truncated to fit.
//   i                       position to start scanning from.
//   lengthDoc               scanning never reads at or beyond this position
//                           through styler[]; look-ahead uses SafeGetCharAt().
//   isSimpleString          set to true for the quoted form; never reset to
//                           false here, so the caller must initialise it.
//
// Returns the position of the last character consumed (j - 1) on success, or
// i - 1 (computed from the value of i on entry) when no valid delimiter is found.
//
// NOTE(review): ch and styler[j] are plain char values; where char is signed,
// bytes >= 0x80 reach IsPhpWordStart() as negative ints and fail its
// "ch >= 0x7f" test -- confirm whether high bytes are meant to be accepted.
static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
	int j;
	// Value returned on every failure path.
	const int beginning = i - 1;
	bool isValidSimpleString = false;

	// Skip blanks between the operator and the delimiter name.
	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
		i++;

	char ch = styler.SafeGetCharAt(i);
	const char chNext = styler.SafeGetCharAt(i + 1);
	if (!IsPhpWordStart(ch)) {
		if (ch == '\'' && IsPhpWordStart(chNext)) {
			// Quoted form: step over the quote and start the name at chNext.
			i++;
			ch = chNext;
			isSimpleString = true;
		} else {
			phpStringDelimiter[0] = '\0';
			return beginning;
		}
	}
	// First character of the name; i now points at the second one.
	phpStringDelimiter[0] = ch;
	i++;

	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
		if (!IsPhpWordChar(styler[j])) {
			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
				// Closing quote directly before the line end: valid quoted name.
				isValidSimpleString = true;
				j++;
				break;
			} else {
				phpStringDelimiter[0] = '\0';
				return beginning;
			}
		}
		// Character j is stored at index j-i+1. Once the buffer is full, i is
		// advanced together with j so that j-i stays constant: the excess
		// characters are not stored and the terminator index computed below
		// remains inside the buffer.
		if (j - i < phpStringDelimiterSize - 2)
			phpStringDelimiter[j-i+1] = styler[j];
		else
			i++;
	}
	// A quoted name that reached the end of the line (or of the range) without
	// its closing quote is rejected.
	if (isSimpleString && !isValidSimpleString) {
		phpStringDelimiter[0] = '\0';
		return beginning;
	}
	// In the quoted case j was advanced past the closing quote, hence the -1.
	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
	return j - 1;
}
 572
 573static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 574                                  Accessor &styler, bool isXml) {
 575	WordList &keywords = *keywordlists[0];
 576	WordList &keywords2 = *keywordlists[1];
 577	WordList &keywords3 = *keywordlists[2];
 578	WordList &keywords4 = *keywordlists[3];
 579	WordList &keywords5 = *keywordlists[4];
 580	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 581
 582	// Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 583	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 584	char prevWord[200];
 585	prevWord[0] = '\0';
 586	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 587	phpStringDelimiter[0] = '\0';
 588	int StateToPrint = initStyle;
 589	int state = stateForPrintState(StateToPrint);
 590	char makoBlockType[200];
 591	makoBlockType[0] = '\0';
 592	char djangoBlockType[2];
 593	djangoBlockType[0] = '\0';
 594
 595	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 596	if (InTagState(state)) {
 597		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 598			startPos--;
 599			length++;
 600		}
 601		state = SCE_H_DEFAULT;
 602	}
 603	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 604	if (isPHPStringState(state)) {
 605		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 606			startPos--;
 607			length++;
 608			state = styler.StyleAt(startPos);
 609		}
 610		if (startPos == 0)
 611			state = SCE_H_DEFAULT;
 612	}
 613	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 614
 615	int lineCurrent = styler.GetLine(startPos);
 616	int lineState;
 617	if (lineCurrent > 0) {
 618		lineState = styler.GetLineState(lineCurrent);
 619	} else {
 620		// Default client and ASP scripting language is JavaScript
 621		lineState = eScriptJS << 8;
 622
 623		// property asp.default.language
 624		//	Script in ASP code is initially assumed to be in JavaScript.
 625		//	To change this to VBScript set asp.default.language to 2. Python is 3.
 626		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 627	}
 628	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 629	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 630	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 631	bool tagDontFold = false; //some HTML tags should not be folded
 632	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 633	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 634	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 635
 636	script_type scriptLanguage = ScriptOfState(state);
 637	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 638	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 639		scriptLanguage = eScriptComment;
 640	}
 641	script_type beforeLanguage = ScriptOfState(beforePreProc);
 642
 643	// property fold.html
 644	//	Folding is turned on or off for HTML and XML files with this option.
 645	//	The fold option must also be on for folding to occur.
 646	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 647
 648	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 649
 650	// property fold.html.preprocessor
 651	//	Folding is turned on or off for scripts embedded in HTML files with this option.
 652	//	The default is on.
 653	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 654
 655	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 656
 657	// property fold.hypertext.comment
 658	//	Allow folding for comments in scripts embedded in HTML.
 659	//	The default is off.
 660	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 661
 662	// property fold.hypertext.heredoc
 663	//	Allow folding for heredocs in scripts embedded in HTML.
 664	//	The default is off.
 665	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 666
 667	// property html.tags.case.sensitive
 668	//	For XML and HTML, setting this property to 1 will make tags match in a case
 669	//	sensitive way which is the expected behaviour for XML and XHTML.
 670	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 671
 672	// property lexer.xml.allow.scripts
 673	//	Set to 0 to disable scripts in XML.
 674	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 675
 676	// property lexer.html.mako
 677	//	Set to 1 to enable the mako template language.
 678	const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
 679
 680	// property lexer.html.django
 681	//	Set to 1 to enable the django template language.
 682	const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
 683
 684	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 685	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 686	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 687
 688	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 689	int levelCurrent = levelPrev;
 690	int visibleChars = 0;
 691	int lineStartVisibleChars = 0;
 692
 693	int chPrev = ' ';
 694	int ch = ' ';
 695	int chPrevNonWhite = ' ';
 696	// look back to set chPrevNonWhite properly for better regex colouring
 697	if (scriptLanguage == eScriptJS && startPos > 0) {
 698		int back = startPos;
 699		int style = 0;
 700		while (--back) {
 701			style = styler.StyleAt(back);
 702			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 703				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 704				break;
 705		}
 706		if (style == SCE_HJ_SYMBOLS) {
 707			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 708		}
 709	}
 710
 711	styler.StartSegment(startPos);
 712	const int lengthDoc = startPos + length;
 713	for (int i = startPos; i < lengthDoc; i++) {
 714		const int chPrev2 = chPrev;
 715		chPrev = ch;
 716		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 717			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 718			chPrevNonWhite = ch;
 719		ch = static_cast<unsigned char>(styler[i]);
 720		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 721		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 722
 723		// Handle DBCS codepages
 724		if (styler.IsLeadByte(static_cast<char>(ch))) {
 725			chPrev = ' ';
 726			i += 1;
 727			continue;
 728		}
 729
 730		if ((!IsASpace(ch) || !foldCompact) && fold)
 731			visibleChars++;
 732		if (!IsASpace(ch))
 733			lineStartVisibleChars++;
 734
 735		// decide what is the current state to print (depending of the script tag)
 736		StateToPrint = statePrintForState(state, inScriptType);
 737
 738		// handle script folding
 739		if (fold) {
 740			switch (scriptLanguage) {
 741			case eScriptJS:
 742			case eScriptPHP:
 743				//not currently supported				case eScriptVBS:
 744
 745				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 746				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 747				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 748					if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 749						levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
 750					}
 751				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 752					levelCurrent--;
 753				}
 754				break;
 755			case eScriptPython:
 756				if (state != SCE_HP_COMMENTLINE) {
 757					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 758						levelCurrent++;
 759					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 760						// check if the number of tabs is lower than the level
 761						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 762						for (int j = 0; Findlevel > 0; j++) {
 763							char chTmp = styler.SafeGetCharAt(i + j + 1);
 764							if (chTmp == '\t') {
 765								Findlevel -= 8;
 766							} else if (chTmp == ' ') {
 767								Findlevel--;
 768							} else {
 769								break;
 770							}
 771						}
 772
 773						if (Findlevel > 0) {
 774							levelCurrent -= Findlevel / 8;
 775							if (Findlevel % 8)
 776								levelCurrent--;
 777						}
 778					}
 779				}
 780				break;
 781			default:
 782				break;
 783			}
 784		}
 785
 786		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 787			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 788			// Avoid triggering two times on Dos/Win
 789			// New line -> record any line state onto /next/ line
 790			if (fold) {
 791				int lev = levelPrev;
 792				if (visibleChars == 0)
 793					lev |= SC_FOLDLEVELWHITEFLAG;
 794				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 795					lev |= SC_FOLDLEVELHEADERFLAG;
 796
 797				styler.SetLevel(lineCurrent, lev);
 798				visibleChars = 0;
 799				levelPrev = levelCurrent;
 800			}
 801			lineCurrent++;
 802			lineStartVisibleChars = 0;
 803			styler.SetLineState(lineCurrent,
 804			                    ((inScriptType & 0x03) << 0) |
 805			                    ((tagOpened & 0x01) << 2) |
 806			                    ((tagClosing & 0x01) << 3) |
 807			                    ((aspScript & 0x0F) << 4) |
 808			                    ((clientScript & 0x0F) << 8) |
 809			                    ((beforePreProc & 0xFF) << 12));
 810		}
 811
 812		// Allow falling through to mako handling code if newline is going to end a block
 813		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
 814			(!isMako || (0 != strcmp(makoBlockType, "%")))) {
 815		}
 816
 817		// generic end of script processing
 818		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 819			// Check if it's the end of the script tag (or any other HTML tag)
 820			switch (state) {
 821				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 822			case SCE_H_DOUBLESTRING:
 823			case SCE_H_SINGLESTRING:
 824			case SCE_HJ_COMMENT:
 825			case SCE_HJ_COMMENTDOC:
 826			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 827			// the end of script marker from some JS interpreters.
 828			case SCE_HB_COMMENTLINE:
 829			case SCE_HBA_COMMENTLINE:
 830			case SCE_HJ_DOUBLESTRING:
 831			case SCE_HJ_SINGLESTRING:
 832			case SCE_HJ_REGEX:
 833			case SCE_HB_STRING:
 834			case SCE_HBA_STRING:
 835			case SCE_HP_STRING:
 836			case SCE_HP_TRIPLE:
 837			case SCE_HP_TRIPLEDOUBLE:
 838			case SCE_HPHP_HSTRING:
 839			case SCE_HPHP_SIMPLESTRING:
 840			case SCE_HPHP_COMMENT:
 841			case SCE_HPHP_COMMENTLINE:
 842				break;
 843			default :
 844				// check if the closing tag is a script tag
 845				if (const char *tag =
 846						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 847						state == SCE_H_COMMENT ? "comment" : 0) {
 848					int j = i + 2;
 849					int chr;
 850					do {
 851						chr = static_cast<int>(*tag++);
 852					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 853					if (chr != 0) break;
 854				}
 855				// closing tag of the script (it's a closing HTML tag anyway)
 856				styler.ColourTo(i - 1, StateToPrint);
 857				state = SCE_H_TAGUNKNOWN;
 858				inScriptType = eHtml;
 859				scriptLanguage = eScriptNone;
 860				clientScript = eScriptJS;
 861				i += 2;
 862				visibleChars += 2;
 863				tagClosing = true;
 864				continue;
 865			}
 866		}
 867
 868		/////////////////////////////////////
 869		// handle the start of PHP pre-processor = Non-HTML
 870		else if ((state != SCE_H_ASPAT) &&
 871		         !isPHPStringState(state) &&
 872		         (state != SCE_HPHP_COMMENT) &&
 873		         (state != SCE_HPHP_COMMENTLINE) &&
 874		         (ch == '<') &&
 875		         (chNext == '?') &&
 876				 !IsScriptCommentState(state) ) {
 877			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
 878			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 879			styler.ColourTo(i - 1, StateToPrint);
 880			beforePreProc = state;
 881			i++;
 882			visibleChars++;
 883			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 884			if (scriptLanguage == eScriptXML)
 885				styler.ColourTo(i, SCE_H_XMLSTART);
 886			else
 887				styler.ColourTo(i, SCE_H_QUESTION);
 888			state = StateForScript(scriptLanguage);
 889			if (inScriptType == eNonHtmlScript)
 890				inScriptType = eNonHtmlScriptPreProc;
 891			else
 892				inScriptType = eNonHtmlPreProc;
 893			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 894			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 895				levelCurrent++;
 896			}
 897			// should be better
 898			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 899			continue;
 900		}
 901
 902		// handle the start Mako template Python code
 903		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
 904															 (lineStartVisibleChars == 1 && ch == '%') ||
 905															 (ch == '$' && chNext == '{') ||
 906															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
 907			if (ch == '%')
 908				strcpy(makoBlockType, "%");
 909			else if (ch == '$')
 910				strcpy(makoBlockType, "{");
 911			else if (chNext == '/')
 912				GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
 913			else
 914				GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
 915			styler.ColourTo(i - 1, StateToPrint);
 916			beforePreProc = state;
 917			if (inScriptType == eNonHtmlScript)
 918				inScriptType = eNonHtmlScriptPreProc;
 919			else
 920				inScriptType = eNonHtmlPreProc;
 921
 922			if (chNext == '/') {
 923				i += 2;
 924				visibleChars += 2;
 925			} else if (ch != '%') {
 926				i++;
 927				visibleChars++;
 928			}
 929			state = SCE_HP_START;
 930			scriptLanguage = eScriptPython;
 931			styler.ColourTo(i, SCE_H_ASP);
 932			if (foldHTMLPreprocessor && ch == '<')
 933				levelCurrent++;
 934
 935			if (ch != '%' && ch != '$') {
 936				i += strlen(makoBlockType);
 937				visibleChars += strlen(makoBlockType);
 938				if (keywords4.InList(makoBlockType))
 939					styler.ColourTo(i, SCE_HP_WORD);
 940				else
 941					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 942			}
 943
 944			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 945			continue;
 946		}
 947
 948		// handle the start Django template code
 949		else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
 950			if (chNext == '%')
 951				strcpy(djangoBlockType, "%");
 952			else
 953				strcpy(djangoBlockType, "{");
 954			styler.ColourTo(i - 1, StateToPrint);
 955			beforePreProc = state;
 956			if (inScriptType == eNonHtmlScript)
 957				inScriptType = eNonHtmlScriptPreProc;
 958			else
 959				inScriptType = eNonHtmlPreProc;
 960
 961			i += 1;
 962			visibleChars += 1;
 963			state = SCE_HP_START;
 964			beforeLanguage = scriptLanguage;
 965			scriptLanguage = eScriptPython;
 966			styler.ColourTo(i, SCE_H_ASP);
 967			if (foldHTMLPreprocessor && chNext == '%')
 968				levelCurrent++;
 969
 970			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 971			continue;
 972		}
 973
 974		// handle the start of ASP pre-processor = Non-HTML
 975		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
 976			styler.ColourTo(i - 1, StateToPrint);
 977			beforePreProc = state;
 978			if (inScriptType == eNonHtmlScript)
 979				inScriptType = eNonHtmlScriptPreProc;
 980			else
 981				inScriptType = eNonHtmlPreProc;
 982
 983			if (chNext2 == '@') {
 984				i += 2; // place as if it was the second next char treated
 985				visibleChars += 2;
 986				state = SCE_H_ASPAT;
 987			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
 988				styler.ColourTo(i + 3, SCE_H_ASP);
 989				state = SCE_H_XCCOMMENT;
 990				scriptLanguage = eScriptVBS;
 991				continue;
 992			} else {
 993				if (chNext2 == '=') {
 994					i += 2; // place as if it was the second next char treated
 995					visibleChars += 2;
 996				} else {
 997					i++; // place as if it was the next char treated
 998					visibleChars++;
 999				}
1000
1001				state = StateForScript(aspScript);
1002			}
1003			scriptLanguage = eScriptVBS;
1004			styler.ColourTo(i, SCE_H_ASP);
1005			// fold whole script
1006			if (foldHTMLPreprocessor)
1007				levelCurrent++;
1008			// should be better
1009			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1010			continue;
1011		}
1012
1013		/////////////////////////////////////
1014		// handle the start of SGML language (DTD)
1015		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1016				 (chPrev == '<') &&
1017				 (ch == '!') &&
1018				 (StateToPrint != SCE_H_CDATA) &&
1019				 (!IsCommentState(StateToPrint)) &&
1020				 (!IsScriptCommentState(StateToPrint)) ) {
1021			beforePreProc = state;
1022			styler.ColourTo(i - 2, StateToPrint);
1023			if ((chNext == '-') && (chNext2 == '-')) {
1024				state = SCE_H_COMMENT; // wait for a pending command
1025				styler.ColourTo(i + 2, SCE_H_COMMENT);
1026				i += 2; // follow styling after the --
1027			} else if (isWordCdata(i + 1, i + 7, styler)) {
1028				state = SCE_H_CDATA;
1029			} else {
1030				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1031				scriptLanguage = eScriptSGML;
1032				state = SCE_H_SGML_COMMAND; // wait for a pending command
1033			}
1034			// fold whole tag (-- when closing the tag)
1035			if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1036				levelCurrent++;
1037			continue;
1038		}
1039
1040		// handle the end of Mako Python code
1041		else if (isMako &&
1042			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1043				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1044				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1045			if (state == SCE_H_ASPAT) {
1046				aspScript = segIsScriptingIndicator(styler,
1047				                                    styler.GetStartSegment(), i - 1, aspScript);
1048			}
1049			if (state == SCE_HP_WORD) {
1050				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1051			} else {
1052				styler.ColourTo(i - 1, StateToPrint);
1053			}
1054			if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1055				i++;
1056				visibleChars++;
1057		    }
1058			if (0 != strcmp(makoBlockType, "%")) {
1059				styler.ColourTo(i, SCE_H_ASP);
1060			}
1061			state = beforePreProc;
1062			if (inScriptType == eNonHtmlScriptPreProc)
1063				inScriptType = eNonHtmlScript;
1064			else
1065				inScriptType = eHtml;
1066			if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1067				levelCurrent--;
1068			}
1069			scriptLanguage = eScriptNone;
1070			continue;
1071		}
1072
1073		// handle the end of Django template code
1074		else if (isDjango &&
1075			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1076				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1077				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1078			if (state == SCE_H_ASPAT) {
1079				aspScript = segIsScriptingIndicator(styler,
1080				                                    styler.GetStartSegment(), i - 1, aspScript);
1081			}
1082			if (state == SCE_HP_WORD) {
1083				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1084			} else {
1085				styler.ColourTo(i - 1, StateToPrint);
1086			}
1087			i += 1;
1088			visibleChars += 1;
1089			styler.ColourTo(i, SCE_H_ASP);
1090			state = beforePreProc;
1091			if (inScriptType == eNonHtmlScriptPreProc)
1092				inScriptType = eNonHtmlScript;
1093			else
1094				inScriptType = eHtml;
1095			if (foldHTMLPreprocessor) {
1096				levelCurrent--;
1097			}
1098			scriptLanguage = beforeLanguage;
1099			continue;
1100		}
1101
1102		// handle the end of a pre-processor = Non-HTML
1103		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1104				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1105				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1106		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1107			if (state == SCE_H_ASPAT) {
1108				aspScript = segIsScriptingIndicator(styler,
1109				                                    styler.GetStartSegment(), i - 1, aspScript);
1110			}
1111			// Bounce out of any ASP mode
1112			switch (state) {
1113			case SCE_HJ_WORD:
1114				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1115				break;
1116			case SCE_HB_WORD:
1117				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1118				break;
1119			case SCE_HP_WORD:
1120				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1121				break;
1122			case SCE_HPHP_WORD:
1123				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1124				break;
1125			case SCE_H_XCCOMMENT:
1126				styler.ColourTo(i - 1, state);
1127				break;
1128			default :
1129				styler.ColourTo(i - 1, StateToPrint);
1130				break;
1131			}
1132			if (scriptLanguage != eScriptSGML) {
1133				i++;
1134				visibleChars++;
1135			}
1136			if (ch == '%')
1137				styler.ColourTo(i, SCE_H_ASP);
1138			else if (scriptLanguage == eScriptXML)
1139				styler.ColourTo(i, SCE_H_XMLEND);
1140			else if (scriptLanguage == eScriptSGML)
1141				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1142			else
1143				styler.ColourTo(i, SCE_H_QUESTION);
1144			state = beforePreProc;
1145			if (inScriptType == eNonHtmlScriptPreProc)
1146				inScriptType = eNonHtmlScript;
1147			else
1148				inScriptType = eHtml;
1149			// Unfold all scripting languages, except for XML tag
1150			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1151				levelCurrent--;
1152			}
1153			scriptLanguage = eScriptNone;
1154			continue;
1155		}
1156		/////////////////////////////////////
1157
1158		switch (state) {
1159		case SCE_H_DEFAULT:
1160			if (ch == '<') {
1161				// in HTML, fold on tag open and unfold on tag close
1162				tagOpened = true;
1163				tagClosing = (chNext == '/');
1164				styler.ColourTo(i - 1, StateToPrint);
1165				if (chNext != '!')
1166					state = SCE_H_TAGUNKNOWN;
1167			} else if (ch == '&') {
1168				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1169				state = SCE_H_ENTITY;
1170			}
1171			break;
1172		case SCE_H_SGML_DEFAULT:
1173		case SCE_H_SGML_BLOCK_DEFAULT:
1174//			if (scriptLanguage == eScriptSGMLblock)
1175//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1176
1177			if (ch == '\"') {
1178				styler.ColourTo(i - 1, StateToPrint);
1179				state = SCE_H_SGML_DOUBLESTRING;
1180			} else if (ch == '\'') {
1181				styler.ColourTo(i - 1, StateToPrint);
1182				state = SCE_H_SGML_SIMPLESTRING;
1183			} else if ((ch == '-') && (chPrev == '-')) {
1184				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1185					styler.ColourTo(i - 2, StateToPrint);
1186				}
1187				state = SCE_H_SGML_COMMENT;
1188			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1189				styler.ColourTo(i - 2, StateToPrint);
1190				state = SCE_H_SGML_ENTITY;
1191			} else if (ch == '#') {
1192				styler.ColourTo(i - 1, StateToPrint);
1193				state = SCE_H_SGML_SPECIAL;
1194			} else if (ch == '[') {
1195				styler.ColourTo(i - 1, StateToPrint);
1196				scriptLanguage = eScriptSGMLblock;
1197				state = SCE_H_SGML_BLOCK_DEFAULT;
1198			} else if (ch == ']') {
1199				if (scriptLanguage == eScriptSGMLblock) {
1200					styler.ColourTo(i, StateToPrint);
1201					scriptLanguage = eScriptSGML;
1202				} else {
1203					styler.ColourTo(i - 1, StateToPrint);
1204					styler.ColourTo(i, SCE_H_SGML_ERROR);
1205				}
1206				state = SCE_H_SGML_DEFAULT;
1207			} else if (scriptLanguage == eScriptSGMLblock) {
1208				if ((ch == '!') && (chPrev == '<')) {
1209					styler.ColourTo(i - 2, StateToPrint);
1210					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1211					state = SCE_H_SGML_COMMAND;
1212				} else if (ch == '>') {
1213					styler.ColourTo(i - 1, StateToPrint);
1214					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1215				}
1216			}
1217			break;
1218		case SCE_H_SGML_COMMAND:
1219			if ((ch == '-') && (chPrev == '-')) {
1220				styler.ColourTo(i - 2, StateToPrint);
1221				state = SCE_H_SGML_COMMENT;
1222			} else if (!issgmlwordchar(ch)) {
1223				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1224					styler.ColourTo(i - 1, StateToPrint);
1225					state = SCE_H_SGML_1ST_PARAM;
1226				} else {
1227					state = SCE_H_SGML_ERROR;
1228				}
1229			}
1230			break;
1231		case SCE_H_SGML_1ST_PARAM:
1232			// wait for the beginning of the word
1233			if ((ch == '-') && (chPrev == '-')) {
1234				if (scriptLanguage == eScriptSGMLblock) {
1235					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1236				} else {
1237					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1238				}
1239				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1240			} else if (issgmlwordchar(ch)) {
1241				if (scriptLanguage == eScriptSGMLblock) {
1242					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1243				} else {
1244					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1245				}
1246				// find the length of the word
1247				int size = 1;
1248				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1249					size++;
1250				styler.ColourTo(i + size - 1, StateToPrint);
1251				i += size - 1;
1252				visibleChars += size - 1;
1253				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1254				if (scriptLanguage == eScriptSGMLblock) {
1255					state = SCE_H_SGML_BLOCK_DEFAULT;
1256				} else {
1257					state = SCE_H_SGML_DEFAULT;
1258				}
1259				continue;
1260			}
1261			break;
1262		case SCE_H_SGML_ERROR:
1263			if ((ch == '-') && (chPrev == '-')) {
1264				styler.ColourTo(i - 2, StateToPrint);
1265				state = SCE_H_SGML_COMMENT;
1266			}
1267		case SCE_H_SGML_DOUBLESTRING:
1268			if (ch == '\"') {
1269				styler.ColourTo(i, StateToPrint);
1270				state = SCE_H_SGML_DEFAULT;
1271			}
1272			break;
1273		case SCE_H_SGML_SIMPLESTRING:
1274			if (ch == '\'') {
1275				styler.ColourTo(i, StateToPrint);
1276				state = SCE_H_SGML_DEFAULT;
1277			}
1278			break;
1279		case SCE_H_SGML_COMMENT:
1280			if ((ch == '-') && (chPrev == '-')) {
1281				styler.ColourTo(i, StateToPrint);
1282				state = SCE_H_SGML_DEFAULT;
1283			}
1284			break;
1285		case SCE_H_CDATA:
1286			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1287				styler.ColourTo(i, StateToPrint);
1288				state = SCE_H_DEFAULT;
1289				levelCurrent--;
1290			}
1291			break;
1292		case SCE_H_COMMENT:
1293			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1294				styler.ColourTo(i, StateToPrint);
1295				state = SCE_H_DEFAULT;
1296				levelCurrent--;
1297			}
1298			break;
1299		case SCE_H_SGML_1ST_PARAM_COMMENT:
1300			if ((ch == '-') && (chPrev == '-')) {
1301				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1302				state = SCE_H_SGML_1ST_PARAM;
1303			}
1304			break;
1305		case SCE_H_SGML_SPECIAL:
1306			if (!(isascii(ch) && isupper(ch))) {
1307				styler.ColourTo(i - 1, StateToPrint);
1308				if (isalnum(ch)) {
1309					state = SCE_H_SGML_ERROR;
1310				} else {
1311					state = SCE_H_SGML_DEFAULT;
1312				}
1313			}
1314			break;
1315		case SCE_H_SGML_ENTITY:
1316			if (ch == ';') {
1317				styler.ColourTo(i, StateToPrint);
1318				state = SCE_H_SGML_DEFAULT;
1319			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1320				styler.ColourTo(i, SCE_H_SGML_ERROR);
1321				state = SCE_H_SGML_DEFAULT;
1322			}
1323			break;
1324		case SCE_H_ENTITY:
1325			if (ch == ';') {
1326				styler.ColourTo(i, StateToPrint);
1327				state = SCE_H_DEFAULT;
1328			}
1329			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1330				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1331				if (!isascii(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1332					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1333				else
1334					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1335				state = SCE_H_DEFAULT;
1336			}
1337			break;
1338		case SCE_H_TAGUNKNOWN:
1339			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1340				int eClass = classifyTagHTML(styler.GetStartSegment(),
1341					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1342				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1343					if (!tagClosing) {
1344						inScriptType = eNonHtmlScript;
1345						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1346					} else {
1347						scriptLanguage = eScriptNone;
1348					}
1349					eClass = SCE_H_TAG;
1350				}
1351				if (ch == '>') {
1352					styler.ColourTo(i, eClass);
1353					if (inScriptType == eNonHtmlScript) {
1354						state = StateForScript(scriptLanguage);
1355					} else {
1356						state = SCE_H_DEFAULT;
1357					}
1358					tagOpened = false;
1359					if (!tagDontFold) {
1360						if (tagClosing) {
1361							levelCurrent--;
1362						} else {
1363							levelCurrent++;
1364						}
1365					}
1366					tagClosing = false;
1367				} else if (ch == '/' && chNext == '>') {
1368					if (eClass == SCE_H_TAGUNKNOWN) {
1369						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1370					} else {
1371						styler.ColourTo(i - 1, StateToPrint);
1372						styler.ColourTo(i + 1, SCE_H_TAGEND);
1373					}
1374					i++;
1375					ch = chNext;
1376					state = SCE_H_DEFAULT;
1377					tagOpened = false;
1378				} else {
1379					if (eClass != SCE_H_TAGUNKNOWN) {
1380						if (eClass == SCE_H_SGML_DEFAULT) {
1381							state = SCE_H_SGML_DEFAULT;
1382						} else {
1383							state = SCE_H_OTHER;
1384						}
1385					}
1386				}
1387			}
1388			break;
1389		case SCE_H_ATTRIBUTE:
1390			if (!setAttributeContinue.Contains(ch)) {
1391				if (inScriptType == eNonHtmlScript) {
1392					int scriptLanguagePrev = scriptLanguage;
1393					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1394					scriptLanguage = clientScript;
1395					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1396						inScriptType = eHtml;
1397				}
1398				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1399				if (ch == '>') {
1400					styler.ColourTo(i, SCE_H_TAG);
1401					if (inScriptType == eNonHtmlScript) {
1402						state = StateForScript(scriptLanguage);
1403					} else {
1404						state = SCE_H_DEFAULT;
1405					}
1406					tagOpened = false;
1407					if (!tagDontFold) {
1408						if (tagClosing) {
1409							levelCurrent--;
1410						} else {
1411							levelCurrent++;
1412						}
1413					}
1414					tagClosing = false;
1415				} else if (ch == '=') {
1416					styler.ColourTo(i, SCE_H_OTHER);
1417					state = SCE_H_VALUE;
1418				} else {
1419					state = SCE_H_OTHER;
1420				}
1421			}
1422			break;
1423		case SCE_H_OTHER:
1424			if (ch == '>') {
1425				styler.ColourTo(i - 1, StateToPrint);
1426				styler.ColourTo(i, SCE_H_TAG);
1427				if (inScriptType == eNonHtmlScript) {
1428					state = StateForScript(scriptLanguage);
1429				} else {
1430					state = SCE_H_DEFAULT;
1431				}
1432				tagOpened = false;
1433				if (!tagDontFold) {
1434					if (tagClosing) {
1435						levelCurrent--;
1436					} else {
1437						levelCurrent++;
1438					}
1439				}
1440				tagClosing = false;
1441			} else if (ch == '\"') {
1442				styler.ColourTo(i - 1, StateToPrint);
1443				state = SCE_H_DOUBLESTRING;
1444			} else if (ch == '\'') {
1445				styler.ColourTo(i - 1, StateToPrint);
1446				state = SCE_H_SINGLESTRING;
1447			} else if (ch == '=') {
1448				styler.ColourTo(i, StateToPrint);
1449				state = SCE_H_VALUE;
1450			} else if (ch == '/' && chNext == '>') {
1451				styler.ColourTo(i - 1, StateToPrint);
1452				styler.ColourTo(i + 1, SCE_H_TAGEND);
1453				i++;
1454				ch = chNext;
1455				state = SCE_H_DEFAULT;
1456				tagOpened = false;
1457			} else if (ch == '?' && chNext == '>') {
1458				styler.ColourTo(i - 1, StateToPrint);
1459				styler.ColourTo(i + 1, SCE_H_XMLEND);
1460				i++;
1461				ch = chNext;
1462				state = SCE_H_DEFAULT;
1463			} else if (setHTMLWord.Contains(ch)) {
1464				styler.ColourTo(i - 1, StateToPrint);
1465				state = SCE_H_ATTRIBUTE;
1466			}
1467			break;
1468		case SCE_H_DOUBLESTRING:
1469			if (ch == '\"') {
1470				if (inScriptType == eNonHtmlScript) {
1471					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1472				}
1473				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1474				state = SCE_H_OTHER;
1475			}
1476			break;
1477		case SCE_H_SINGLESTRING:
1478			if (ch == '\'') {
1479				if (inScriptType == eNonHtmlScript) {
1480					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1481				}
1482				styler.ColourTo(i, SCE_H_SINGLESTRING);
1483				state = SCE_H_OTHER;
1484			}
1485			break;
1486		case SCE_H_VALUE:
1487			if (!setHTMLWord.Contains(ch)) {
1488				if (ch == '\"' && chPrev == '=') {
1489					// Should really test for being first character
1490					state = SCE_H_DOUBLESTRING;
1491				} else if (ch == '\'' && chPrev == '=') {
1492					state = SCE_H_SINGLESTRING;
1493				} else {
1494					if (IsNumber(styler.GetStartSegment(), styler)) {
1495						styler.ColourTo(i - 1, SCE_H_NUMBER);
1496					} else {
1497						styler.ColourTo(i - 1, StateToPrint);
1498					}
1499					if (ch == '>') {
1500						styler.ColourTo(i, SCE_H_TAG);
1501						if (inScriptType == eNonHtmlScript) {
1502							state = StateForScript(scriptLanguage);
1503						} else {
1504							state = SCE_H_DEFAULT;
1505						}
1506						tagOpened = false;
1507						if (!tagDontFold) {
1508							if (tagClosing) {
1509								levelCurrent--;
1510							} else {
1511								levelCurrent++;
1512							}
1513						}
1514						tagClosing = false;
1515					} else {
1516						state = SCE_H_OTHER;
1517					}
1518				}
1519			}
1520			break;
1521		case SCE_HJ_DEFAULT:
1522		case SCE_HJ_START:
1523		case SCE_HJ_SYMBOLS:
1524			if (IsAWordStart(ch)) {
1525				styler.ColourTo(i - 1, StateToPrint);
1526				state = SCE_HJ_WORD;
1527			} else if (ch == '/' && chNext == '*') {
1528				styler.ColourTo(i - 1, StateToPrint);
1529				if (chNext2 == '*')
1530					state = SCE_HJ_COMMENTDOC;
1531				else
1532					state = SCE_HJ_COMMENT;
1533			} else if (ch == '/' && chNext == '/') {
1534				styler.ColourTo(i - 1, StateToPrint);
1535				state = SCE_HJ_COMMENTLINE;
1536			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1537				styler.ColourTo(i - 1, StateToPrint);
1538				state = SCE_HJ_REGEX;
1539			} else if (ch == '\"') {
1540				styler.ColourTo(i - 1, StateToPrint);
1541				state = SCE_HJ_DOUBLESTRING;
1542			} else if (ch == '\'') {
1543				styler.ColourTo(i - 1, StateToPrint);
1544				state = SCE_HJ_SINGLESTRING;
1545			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1546			           styler.SafeGetCharAt(i + 3) == '-') {
1547				styler.ColourTo(i - 1, StateToPrint);
1548				state = SCE_HJ_COMMENTLINE;
1549			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1550				styler.ColourTo(i - 1, StateToPrint);
1551				state = SCE_HJ_COMMENTLINE;
1552				i += 2;
1553			} else if (IsOperator(ch)) {
1554				styler.ColourTo(i - 1, StateToPrint);
1555				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1556				state = SCE_HJ_DEFAULT;
1557			} else if ((ch == ' ') || (ch == '\t')) {
1558				if (state == SCE_HJ_START) {
1559					styler.ColourTo(i - 1, StateToPrint);
1560					state = SCE_HJ_DEFAULT;
1561				}
1562			}
1563			break;
1564		case SCE_HJ_WORD:
1565			if (!IsAWordChar(ch)) {
1566				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1567				//styler.ColourTo(i - 1, eHTJSKeyword);
1568				state = SCE_HJ_DEFAULT;
1569				if (ch == '/' && chNext == '*') {
1570					if (chNext2 == '*')
1571						state = SCE_HJ_COMMENTDOC;
1572					else
1573						state = SCE_HJ_COMMENT;
1574				} else if (ch == '/' && chNext == '/') {
1575					state = SCE_HJ_COMMENTLINE;
1576				} else if (ch == '\"') {
1577					state = SCE_HJ_DOUBLESTRING;
1578				} else if (ch == '\'') {
1579					state = SCE_HJ_SINGLESTRING;
1580				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1581					styler.ColourTo(i - 1, StateToPrint);
1582					state = SCE_HJ_COMMENTLINE;
1583					i += 2;
1584				} else if (IsOperator(ch)) {
1585					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1586					state = SCE_HJ_DEF

Large files are truncated, but you can click here to view the full file