PageRenderTime 152ms CodeModel.GetById 15ms app.highlight 122ms RepoModel.GetById 1ms app.codeStats 1ms

/Pythonwin/Scintilla/src/LexHTML.cxx

https://bitbucket.org/jaraco/pywin32
C++ | 2184 lines | 1990 code | 106 blank | 88 comment | 1458 complexity | d39eb8a9411121905b14120d6b27385f MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>
   9#include <string.h>
  10#include <ctype.h>
  11#include <stdio.h>
  12#include <stdarg.h>
  13
  14#include "Platform.h"
  15
  16#include "PropSet.h"
  17#include "Accessor.h"
  18#include "StyleContext.h"
  19#include "KeyWords.h"
  20#include "Scintilla.h"
  21#include "SciLexer.h"
  22#include "CharacterSet.h"
  23
  24#ifdef SCI_NAMESPACE
  25using namespace Scintilla;
  26#endif
  27
  28#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  29#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  30#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  31
  32enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  33enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  34
  35static inline bool IsAWordChar(const int ch) {
  36	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  37}
  38
  39static inline bool IsAWordStart(const int ch) {
  40	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  41}
  42
  43inline bool IsOperator(int ch) {
  44	if (isascii(ch) && isalnum(ch))
  45		return false;
  46	// '.' left out as it is used to make up numbers
  47	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  48	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  49	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  50	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  51	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  52	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
  53		return true;
  54	return false;
  55}
  56
  57static inline int MakeLowerCase(int ch) {
  58	if (ch < 'A' || ch > 'Z')
  59		return ch;
  60	else
  61		return ch - 'A' + 'a';
  62}
  63
  64static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  65	size_t i = 0;
  66	for (; (i < end - start + 1) && (i < len-1); i++) {
  67		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  68	}
  69	s[i] = '\0';
  70}
  71
  72static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  73	char s[100];
  74	GetTextSegment(styler, start, end, s, sizeof(s));
  75	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  76	if (strstr(s, "src"))	// External script
  77		return eScriptNone;
  78	if (strstr(s, "vbs"))
  79		return eScriptVBS;
  80	if (strstr(s, "pyth"))
  81		return eScriptPython;
  82	if (strstr(s, "javas"))
  83		return eScriptJS;
  84	if (strstr(s, "jscr"))
  85		return eScriptJS;
  86	if (strstr(s, "php"))
  87		return eScriptPHP;
  88	if (strstr(s, "xml")) {
  89		const char *xml = strstr(s, "xml");
  90		for (const char *t=s; t<xml; t++) {
  91			if (!IsASpace(*t)) {
  92				return prevValue;
  93			}
  94		}
  95		return eScriptXML;
  96	}
  97
  98	return prevValue;
  99}
 100
 101static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 102	int iResult = 0;
 103	char s[100];
 104	GetTextSegment(styler, start, end, s, sizeof(s));
 105	if (0 == strncmp(s, "php", 3)) {
 106		iResult = 3;
 107	}
 108
 109	return iResult;
 110}
 111
 112static script_type ScriptOfState(int state) {
 113	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 114		return eScriptPython;
 115	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 116		return eScriptVBS;
 117	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 118		return eScriptJS;
 119	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 120		return eScriptPHP;
 121	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 122		return eScriptSGML;
 123	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 124		return eScriptSGMLblock;
 125	} else {
 126		return eScriptNone;
 127	}
 128}
 129
 130static int statePrintForState(int state, script_mode inScriptType) {
 131	int StateToPrint = state;
 132
 133	if (state >= SCE_HJ_START) {
 134		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 135			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 136		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 137			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 138		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 139			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 140		}
 141	}
 142
 143	return StateToPrint;
 144}
 145
 146static int stateForPrintState(int StateToPrint) {
 147	int state;
 148
 149	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 150		state = StateToPrint - SCE_HA_PYTHON;
 151	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 152		state = StateToPrint - SCE_HA_VBS;
 153	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 154		state = StateToPrint - SCE_HA_JS;
 155	} else {
 156		state = StateToPrint;
 157	}
 158
 159	return state;
 160}
 161
 162static inline bool IsNumber(unsigned int start, Accessor &styler) {
 163	return IsADigit(styler[start]) || (styler[start] == '.') ||
 164	       (styler[start] == '-') || (styler[start] == '#');
 165}
 166
 167static inline bool isStringState(int state) {
 168	bool bResult;
 169
 170	switch (state) {
 171	case SCE_HJ_DOUBLESTRING:
 172	case SCE_HJ_SINGLESTRING:
 173	case SCE_HJA_DOUBLESTRING:
 174	case SCE_HJA_SINGLESTRING:
 175	case SCE_HB_STRING:
 176	case SCE_HBA_STRING:
 177	case SCE_HP_STRING:
 178	case SCE_HP_CHARACTER:
 179	case SCE_HP_TRIPLE:
 180	case SCE_HP_TRIPLEDOUBLE:
 181	case SCE_HPA_STRING:
 182	case SCE_HPA_CHARACTER:
 183	case SCE_HPA_TRIPLE:
 184	case SCE_HPA_TRIPLEDOUBLE:
 185	case SCE_HPHP_HSTRING:
 186	case SCE_HPHP_SIMPLESTRING:
 187	case SCE_HPHP_HSTRING_VARIABLE:
 188	case SCE_HPHP_COMPLEX_VARIABLE:
 189		bResult = true;
 190		break;
 191	default :
 192		bResult = false;
 193		break;
 194	}
 195	return bResult;
 196}
 197
 198static inline bool stateAllowsTermination(int state) {
 199	bool allowTermination = !isStringState(state);
 200	if (allowTermination) {
 201		switch (state) {
 202		case SCE_HB_COMMENTLINE:
 203		case SCE_HPHP_COMMENT:
 204		case SCE_HP_COMMENTLINE:
 205		case SCE_HPA_COMMENTLINE:
 206			allowTermination = false;
 207		}
 208	}
 209	return allowTermination;
 210}
 211
 212// not really well done, since it's only comments that should lex the %> and <%
 213static inline bool isCommentASPState(int state) {
 214	bool bResult;
 215
 216	switch (state) {
 217	case SCE_HJ_COMMENT:
 218	case SCE_HJ_COMMENTLINE:
 219	case SCE_HJ_COMMENTDOC:
 220	case SCE_HB_COMMENTLINE:
 221	case SCE_HP_COMMENTLINE:
 222	case SCE_HPHP_COMMENT:
 223	case SCE_HPHP_COMMENTLINE:
 224		bResult = true;
 225		break;
 226	default :
 227		bResult = false;
 228		break;
 229	}
 230	return bResult;
 231}
 232
 233static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 234	bool wordIsNumber = IsNumber(start, styler);
 235	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 236	if (wordIsNumber) {
 237		chAttr = SCE_H_NUMBER;
 238	} else {
 239		char s[100];
 240		GetTextSegment(styler, start, end, s, sizeof(s));
 241		if (keywords.InList(s))
 242			chAttr = SCE_H_ATTRIBUTE;
 243	}
 244	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 245		// No keywords -> all are known
 246		chAttr = SCE_H_ATTRIBUTE;
 247	styler.ColourTo(end, chAttr);
 248}
 249
 250static int classifyTagHTML(unsigned int start, unsigned int end,
 251                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 252			   bool caseSensitive, bool isXml, bool allowScripts) {
 253	char s[30 + 2];
 254	// Copy after the '<'
 255	unsigned int i = 0;
 256	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 257		char ch = styler[cPos];
 258		if ((ch != '<') && (ch != '/')) {
 259			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 260		}
 261	}
 262
 263	//The following is only a quick hack, to see if this whole thing would work
 264	//we first need the tagname with a trailing space...
 265	s[i] = ' ';
 266	s[i+1] = '\0';
 267
 268	// if the current language is XML, I can fold any tag
 269	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 270	//...to find it in the list of no-container-tags
 271	tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
 272
 273	//now we can remove the trailing space
 274	s[i] = '\0';
 275
 276	// No keywords -> all are known
 277	// Name of a closing tag starts at s + 1
 278	char chAttr = SCE_H_TAGUNKNOWN;
 279	if (s[0] == '!') {
 280		chAttr = SCE_H_SGML_DEFAULT;
 281	} else if (!keywords || keywords.InList(s[0] == '/' ? s + 1 : s)) {
 282		chAttr = SCE_H_TAG;
 283	}
 284	styler.ColourTo(end, chAttr);
 285	if (chAttr == SCE_H_TAG) {
 286		if (allowScripts && 0 == strcmp(s, "script")) {
 287			chAttr = SCE_H_SCRIPT;
 288		} else if (!isXml && 0 == strcmp(s, "comment")) {
 289			chAttr = SCE_H_COMMENT;
 290		}
 291	}
 292	return chAttr;
 293}
 294
 295static void classifyWordHTJS(unsigned int start, unsigned int end,
 296                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 297	char chAttr = SCE_HJ_WORD;
 298	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 299	if (wordIsNumber)
 300		chAttr = SCE_HJ_NUMBER;
 301	else {
 302		char s[30 + 1];
 303		unsigned int i = 0;
 304		for (; i < end - start + 1 && i < 30; i++) {
 305			s[i] = styler[start + i];
 306		}
 307		s[i] = '\0';
 308		if (keywords.InList(s))
 309			chAttr = SCE_HJ_KEYWORD;
 310	}
 311	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 312}
 313
 314static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 315	char chAttr = SCE_HB_IDENTIFIER;
 316	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 317	if (wordIsNumber)
 318		chAttr = SCE_HB_NUMBER;
 319	else {
 320		char s[100];
 321		GetTextSegment(styler, start, end, s, sizeof(s));
 322		if (keywords.InList(s)) {
 323			chAttr = SCE_HB_WORD;
 324			if (strcmp(s, "rem") == 0)
 325				chAttr = SCE_HB_COMMENTLINE;
 326		}
 327	}
 328	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 329	if (chAttr == SCE_HB_COMMENTLINE)
 330		return SCE_HB_COMMENTLINE;
 331	else
 332		return SCE_HB_DEFAULT;
 333}
 334
 335static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 336	bool wordIsNumber = IsADigit(styler[start]);
 337	char s[30 + 1];
 338	unsigned int i = 0;
 339	for (; i < end - start + 1 && i < 30; i++) {
 340		s[i] = styler[start + i];
 341	}
 342	s[i] = '\0';
 343	char chAttr = SCE_HP_IDENTIFIER;
 344	if (0 == strcmp(prevWord, "class"))
 345		chAttr = SCE_HP_CLASSNAME;
 346	else if (0 == strcmp(prevWord, "def"))
 347		chAttr = SCE_HP_DEFNAME;
 348	else if (wordIsNumber)
 349		chAttr = SCE_HP_NUMBER;
 350	else if (keywords.InList(s))
 351		chAttr = SCE_HP_WORD;
 352	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 353	strcpy(prevWord, s);
 354}
 355
 356// Update the word colour to default or keyword
 357// Called when in a PHP word
 358static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 359	char chAttr = SCE_HPHP_DEFAULT;
 360	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 361	if (wordIsNumber)
 362		chAttr = SCE_HPHP_NUMBER;
 363	else {
 364		char s[100];
 365		GetTextSegment(styler, start, end, s, sizeof(s));
 366		if (keywords.InList(s))
 367			chAttr = SCE_HPHP_WORD;
 368	}
 369	styler.ColourTo(end, chAttr);
 370}
 371
 372static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 373	char s[30 + 1];
 374	unsigned int i = 0;
 375	for (; i < end - start + 1 && i < 30; i++) {
 376		s[i] = styler[start + i];
 377	}
 378	s[i] = '\0';
 379	return keywords.InList(s);
 380}
 381
 382static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 383	char s[30 + 1];
 384	unsigned int i = 0;
 385	for (; i < end - start + 1 && i < 30; i++) {
 386		s[i] = styler[start + i];
 387	}
 388	s[i] = '\0';
 389	return (0 == strcmp(s, "[CDATA["));
 390}
 391
 392// Return the first state to reach when entering a scripting language
 393static int StateForScript(script_type scriptLanguage) {
 394	int Result;
 395	switch (scriptLanguage) {
 396	case eScriptVBS:
 397		Result = SCE_HB_START;
 398		break;
 399	case eScriptPython:
 400		Result = SCE_HP_START;
 401		break;
 402	case eScriptPHP:
 403		Result = SCE_HPHP_DEFAULT;
 404		break;
 405	case eScriptXML:
 406		Result = SCE_H_TAGUNKNOWN;
 407		break;
 408	case eScriptSGML:
 409		Result = SCE_H_SGML_DEFAULT;
 410		break;
 411	case eScriptComment:
 412		Result = SCE_H_COMMENT;
 413		break;
 414	default :
 415		Result = SCE_HJ_START;
 416		break;
 417	}
 418	return Result;
 419}
 420
 421static inline bool ishtmlwordchar(int ch) {
 422	return !isascii(ch) ||
 423		(isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
 424}
 425
 426static inline bool issgmlwordchar(int ch) {
 427	return !isascii(ch) ||
 428		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 429}
 430
 431static inline bool IsPhpWordStart(int ch) {
 432	return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 433}
 434
 435static inline bool IsPhpWordChar(int ch) {
 436	return IsADigit(ch) || IsPhpWordStart(ch);
 437}
 438
 439static bool InTagState(int state) {
 440	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 441	       state == SCE_H_SCRIPT ||
 442	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 443	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 444	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 445}
 446
 447static bool IsCommentState(const int state) {
 448	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 449}
 450
 451static bool IsScriptCommentState(const int state) {
 452	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 453		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 454}
 455
 456static bool isLineEnd(int ch) {
 457	return ch == '\r' || ch == '\n';
 458}
 459
 460static bool isOKBeforeRE(int ch) {
 461	return (ch == '(') || (ch == '=') || (ch == ',');
 462}
 463
 464static bool isPHPStringState(int state) {
 465	return
 466	    (state == SCE_HPHP_HSTRING) ||
 467	    (state == SCE_HPHP_SIMPLESTRING) ||
 468	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 469	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 470}
 471
 472static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
 473	int j;
 474	const int beginning = i - 1;
 475	bool isValidSimpleString = false;
 476
 477	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 478		i++;
 479
 480	char ch = styler.SafeGetCharAt(i);
 481	const char chNext = styler.SafeGetCharAt(i + 1);
 482	if (!IsPhpWordStart(ch)) {
 483		if (ch == '\'' && IsPhpWordStart(chNext)) {
 484			i++;
 485			ch = chNext;
 486			isSimpleString = true;
 487		} else {
 488			phpStringDelimiter[0] = '\0';
 489			return beginning;
 490		}
 491	}
 492	phpStringDelimiter[0] = ch;
 493	i++;
 494
 495	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
 496		if (!IsPhpWordChar(styler[j])) {
 497			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
 498				isValidSimpleString = true;
 499				j++;
 500				break;
 501			} else {
 502				phpStringDelimiter[0] = '\0';
 503				return beginning;
 504			}
 505		}
 506		if (j - i < phpStringDelimiterSize - 2)
 507			phpStringDelimiter[j-i+1] = styler[j];
 508		else
 509			i++;
 510	}
 511	if (isSimpleString && !isValidSimpleString) {
 512		phpStringDelimiter[0] = '\0';
 513		return beginning;
 514	}
 515	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
 516	return j - 1;
 517}
 518
 519static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 520                                  Accessor &styler, bool isXml) {
 521	WordList &keywords = *keywordlists[0];
 522	WordList &keywords2 = *keywordlists[1];
 523	WordList &keywords3 = *keywordlists[2];
 524	WordList &keywords4 = *keywordlists[3];
 525	WordList &keywords5 = *keywordlists[4];
 526	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 527
 528	// Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 529	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 530	char prevWord[200];
 531	prevWord[0] = '\0';
 532	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 533	phpStringDelimiter[0] = '\0';
 534	int StateToPrint = initStyle;
 535	int state = stateForPrintState(StateToPrint);
 536
 537	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 538	if (InTagState(state)) {
 539		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 540			startPos--;
 541			length++;
 542		}
 543		state = SCE_H_DEFAULT;
 544	}
 545	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 546	if (isPHPStringState(state)) {
 547		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 548			startPos--;
 549			length++;
 550			state = styler.StyleAt(startPos);
 551		}
 552		if (startPos == 0)
 553			state = SCE_H_DEFAULT;
 554	}
 555	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 556
 557	int lineCurrent = styler.GetLine(startPos);
 558	int lineState;
 559	if (lineCurrent > 0) {
 560		lineState = styler.GetLineState(lineCurrent);
 561	} else {
 562		// Default client and ASP scripting language is JavaScript
 563		lineState = eScriptJS << 8;
 564		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 565	}
 566	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 567	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 568	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 569	bool tagDontFold = false; //some HTML tags should not be folded
 570	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 571	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 572	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 573
 574	script_type scriptLanguage = ScriptOfState(state);
 575	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 576	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 577		scriptLanguage = eScriptComment;
 578	}
 579
 580	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 581	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 582	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 583	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 584	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 585	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 586	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 587	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 588
 589	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 590	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 591	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 592
 593	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 594	int levelCurrent = levelPrev;
 595	int visibleChars = 0;
 596
 597	int chPrev = ' ';
 598	int ch = ' ';
 599	int chPrevNonWhite = ' ';
 600	// look back to set chPrevNonWhite properly for better regex colouring
 601	if (scriptLanguage == eScriptJS && startPos > 0) {
 602		int back = startPos;
 603		int style = 0;
 604		while (--back) {
 605			style = styler.StyleAt(back);
 606			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 607				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 608				break;
 609		}
 610		if (style == SCE_HJ_SYMBOLS) {
 611			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 612		}
 613	}
 614
 615	styler.StartSegment(startPos);
 616	const int lengthDoc = startPos + length;
 617	for (int i = startPos; i < lengthDoc; i++) {
 618		const int chPrev2 = chPrev;
 619		chPrev = ch;
 620		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 621			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 622			chPrevNonWhite = ch;
 623		ch = static_cast<unsigned char>(styler[i]);
 624		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 625		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 626
 627		// Handle DBCS codepages
 628		if (styler.IsLeadByte(static_cast<char>(ch))) {
 629			chPrev = ' ';
 630			i += 1;
 631			continue;
 632		}
 633
 634		if ((!IsASpace(ch) || !foldCompact) && fold)
 635			visibleChars++;
 636
 637		// decide what is the current state to print (depending of the script tag)
 638		StateToPrint = statePrintForState(state, inScriptType);
 639
 640		// handle script folding
 641		if (fold) {
 642			switch (scriptLanguage) {
 643			case eScriptJS:
 644			case eScriptPHP:
 645				//not currently supported				case eScriptVBS:
 646
 647				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 648				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 649				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 650					if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 651						levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
 652					}
 653				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 654					levelCurrent--;
 655				}
 656				break;
 657			case eScriptPython:
 658				if (state != SCE_HP_COMMENTLINE) {
 659					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 660						levelCurrent++;
 661					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 662						// check if the number of tabs is lower than the level
 663						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 664						for (int j = 0; Findlevel > 0; j++) {
 665							char chTmp = styler.SafeGetCharAt(i + j + 1);
 666							if (chTmp == '\t') {
 667								Findlevel -= 8;
 668							} else if (chTmp == ' ') {
 669								Findlevel--;
 670							} else {
 671								break;
 672							}
 673						}
 674
 675						if (Findlevel > 0) {
 676							levelCurrent -= Findlevel / 8;
 677							if (Findlevel % 8)
 678								levelCurrent--;
 679						}
 680					}
 681				}
 682				break;
 683			default:
 684				break;
 685			}
 686		}
 687
 688		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 689			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 690			// Avoid triggering two times on Dos/Win
 691			// New line -> record any line state onto /next/ line
 692			if (fold) {
 693				int lev = levelPrev;
 694				if (visibleChars == 0)
 695					lev |= SC_FOLDLEVELWHITEFLAG;
 696				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 697					lev |= SC_FOLDLEVELHEADERFLAG;
 698
 699				styler.SetLevel(lineCurrent, lev);
 700				visibleChars = 0;
 701				levelPrev = levelCurrent;
 702			}
 703			lineCurrent++;
 704			styler.SetLineState(lineCurrent,
 705			                    ((inScriptType & 0x03) << 0) |
 706			                    ((tagOpened & 0x01) << 2) |
 707			                    ((tagClosing & 0x01) << 3) |
 708			                    ((aspScript & 0x0F) << 4) |
 709			                    ((clientScript & 0x0F) << 8) |
 710			                    ((beforePreProc & 0xFF) << 12));
 711		}
 712
 713		// generic end of script processing
 714		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 715			// Check if it's the end of the script tag (or any other HTML tag)
 716			switch (state) {
 717				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 718			case SCE_H_DOUBLESTRING:
 719			case SCE_H_SINGLESTRING:
 720			case SCE_HJ_COMMENT:
 721			case SCE_HJ_COMMENTDOC:
 722			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 723			// the end of script marker from some JS interpreters.
 724			case SCE_HB_COMMENTLINE:
 725			case SCE_HBA_COMMENTLINE:
 726			case SCE_HJ_DOUBLESTRING:
 727			case SCE_HJ_SINGLESTRING:
 728			case SCE_HJ_REGEX:
 729			case SCE_HB_STRING:
 730			case SCE_HBA_STRING:
 731			case SCE_HP_STRING:
 732			case SCE_HP_TRIPLE:
 733			case SCE_HP_TRIPLEDOUBLE:
 734			case SCE_HPHP_HSTRING:
 735			case SCE_HPHP_SIMPLESTRING:
 736			case SCE_HPHP_COMMENT:
 737			case SCE_HPHP_COMMENTLINE:
 738				break;
 739			default :
 740				// check if the closing tag is a script tag
 741				if (const char *tag =
 742						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 743						state == SCE_H_COMMENT ? "comment" : 0) {
 744					int j = i + 2;
 745					int chr;
 746					do {
 747						chr = static_cast<int>(*tag++);
 748					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 749					if (chr != 0) break;
 750				}
 751				// closing tag of the script (it's a closing HTML tag anyway)
 752				styler.ColourTo(i - 1, StateToPrint);
 753				state = SCE_H_TAGUNKNOWN;
 754				inScriptType = eHtml;
 755				scriptLanguage = eScriptNone;
 756				clientScript = eScriptJS;
 757				i += 2;
 758				visibleChars += 2;
 759				tagClosing = true;
 760				continue;
 761			}
 762		}
 763
 764		/////////////////////////////////////
 765		// handle the start of PHP pre-processor = Non-HTML
 766		else if ((state != SCE_H_ASPAT) &&
 767		         !isPHPStringState(state) &&
 768		         (state != SCE_HPHP_COMMENT) &&
 769		         (ch == '<') &&
 770		         (chNext == '?') &&
 771				 !IsScriptCommentState(state) ) {
 772			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
 773			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 774			styler.ColourTo(i - 1, StateToPrint);
 775			beforePreProc = state;
 776			i++;
 777			visibleChars++;
 778			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 779			if (scriptLanguage == eScriptXML)
 780				styler.ColourTo(i, SCE_H_XMLSTART);
 781			else
 782				styler.ColourTo(i, SCE_H_QUESTION);
 783			state = StateForScript(scriptLanguage);
 784			if (inScriptType == eNonHtmlScript)
 785				inScriptType = eNonHtmlScriptPreProc;
 786			else
 787				inScriptType = eNonHtmlPreProc;
 788			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 789			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 790				levelCurrent++;
 791			}
 792			// should be better
 793			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 794			continue;
 795		}
 796
 797		// handle the start of ASP pre-processor = Non-HTML
 798		else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
 799			styler.ColourTo(i - 1, StateToPrint);
 800			beforePreProc = state;
 801			if (inScriptType == eNonHtmlScript)
 802				inScriptType = eNonHtmlScriptPreProc;
 803			else
 804				inScriptType = eNonHtmlPreProc;
 805
 806			if (chNext2 == '@') {
 807				i += 2; // place as if it was the second next char treated
 808				visibleChars += 2;
 809				state = SCE_H_ASPAT;
 810			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
 811				styler.ColourTo(i + 3, SCE_H_ASP);
 812				state = SCE_H_XCCOMMENT;
 813				scriptLanguage = eScriptVBS;
 814				continue;
 815			} else {
 816				if (chNext2 == '=') {
 817					i += 2; // place as if it was the second next char treated
 818					visibleChars += 2;
 819				} else {
 820					i++; // place as if it was the next char treated
 821					visibleChars++;
 822				}
 823
 824				state = StateForScript(aspScript);
 825			}
 826			scriptLanguage = eScriptVBS;
 827			styler.ColourTo(i, SCE_H_ASP);
 828			// fold whole script
 829			if (foldHTMLPreprocessor)
 830				levelCurrent++;
 831			// should be better
 832			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 833			continue;
 834		}
 835
 836		/////////////////////////////////////
 837		// handle the start of SGML language (DTD)
 838		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
 839				 (chPrev == '<') &&
 840				 (ch == '!') &&
 841				 (StateToPrint != SCE_H_CDATA) &&
 842				 (!IsCommentState(StateToPrint)) &&
 843				 (!IsScriptCommentState(StateToPrint)) ) {
 844			beforePreProc = state;
 845			styler.ColourTo(i - 2, StateToPrint);
 846			if ((chNext == '-') && (chNext2 == '-')) {
 847				state = SCE_H_COMMENT; // wait for a pending command
 848				styler.ColourTo(i + 2, SCE_H_COMMENT);
 849				i += 2; // follow styling after the --
 850			} else if (isWordCdata(i + 1, i + 7, styler)) {
 851				state = SCE_H_CDATA;
 852			} else {
 853				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
 854				scriptLanguage = eScriptSGML;
 855				state = SCE_H_SGML_COMMAND; // wait for a pending command
 856			}
 857			// fold whole tag (-- when closing the tag)
 858			if (foldHTMLPreprocessor)
 859				levelCurrent++;
 860			continue;
 861		}
 862
 863		// handle the end of a pre-processor = Non-HTML
 864		else if ((
 865		             ((inScriptType == eNonHtmlPreProc)
 866		              || (inScriptType == eNonHtmlScriptPreProc)) && (
 867		                 ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
 868		             ) && (chNext == '>')) ||
 869		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
 870			if (state == SCE_H_ASPAT) {
 871				aspScript = segIsScriptingIndicator(styler,
 872				                                    styler.GetStartSegment(), i - 1, aspScript);
 873			}
 874			// Bounce out of any ASP mode
 875			switch (state) {
 876			case SCE_HJ_WORD:
 877				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
 878				break;
 879			case SCE_HB_WORD:
 880				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
 881				break;
 882			case SCE_HP_WORD:
 883				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
 884				break;
 885			case SCE_HPHP_WORD:
 886				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
 887				break;
 888			case SCE_H_XCCOMMENT:
 889				styler.ColourTo(i - 1, state);
 890				break;
 891			default :
 892				styler.ColourTo(i - 1, StateToPrint);
 893				break;
 894			}
 895			if (scriptLanguage != eScriptSGML) {
 896				i++;
 897				visibleChars++;
 898			}
 899			if (ch == '%')
 900				styler.ColourTo(i, SCE_H_ASP);
 901			else if (scriptLanguage == eScriptXML)
 902				styler.ColourTo(i, SCE_H_XMLEND);
 903			else if (scriptLanguage == eScriptSGML)
 904				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 905			else
 906				styler.ColourTo(i, SCE_H_QUESTION);
 907			state = beforePreProc;
 908			if (inScriptType == eNonHtmlScriptPreProc)
 909				inScriptType = eNonHtmlScript;
 910			else
 911				inScriptType = eHtml;
 912			// Unfold all scripting languages, except for XML tag
 913			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 914				levelCurrent--;
 915			}
 916			scriptLanguage = eScriptNone;
 917			continue;
 918		}
 919		/////////////////////////////////////
 920
 921		switch (state) {
 922		case SCE_H_DEFAULT:
 923			if (ch == '<') {
 924				// in HTML, fold on tag open and unfold on tag close
 925				tagOpened = true;
 926				tagClosing = (chNext == '/');
 927				styler.ColourTo(i - 1, StateToPrint);
 928				if (chNext != '!')
 929					state = SCE_H_TAGUNKNOWN;
 930			} else if (ch == '&') {
 931				styler.ColourTo(i - 1, SCE_H_DEFAULT);
 932				state = SCE_H_ENTITY;
 933			}
 934			break;
 935		case SCE_H_SGML_DEFAULT:
 936		case SCE_H_SGML_BLOCK_DEFAULT:
 937//			if (scriptLanguage == eScriptSGMLblock)
 938//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
 939
 940			if (ch == '\"') {
 941				styler.ColourTo(i - 1, StateToPrint);
 942				state = SCE_H_SGML_DOUBLESTRING;
 943			} else if (ch == '\'') {
 944				styler.ColourTo(i - 1, StateToPrint);
 945				state = SCE_H_SGML_SIMPLESTRING;
 946			} else if ((ch == '-') && (chPrev == '-')) {
 947				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
 948					styler.ColourTo(i - 2, StateToPrint);
 949				}
 950				state = SCE_H_SGML_COMMENT;
 951			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
 952				styler.ColourTo(i - 2, StateToPrint);
 953				state = SCE_H_SGML_ENTITY;
 954			} else if (ch == '#') {
 955				styler.ColourTo(i - 1, StateToPrint);
 956				state = SCE_H_SGML_SPECIAL;
 957			} else if (ch == '[') {
 958				styler.ColourTo(i - 1, StateToPrint);
 959				scriptLanguage = eScriptSGMLblock;
 960				state = SCE_H_SGML_BLOCK_DEFAULT;
 961			} else if (ch == ']') {
 962				if (scriptLanguage == eScriptSGMLblock) {
 963					styler.ColourTo(i, StateToPrint);
 964					scriptLanguage = eScriptSGML;
 965				} else {
 966					styler.ColourTo(i - 1, StateToPrint);
 967					styler.ColourTo(i, SCE_H_SGML_ERROR);
 968				}
 969				state = SCE_H_SGML_DEFAULT;
 970			} else if (scriptLanguage == eScriptSGMLblock) {
 971				if ((ch == '!') && (chPrev == '<')) {
 972					styler.ColourTo(i - 2, StateToPrint);
 973					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 974					state = SCE_H_SGML_COMMAND;
 975				} else if (ch == '>') {
 976					styler.ColourTo(i - 1, StateToPrint);
 977					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 978				}
 979			}
 980			break;
 981		case SCE_H_SGML_COMMAND:
 982			if ((ch == '-') && (chPrev == '-')) {
 983				styler.ColourTo(i - 2, StateToPrint);
 984				state = SCE_H_SGML_COMMENT;
 985			} else if (!issgmlwordchar(ch)) {
 986				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
 987					styler.ColourTo(i - 1, StateToPrint);
 988					state = SCE_H_SGML_1ST_PARAM;
 989				} else {
 990					state = SCE_H_SGML_ERROR;
 991				}
 992			}
 993			break;
 994		case SCE_H_SGML_1ST_PARAM:
 995			// wait for the beginning of the word
 996			if ((ch == '-') && (chPrev == '-')) {
 997				if (scriptLanguage == eScriptSGMLblock) {
 998					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
 999				} else {
1000					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1001				}
1002				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1003			} else if (issgmlwordchar(ch)) {
1004				if (scriptLanguage == eScriptSGMLblock) {
1005					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1006				} else {
1007					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1008				}
1009				// find the length of the word
1010				int size = 1;
1011				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1012					size++;
1013				styler.ColourTo(i + size - 1, StateToPrint);
1014				i += size - 1;
1015				visibleChars += size - 1;
1016				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1017				if (scriptLanguage == eScriptSGMLblock) {
1018					state = SCE_H_SGML_BLOCK_DEFAULT;
1019				} else {
1020					state = SCE_H_SGML_DEFAULT;
1021				}
1022				continue;
1023			}
1024			break;
1025		case SCE_H_SGML_ERROR:
1026			if ((ch == '-') && (chPrev == '-')) {
1027				styler.ColourTo(i - 2, StateToPrint);
1028				state = SCE_H_SGML_COMMENT;
1029			}
1030		case SCE_H_SGML_DOUBLESTRING:
1031			if (ch == '\"') {
1032				styler.ColourTo(i, StateToPrint);
1033				state = SCE_H_SGML_DEFAULT;
1034			}
1035			break;
1036		case SCE_H_SGML_SIMPLESTRING:
1037			if (ch == '\'') {
1038				styler.ColourTo(i, StateToPrint);
1039				state = SCE_H_SGML_DEFAULT;
1040			}
1041			break;
1042		case SCE_H_SGML_COMMENT:
1043			if ((ch == '-') && (chPrev == '-')) {
1044				styler.ColourTo(i, StateToPrint);
1045				state = SCE_H_SGML_DEFAULT;
1046			}
1047			break;
1048		case SCE_H_CDATA:
1049			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1050				styler.ColourTo(i, StateToPrint);
1051				state = SCE_H_DEFAULT;
1052				levelCurrent--;
1053			}
1054			break;
1055		case SCE_H_COMMENT:
1056			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1057				styler.ColourTo(i, StateToPrint);
1058				state = SCE_H_DEFAULT;
1059				levelCurrent--;
1060			}
1061			break;
1062		case SCE_H_SGML_1ST_PARAM_COMMENT:
1063			if ((ch == '-') && (chPrev == '-')) {
1064				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1065				state = SCE_H_SGML_1ST_PARAM;
1066			}
1067			break;
1068		case SCE_H_SGML_SPECIAL:
1069			if (!(isascii(ch) && isupper(ch))) {
1070				styler.ColourTo(i - 1, StateToPrint);
1071				if (isalnum(ch)) {
1072					state = SCE_H_SGML_ERROR;
1073				} else {
1074					state = SCE_H_SGML_DEFAULT;
1075				}
1076			}
1077			break;
1078		case SCE_H_SGML_ENTITY:
1079			if (ch == ';') {
1080				styler.ColourTo(i, StateToPrint);
1081				state = SCE_H_SGML_DEFAULT;
1082			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1083				styler.ColourTo(i, SCE_H_SGML_ERROR);
1084				state = SCE_H_SGML_DEFAULT;
1085			}
1086			break;
1087		case SCE_H_ENTITY:
1088			if (ch == ';') {
1089				styler.ColourTo(i, StateToPrint);
1090				state = SCE_H_DEFAULT;
1091			}
1092			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1093				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1094				styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1095				state = SCE_H_DEFAULT;
1096			}
1097			break;
1098		case SCE_H_TAGUNKNOWN:
1099			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1100				int eClass = classifyTagHTML(styler.GetStartSegment(),
1101					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1102				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1103					if (!tagClosing) {
1104						inScriptType = eNonHtmlScript;
1105						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1106					} else {
1107						scriptLanguage = eScriptNone;
1108					}
1109					eClass = SCE_H_TAG;
1110				}
1111				if (ch == '>') {
1112					styler.ColourTo(i, eClass);
1113					if (inScriptType == eNonHtmlScript) {
1114						state = StateForScript(scriptLanguage);
1115					} else {
1116						state = SCE_H_DEFAULT;
1117					}
1118					tagOpened = false;
1119					if (!tagDontFold) {
1120						if (tagClosing) {
1121							levelCurrent--;
1122						} else {
1123							levelCurrent++;
1124						}
1125					}
1126					tagClosing = false;
1127				} else if (ch == '/' && chNext == '>') {
1128					if (eClass == SCE_H_TAGUNKNOWN) {
1129						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1130					} else {
1131						styler.ColourTo(i - 1, StateToPrint);
1132						styler.ColourTo(i + 1, SCE_H_TAGEND);
1133					}
1134					i++;
1135					ch = chNext;
1136					state = SCE_H_DEFAULT;
1137					tagOpened = false;
1138				} else {
1139					if (eClass != SCE_H_TAGUNKNOWN) {
1140						if (eClass == SCE_H_SGML_DEFAULT) {
1141							state = SCE_H_SGML_DEFAULT;
1142						} else {
1143							state = SCE_H_OTHER;
1144						}
1145					}
1146				}
1147			}
1148			break;
1149		case SCE_H_ATTRIBUTE:
1150			if (!setAttributeContinue.Contains(ch)) {
1151				if (inScriptType == eNonHtmlScript) {
1152					int scriptLanguagePrev = scriptLanguage;
1153					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1154					scriptLanguage = clientScript;
1155					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1156						inScriptType = eHtml;
1157				}
1158				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1159				if (ch == '>') {
1160					styler.ColourTo(i, SCE_H_TAG);
1161					if (inScriptType == eNonHtmlScript) {
1162						state = StateForScript(scriptLanguage);
1163					} else {
1164						state = SCE_H_DEFAULT;
1165					}
1166					tagOpened = false;
1167					if (!tagDontFold) {
1168						if (tagClosing) {
1169							levelCurrent--;
1170						} else {
1171							levelCurrent++;
1172						}
1173					}
1174					tagClosing = false;
1175				} else if (ch == '=') {
1176					styler.ColourTo(i, SCE_H_OTHER);
1177					state = SCE_H_VALUE;
1178				} else {
1179					state = SCE_H_OTHER;
1180				}
1181			}
1182			break;
1183		case SCE_H_OTHER:
1184			if (ch == '>') {
1185				styler.ColourTo(i - 1, StateToPrint);
1186				styler.ColourTo(i, SCE_H_TAG);
1187				if (inScriptType == eNonHtmlScript) {
1188					state = StateForScript(scriptLanguage);
1189				} else {
1190					state = SCE_H_DEFAULT;
1191				}
1192				tagOpened = false;
1193				if (!tagDontFold) {
1194					if (tagClosing) {
1195						levelCurrent--;
1196					} else {
1197						levelCurrent++;
1198					}
1199				}
1200				tagClosing = false;
1201			} else if (ch == '\"') {
1202				styler.ColourTo(i - 1, StateToPrint);
1203				state = SCE_H_DOUBLESTRING;
1204			} else if (ch == '\'') {
1205				styler.ColourTo(i - 1, StateToPrint);
1206				state = SCE_H_SINGLESTRING;
1207			} else if (ch == '=') {
1208				styler.ColourTo(i, StateToPrint);
1209				state = SCE_H_VALUE;
1210			} else if (ch == '/' && chNext == '>') {
1211				styler.ColourTo(i - 1, StateToPrint);
1212				styler.ColourTo(i + 1, SCE_H_TAGEND);
1213				i++;
1214				ch = chNext;
1215				state = SCE_H_DEFAULT;
1216				tagOpened = false;
1217			} else if (ch == '?' && chNext == '>') {
1218				styler.ColourTo(i - 1, StateToPrint);
1219				styler.ColourTo(i + 1, SCE_H_XMLEND);
1220				i++;
1221				ch = chNext;
1222				state = SCE_H_DEFAULT;
1223			} else if (setHTMLWord.Contains(ch)) {
1224				styler.ColourTo(i - 1, StateToPrint);
1225				state = SCE_H_ATTRIBUTE;
1226			}
1227			break;
1228		case SCE_H_DOUBLESTRING:
1229			if (ch == '\"') {
1230				if (inScriptType == eNonHtmlScript) {
1231					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1232				}
1233				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1234				state = SCE_H_OTHER;
1235			}
1236			break;
1237		case SCE_H_SINGLESTRING:
1238			if (ch == '\'') {
1239				if (inScriptType == eNonHtmlScript) {
1240					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1241				}
1242				styler.ColourTo(i, SCE_H_SINGLESTRING);
1243				state = SCE_H_OTHER;
1244			}
1245			break;
1246		case SCE_H_VALUE:
1247			if (!setHTMLWord.Contains(ch)) {
1248				if (ch == '\"' && chPrev == '=') {
1249					// Should really test for being first character
1250					state = SCE_H_DOUBLESTRING;
1251				} else if (ch == '\'' && chPrev == '=') {
1252					state = SCE_H_SINGLESTRING;
1253				} else {
1254					if (IsNumber(styler.GetStartSegment(), styler)) {
1255						styler.ColourTo(i - 1, SCE_H_NUMBER);
1256					} else {
1257						styler.ColourTo(i - 1, StateToPrint);
1258					}
1259					if (ch == '>') {
1260						styler.ColourTo(i, SCE_H_TAG);
1261						if (inScriptType == eNonHtmlScript) {
1262							state = StateForScript(scriptLanguage);
1263						} else {
1264							state = SCE_H_DEFAULT;
1265						}
1266						tagOpened = false;
1267						if (!tagDontFold) {
1268							if (tagClosing) {
1269								levelCurrent--;
1270							} else {
1271								levelCurrent++;
1272							}
1273						}
1274						tagClosing = false;
1275					} else {
1276						state = SCE_H_OTHER;
1277					}
1278				}
1279			}
1280			break;
1281		case SCE_HJ_DEFAULT:
1282		case SCE_HJ_START:
1283		case SCE_HJ_SYMBOLS:
1284			if (IsAWordStart(ch)) {
1285				styler.ColourTo(i - 1, StateToPrint);
1286				state = SCE_HJ_WORD;
1287			} else if (ch == '/' && chNext == '*') {
1288				styler.ColourTo(i - 1, StateToPrint);
1289				if (chNext2 == '*')
1290					state = SCE_HJ_COMMENTDOC;
1291				else
1292					state = SCE_HJ_COMMENT;
1293			} else if (ch == '/' && chNext == '/') {
1294				styler.ColourTo(i - 1, StateToPrint);
1295				state = SCE_HJ_COMMENTLINE;
1296			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1297				styler.ColourTo(i - 1, StateToPrint);
1298				state = SCE_HJ_REGEX;
1299			} else if (ch == '\"') {
1300				styler.ColourTo(i - 1, StateToPrint);
1301				state = SCE_HJ_DOUBLESTRING;
1302			} else if (ch == '\'') {
1303				styler.ColourTo(i - 1, StateToPrint);
1304				state = SCE_HJ_SINGLESTRING;
1305			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1306			           styler.SafeGetCharAt(i + 3) == '-') {
1307				styler.ColourTo(i - 1, StateToPrint);
1308				state = SCE_HJ_COMMENTLINE;
1309			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1310				styler.ColourTo(i - 1, StateToPrint);
1311				state = SCE_HJ_COMMENTLINE;
1312				i += 2;
1313			} else if (IsOperator(ch)) {
1314				styler.ColourTo(i - 1, StateToPrint);
1315				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1316				state = SCE_HJ_DEFAULT;
1317			} else if ((ch == ' ') || (ch == '\t')) {
1318				if (state == SCE_HJ_START) {
1319					styler.ColourTo(i - 1, StateToPrint);
1320					state = SCE_HJ_DEFAULT;
1321				}
1322			}
1323			break;
1324		case SCE_HJ_WORD:
1325			if (!IsAWordChar(ch)) {
1326				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1327				//styler.ColourTo(i - 1, eHTJSKeyword);
1328				state = SCE_HJ_DEFAULT;
1329				if (ch == '/' && chNext == '*') {
1330					if (chNext2 == '*')
1331						state = SCE_HJ_COMMENTDOC;
1332					else
1333						state = SCE_HJ_COMMENT;
1334				} else if (ch == '/' && chNext == '/') {
1335					state = SCE_HJ_COMMENTLINE;
1336				} else if (ch == '\"') {
1337					state = SCE_HJ_DOUBLESTRING;
1338				} else if (ch == '\'') {
1339					state = SCE_HJ_SINGLESTRING;
1340				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1341					styler.ColourTo(i - 1, StateToPrint);
1342					state = SCE_HJ_COMMENTLINE;
1343					i += 2;
1344				} else if (IsOperator(ch)) {
1345					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1346					state = SCE_HJ_DEFAULT;
1347				}
1348			}
1349			break;
1350		case SCE_HJ_COMMENT:
1351		case SCE_HJ_COMMENTDOC:
1352			if (ch == '/' && chPrev == '*') {
1353				styler.ColourTo(i, StateToPrint);
1354				state = SCE_HJ_DEFAULT;
1355				ch = ' ';
1356			}
1357			break;
1358		case SCE_HJ_COMMENTLINE:
1359			if (ch == '\r' || ch == '\n') {
1360				styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1361				state = SCE_HJ_DEFAULT;
1362				ch = ' ';
1363			}
1364			break;
1365		case SCE_HJ_DOUBLESTRING:
1366			if (ch == '\\') {
1367				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1368					i++;
1369				}
1370			} else if (ch == '\"') {
1371				styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1372				state = SCE_HJ_DEFAULT;
1373			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1374				styler.ColourTo(i - 1, StateToPrint);
1375				state = SCE_HJ_COMMENTLINE;
1376				i += 2;
1377			} else if (isLineEnd(ch)) {
1378				styler.ColourTo(i - 1, StateToPrint);
1379				state = SCE_HJ_STRINGEOL;
1380			}
1381			break;
1382		case SCE_HJ_SINGLESTRING:
1383			if (ch == '\\') {
1384				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1385					i++;
1386				}
1387			} else if (ch == '\'') {
1388				styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1389				state = SCE_HJ_DEFAULT;
1390			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1391				styler.ColourTo(i - 1, StateToPrint);
1392				state = SCE_HJ_COMMENTLINE;
1393				i += 2;
1394			} else if (isLineEnd(ch)) {
1395				styler.ColourTo(i - 1, StateToPrint);
1396				state = SCE_HJ_STRINGEOL;
1397			}
1398			break;
1399		case SCE_HJ_STRINGEOL:
1400			if (!isLineEnd(ch)) {
1401				styler.ColourTo(i - 1, StateToPrint);
1402				state = SCE_HJ_DEFAULT;
1403			} else if (!isLineEnd(chNext)) {
1404				styler.ColourTo(i, StateToPrint);
1405				state = SCE_HJ_DEFAULT;
1406			}
1407			break;
1408		case SCE_HJ_REGEX:
1409			if (ch == '\r' || ch == '\n' || ch == '/') {
1410				if (ch == '/') {
1411					while (isascii(chNext) && islower(chNext)) {   // gobble regex flags
1412						i++;
1413						ch = chNext;
1414						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1415					}
1416				}
1417				styler.ColourTo(i, StateToPrint);
1418				state = SCE_HJ_DEFAULT;
1419			} else if (ch == '\\') {
1420				// Gobble up the quoted character
1421				if (chNext == '\\' || chNext == '/') {
1422					i++;
1423					ch = chNext;
1424					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1425				}
1426			}
1427			break;
1428		case SCE_HB_DEFAULT:
1429		case SCE_HB_START:
1430			if (IsAWordStart(ch)) {
1431				styler.ColourTo(i - 1, StateToPrint);
1432				state = SCE_HB_WORD;
1433			} else if (ch == '\'') {
1434				styler.ColourTo(i - 1, StateToPrint);
1435				state = SCE_HB_COMMENTLINE;
1436			} else if (ch == '\"') {
1437				styler.ColourTo(i - 1, StateToPrint);
1438				state = SCE_HB_STRING;
1439			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1440			           styler.SafeGetCharAt(i + 3) == '-') {
1441				styler.ColourTo(i - 1, StateToPrint);
1442				state = SCE_HB_COMMENTLINE;
1443			} else if (IsOperator(ch)) {
1444				styler.ColourTo(i - 1, StateToPrint);
1445				styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1446				state = SCE_HB_DEFAULT;
1447			} else if ((ch == ' ') || (ch == '\t')) {
1448				if (state == SCE_HB_START) {
1449					styler.ColourTo(i - 1, StateToPrint);
1450					state = SCE_HB_DEFAULT;
1451				}
1452			}
1453			break;
1454		case SCE_HB_WORD:
1455			if (!IsAWordChar(ch)) {
1456				state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1457				if (state == SCE_HB_DEFAULT) {
1458					if (ch == '\"') {
1459						state = SCE_HB_STRING;
1460					} else if (ch == '\'') {
1461						state = SCE_HB_COMMENTLINE;
1462					} else if (IsOperator(ch)) {
1463						styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1464						state = SCE_HB_DEFAULT;
1465					}
1466				}
1467			}
1468			break;
1469		case SCE_HB_STRING:
1470			if (ch == '\"') {
1471				styler.ColourTo(i, StateToPrint);
1472				state = SCE_HB_DEFAULT;
1473			} else if (ch == '\r' || ch == '\n') {
1474				styler.ColourTo(i - 1, StateToPrint);
1475				state = SCE_HB_STRINGEOL;
1476			}
1477			break;
1478		case SCE_HB_COMMENTLINE:
1479			if (ch == '\r' || ch == '\n') {
1480				styler.ColourTo(i - 1, StateToPrint);
1481				state = SCE_HB_DEFAULT;
1482			}
1483			break;
1484		case SCE_HB_STRINGEOL:
1485			if (!isLineEnd(ch)) {
1486				styler.ColourTo(i - 1, StateToPrint);
1487				state = SCE_HB_DEFAULT;
1488			} else if (!isLineEnd(chNext)) {
1489				styler.ColourTo(i, StateToPrint);
1490				state = SCE_HB_DEFAULT;
1491			}
1492			break;
1493		case SCE_HP_DEFAULT:
1494		case SCE_HP_START:
1495			if (IsAWordStart(ch)) {
1496				styler.ColourTo(i - 1, StateToPrint);
1497				state = SCE_HP_WORD;
1498			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1499			           styler.SafeGetCharAt(i + 3) == '-') {
1500				styler.ColourTo(i - 1, StateToPrint);
1501				state = SCE_HP_COMMENTLINE;
1502			} else if (ch == '#') {
1503				styler.ColourTo(i - 1, StateToPrint);
1504				state = SCE_HP_COMMENTLINE;
1505			} else if (ch == '\"') {
1506				styler.ColourTo(i - 1, StateToPrint);
1507				if (chNext == '\"' && chNext2 == '\"') {
1508					i += 2;
1509					state = SCE_HP_TRIPLEDOUBLE;
1510					ch = ' ';
1511					chPrev = ' ';
1512					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1513				} else {
1514					//					state = statePrintForState(SCE_HP_STRING,inScriptType);
1515					state = SCE_HP_STRING;
1516				}
1517			} else if (ch == '\'') {
1518				styler.ColourTo(i - 1, StateToPrint);
1519				if (chNext == '\'' && chNext2 == '\'') {
1520					i += 2;
1521					state = SCE_HP_TRIPLE;
1522					ch = ' ';
1523					chPrev = ' ';
1524					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1525				} else {
1526					state = SCE_HP_CHARACTER;
1527				}
1528			} else if (IsOperator(ch)) {
1529				styler.ColourTo(i - 1, StateToPrint);
1530				styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1531			} else if ((ch == ' ') || (ch == '\t')) {
1532				if (state == SCE_HP_START) {
1533					styler.ColourTo(i - 1, StateToPrint);
1534					state = SCE_HP_DEFAULT;
1535				}
1536			}
1537			break;
1538		case SCE_HP_WORD:
1539			if (!IsAWordChar(ch)) {
1540				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1541				state = SCE_HP_DEFAULT;
1542				if (ch == '#') {
1543					state = SCE_HP_COMMENTLINE;
1544				} else if (ch == '\"') {
1545					if (chNext == '\"' && chNext2 == '\"') {
1546						i += 2;
1547						state = SCE_HP_TRIPLEDOUBLE;
1548						ch = ' ';
1549						chPrev = ' ';
1550						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1551					} else {
1552						state = SCE_HP_STRING;
1553					}
1554				} else if (ch == '\'') {
1555					if (chNext == '\'' && chNext2 == '\'') {
1556						i += 2;
1557						state = SCE_HP_TRIPLE;
1558						ch = ' ';
1559						chPrev = ' ';
1560						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1561					} else {
1562						state = SCE_HP_CHARACTER;
1563					}
1564				} else if (IsOperator(ch)) {
1565					styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1566				}
1567			}
1568			break;
1569		case SCE_HP_COMMENTLINE:
1570			if (ch == '\r' || ch == '\n') {
1571				styler.ColourTo(i - 1, StateToPrint);
1572				state = SCE_HP_DEFAULT;
1573			}
1574			break;
1575		case SCE_HP_STRING:
1576			if (ch == '\\') {
1577				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1578					i++;
1579					ch = chNext;
1580					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1581				}
1582			} else if (ch == '\"') {
1583				styler.ColourTo(i, StateToPrint);
1584				state = SCE_HP_DEFAULT;
1585			}
1586			break;
1587		case SCE_HP_CHARACTER:
1588			if (ch == '\\') {
1589				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1590					i++;
1591					ch = chNext;
1592					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1593				}
1594			} else if (ch == '\'') {
1595				styler.ColourTo(i, StateToPrint);
1596				state = SCE_HP_DEFAULT;
1597			}
1598			break;
1599		case SCE_HP_TRIPLE:
1600			if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1601				styler.ColourTo(i, StateToPrint);
1602				state = SCE_HP_DEFAULT;
1603			}
1604			break;
1605		case SCE_HP_TRIPLEDOUBLE:
1606			if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1607				styler.ColourTo(i, StateToPrint);
1608				state = SCE_HP_DEFAULT;
1609	

Large files are truncated, but you can click here to view the full file