PageRenderTime 23ms CodeModel.GetById 33ms app.highlight 190ms RepoModel.GetById 1ms app.codeStats 1ms

/MOULOpenSourceClientPlugin/StaticSDKs/Win32/Scintilla/src/LexHTML.cxx

https://bitbucket.org/cwalther/cwe-ou-nobink
C++ | 2042 lines | 1862 code | 98 blank | 82 comment | 1346 complexity | e146bcce334abffb5cc8487f5eb485b8 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>

   9#include <string.h>

  10#include <ctype.h>

  11#include <stdio.h>

  12#include <stdarg.h>

  13
  14#include "Platform.h"

  15
  16#include "PropSet.h"

  17#include "Accessor.h"

  18#include "StyleContext.h"

  19#include "KeyWords.h"

  20#include "Scintilla.h"

  21#include "SciLexer.h"

  22
  23#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)

  24#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)

  25#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  26
  27enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock };
  28enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  29
  30static inline bool IsAWordChar(const int ch) {
  31	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  32}
  33
  34static inline bool IsAWordStart(const int ch) {
  35	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  36}
  37
  38static inline int MakeLowerCase(int ch) {
  39	if (ch < 'A' || ch > 'Z')
  40		return ch;
  41	else
  42		return ch - 'A' + 'a';
  43}
  44
  45static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  46	size_t i = 0;
  47	for (; (i < end - start + 1) && (i < len-1); i++) {
  48		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  49	}
  50	s[i] = '\0';
  51}
  52
  53static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  54	char s[100];
  55	GetTextSegment(styler, start, end, s, sizeof(s));
  56	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  57	if (strstr(s, "src"))	// External script
  58		return eScriptNone;
  59	if (strstr(s, "vbs"))
  60		return eScriptVBS;
  61	if (strstr(s, "pyth"))
  62		return eScriptPython;
  63	if (strstr(s, "javas"))
  64		return eScriptJS;
  65	if (strstr(s, "jscr"))
  66		return eScriptJS;
  67	if (strstr(s, "php"))
  68		return eScriptPHP;
  69	if (strstr(s, "xml"))
  70		return eScriptXML;
  71
  72	return prevValue;
  73}
  74
  75static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  76	int iResult = 0;
  77	char s[100];
  78	GetTextSegment(styler, start, end, s, sizeof(s));
  79	if (0 == strncmp(s, "php", 3)) {
  80		iResult = 3;
  81	}
  82
  83	return iResult;
  84}
  85
  86static script_type ScriptOfState(int state) {
  87	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  88		return eScriptPython;
  89	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  90		return eScriptVBS;
  91	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
  92		return eScriptJS;
  93	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
  94		return eScriptPHP;
  95	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
  96		return eScriptSGML;
  97	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
  98		return eScriptSGMLblock;
  99	} else {
 100		return eScriptNone;
 101	}
 102}
 103
 104static int statePrintForState(int state, script_mode inScriptType) {
 105	int StateToPrint;
 106
 107	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 108		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 109	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 110		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 111	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 112		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 113	} else {
 114		StateToPrint = state;
 115	}
 116
 117	return StateToPrint;
 118}
 119
 120static int stateForPrintState(int StateToPrint) {
 121	int state;
 122
 123	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 124		state = StateToPrint - SCE_HA_PYTHON;
 125	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 126		state = StateToPrint - SCE_HA_VBS;
 127	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 128		state = StateToPrint - SCE_HA_JS;
 129	} else {
 130		state = StateToPrint;
 131	}
 132
 133	return state;
 134}
 135
 136static inline bool IsNumber(unsigned int start, Accessor &styler) {
 137	return IsADigit(styler[start]) || (styler[start] == '.') ||
 138	       (styler[start] == '-') || (styler[start] == '#');
 139}
 140
 141static inline bool isStringState(int state) {
 142	bool bResult;
 143
 144	switch (state) {
 145	case SCE_HJ_DOUBLESTRING:
 146	case SCE_HJ_SINGLESTRING:
 147	case SCE_HJA_DOUBLESTRING:
 148	case SCE_HJA_SINGLESTRING:
 149	case SCE_HB_STRING:
 150	case SCE_HBA_STRING:
 151	case SCE_HP_STRING:
 152	case SCE_HP_CHARACTER:
 153	case SCE_HP_TRIPLE:
 154	case SCE_HP_TRIPLEDOUBLE:
 155	case SCE_HPA_STRING:
 156	case SCE_HPA_CHARACTER:
 157	case SCE_HPA_TRIPLE:
 158	case SCE_HPA_TRIPLEDOUBLE:
 159	case SCE_HPHP_HSTRING:
 160	case SCE_HPHP_SIMPLESTRING:
 161	case SCE_HPHP_HSTRING_VARIABLE:
 162	case SCE_HPHP_COMPLEX_VARIABLE:
 163		bResult = true;
 164		break;
 165	default :
 166		bResult = false;
 167		break;
 168	}
 169	return bResult;
 170}
 171
 172static inline bool stateAllowsTermination(int state) {
 173	bool allowTermination = !isStringState(state);
 174	if (allowTermination) {
 175		switch (state) {
 176		case SCE_HPHP_COMMENT:
 177		case SCE_HP_COMMENTLINE:
 178		case SCE_HPA_COMMENTLINE:
 179			allowTermination = false;
 180		}
 181	}
 182	return allowTermination;
 183}
 184
 185// not really well done, since it's only comments that should lex the %> and <%
 186static inline bool isCommentASPState(int state) {
 187	bool bResult;
 188
 189	switch (state) {
 190	case SCE_HJ_COMMENT:
 191	case SCE_HJ_COMMENTLINE:
 192	case SCE_HJ_COMMENTDOC:
 193	case SCE_HB_COMMENTLINE:
 194	case SCE_HP_COMMENTLINE:
 195	case SCE_HPHP_COMMENT:
 196	case SCE_HPHP_COMMENTLINE:
 197		bResult = true;
 198		break;
 199	default :
 200		bResult = false;
 201		break;
 202	}
 203	return bResult;
 204}
 205
 206static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 207	bool wordIsNumber = IsNumber(start, styler);
 208	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 209	if (wordIsNumber) {
 210		chAttr = SCE_H_NUMBER;
 211	} else {
 212		char s[100];
 213		GetTextSegment(styler, start, end, s, sizeof(s));
 214		if (keywords.InList(s))
 215			chAttr = SCE_H_ATTRIBUTE;
 216	}
 217	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 218		// No keywords -> all are known
 219		chAttr = SCE_H_ATTRIBUTE;
 220	styler.ColourTo(end, chAttr);
 221}
 222
 223static int classifyTagHTML(unsigned int start, unsigned int end,
 224                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 225			   bool caseSensitive) {
 226	char s[30 + 2];
 227	// Copy after the '<'
 228	unsigned int i = 0;
 229	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 230		char ch = styler[cPos];
 231		if ((ch != '<') && (ch != '/')) {
 232			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 233		}
 234	}
 235
 236	//The following is only a quick hack, to see if this whole thing would work
 237	//we first need the tagname with a trailing space...
 238	s[i] = ' ';
 239	s[i+1] = '\0';
 240
 241	//...to find it in the list of no-container-tags
 242	// (There are many more. We will need a keywordlist in the property file for this)
 243	tagDontFold = (NULL != strstr("meta link img area br hr input ",s));
 244
 245	//now we can remove the trailing space
 246	s[i] = '\0';
 247
 248	bool isScript = false;
 249	char chAttr = SCE_H_TAGUNKNOWN;
 250	if (s[0] == '!') {
 251		chAttr = SCE_H_SGML_DEFAULT;
 252	} else if (s[0] == '/') {	// Closing tag
 253		if (keywords.InList(s + 1))
 254			chAttr = SCE_H_TAG;
 255	} else {
 256		if (keywords.InList(s)) {
 257			chAttr = SCE_H_TAG;
 258			isScript = 0 == strcmp(s, "script");
 259		}
 260	}
 261	if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
 262		// No keywords -> all are known
 263		chAttr = SCE_H_TAG;
 264		isScript = 0 == strcmp(s, "script");
 265	}
 266	styler.ColourTo(end, chAttr);
 267	return isScript ? SCE_H_SCRIPT : chAttr;
 268}
 269
 270static void classifyWordHTJS(unsigned int start, unsigned int end,
 271                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 272	char chAttr = SCE_HJ_WORD;
 273	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 274	if (wordIsNumber)
 275		chAttr = SCE_HJ_NUMBER;
 276	else {
 277		char s[30 + 1];
 278		unsigned int i = 0;
 279		for (; i < end - start + 1 && i < 30; i++) {
 280			s[i] = styler[start + i];
 281		}
 282		s[i] = '\0';
 283		if (keywords.InList(s))
 284			chAttr = SCE_HJ_KEYWORD;
 285	}
 286	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 287}
 288
 289static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 290	char chAttr = SCE_HB_IDENTIFIER;
 291	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 292	if (wordIsNumber)
 293		chAttr = SCE_HB_NUMBER;
 294	else {
 295		char s[100];
 296		GetTextSegment(styler, start, end, s, sizeof(s));
 297		if (keywords.InList(s)) {
 298			chAttr = SCE_HB_WORD;
 299			if (strcmp(s, "rem") == 0)
 300				chAttr = SCE_HB_COMMENTLINE;
 301		}
 302	}
 303	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 304	if (chAttr == SCE_HB_COMMENTLINE)
 305		return SCE_HB_COMMENTLINE;
 306	else
 307		return SCE_HB_DEFAULT;
 308}
 309
 310static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 311	bool wordIsNumber = IsADigit(styler[start]);
 312	char s[30 + 1];
 313	unsigned int i = 0;
 314	for (; i < end - start + 1 && i < 30; i++) {
 315		s[i] = styler[start + i];
 316	}
 317	s[i] = '\0';
 318	char chAttr = SCE_HP_IDENTIFIER;
 319	if (0 == strcmp(prevWord, "class"))
 320		chAttr = SCE_HP_CLASSNAME;
 321	else if (0 == strcmp(prevWord, "def"))
 322		chAttr = SCE_HP_DEFNAME;
 323	else if (wordIsNumber)
 324		chAttr = SCE_HP_NUMBER;
 325	else if (keywords.InList(s))
 326		chAttr = SCE_HP_WORD;
 327	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 328	strcpy(prevWord, s);
 329}
 330
 331// Update the word colour to default or keyword
 332// Called when in a PHP word
 333static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 334	char chAttr = SCE_HPHP_DEFAULT;
 335	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 336	if (wordIsNumber)
 337		chAttr = SCE_HPHP_NUMBER;
 338	else {
 339		char s[100];
 340		GetTextSegment(styler, start, end, s, sizeof(s));
 341		if (keywords.InList(s))
 342			chAttr = SCE_HPHP_WORD;
 343	}
 344	styler.ColourTo(end, chAttr);
 345}
 346
 347static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 348	char s[30 + 1];
 349	unsigned int i = 0;
 350	for (; i < end - start + 1 && i < 30; i++) {
 351		s[i] = styler[start + i];
 352	}
 353	s[i] = '\0';
 354	return keywords.InList(s);
 355}
 356
 357static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 358	char s[30 + 1];
 359	unsigned int i = 0;
 360	for (; i < end - start + 1 && i < 30; i++) {
 361		s[i] = styler[start + i];
 362	}
 363	s[i] = '\0';
 364	return (0 == strcmp(s, "[CDATA["));
 365}
 366
 367// Return the first state to reach when entering a scripting language
 368static int StateForScript(script_type scriptLanguage) {
 369	int Result;
 370	switch (scriptLanguage) {
 371	case eScriptVBS:
 372		Result = SCE_HB_START;
 373		break;
 374	case eScriptPython:
 375		Result = SCE_HP_START;
 376		break;
 377	case eScriptPHP:
 378		Result = SCE_HPHP_DEFAULT;
 379		break;
 380	case eScriptXML:
 381		Result = SCE_H_TAGUNKNOWN;
 382		break;
 383	case eScriptSGML:
 384		Result = SCE_H_SGML_DEFAULT;
 385		break;
 386	default :
 387		Result = SCE_HJ_START;
 388		break;
 389	}
 390	return Result;
 391}
 392
 393static inline bool ishtmlwordchar(char ch) {
 394	return !isascii(ch) ||
 395		(isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
 396}
 397
 398static inline bool issgmlwordchar(char ch) {
 399	return !isascii(ch) ||
 400		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 401}
 402
 403static inline bool IsPhpWordStart(const unsigned char ch) {
 404	return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 405}
 406
 407static inline bool IsPhpWordChar(char ch) {
 408	return IsADigit(ch) || IsPhpWordStart(ch);
 409}
 410
 411static bool InTagState(int state) {
 412	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 413	       state == SCE_H_SCRIPT ||
 414	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 415	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 416	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 417}
 418
 419static bool IsCommentState(const int state) {
 420	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 421}
 422
 423static bool IsScriptCommentState(const int state) {
 424	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 425		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 426}
 427
 428static bool isLineEnd(char ch) {
 429	return ch == '\r' || ch == '\n';
 430}
 431
 432static bool isOKBeforeRE(char ch) {
 433	return (ch == '(') || (ch == '=') || (ch == ',');
 434}
 435
 436static bool isPHPStringState(int state) {
 437	return
 438	    (state == SCE_HPHP_HSTRING) ||
 439	    (state == SCE_HPHP_SIMPLESTRING) ||
 440	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 441	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 442}
 443
 444static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) {
 445	int j;
 446	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 447		i++;
 448	phpStringDelimiter[0] = '\n';
 449	for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) {
 450		if (j - i < phpStringDelimiterSize - 2)
 451			phpStringDelimiter[j-i+1] = styler[j];
 452		else
 453			i++;
 454	}
 455	phpStringDelimiter[j-i+1] = '\0';
 456	return j;
 457}
 458
 459static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 460                                  Accessor &styler) {
 461	WordList &keywords = *keywordlists[0];
 462	WordList &keywords2 = *keywordlists[1];
 463	WordList &keywords3 = *keywordlists[2];
 464	WordList &keywords4 = *keywordlists[3];
 465	WordList &keywords5 = *keywordlists[4];
 466	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 467
 468	// Lexer for HTML requires more lexical states (7 bits worth) than most lexers
 469	styler.StartAt(startPos, STYLE_MAX);
 470	char prevWord[200];
 471	prevWord[0] = '\0';
 472	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 473	phpStringDelimiter[0] = '\0';
 474	int StateToPrint = initStyle;
 475	int state = stateForPrintState(StateToPrint);
 476
 477	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 478	if (InTagState(state)) {
 479		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 480			startPos--;
 481			length++;
 482		}
 483		state = SCE_H_DEFAULT;
 484	}
 485	// String can be heredoc, must find a delimiter first
 486	while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) {
 487		startPos--;
 488		length++;
 489		state = styler.StyleAt(startPos);
 490	}
 491	styler.StartAt(startPos, STYLE_MAX);
 492
 493	int lineCurrent = styler.GetLine(startPos);
 494	int lineState;
 495	if (lineCurrent > 0) {
 496		lineState = styler.GetLineState(lineCurrent);
 497	} else {
 498		// Default client and ASP scripting language is JavaScript
 499		lineState = eScriptJS << 8;
 500		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 501	}
 502	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 503	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 504	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 505	bool tagDontFold = false; //some HTML tags should not be folded
 506	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 507	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 508	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 509
 510	script_type scriptLanguage = ScriptOfState(state);
 511
 512	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 513	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 514	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 515	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 516	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 517
 518	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 519	int levelCurrent = levelPrev;
 520	int visibleChars = 0;
 521
 522	char chPrev = ' ';
 523	char ch = ' ';
 524	char chPrevNonWhite = ' ';
 525	// look back to set chPrevNonWhite properly for better regex colouring
 526	if (scriptLanguage == eScriptJS && startPos > 0) {
 527		int back = startPos;
 528		int style = 0;
 529		while (--back) {
 530			style = styler.StyleAt(back);
 531			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 532				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 533				break;
 534		}
 535		if (style == SCE_HJ_SYMBOLS) {
 536			chPrevNonWhite = styler.SafeGetCharAt(back);
 537		}
 538	}
 539
 540	styler.StartSegment(startPos);
 541	const int lengthDoc = startPos + length;
 542	for (int i = startPos; i < lengthDoc; i++) {
 543		const char chPrev2 = chPrev;
 544		chPrev = ch;
 545		if (!isspacechar(ch) && state != SCE_HJ_COMMENT &&
 546			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 547			chPrevNonWhite = ch;
 548		ch = styler[i];
 549		char chNext = styler.SafeGetCharAt(i + 1);
 550		const char chNext2 = styler.SafeGetCharAt(i + 2);
 551
 552		// Handle DBCS codepages
 553		if (styler.IsLeadByte(ch)) {
 554			chPrev = ' ';
 555			i += 1;
 556			continue;
 557		}
 558
 559		if ((!isspacechar(ch) || !foldCompact) && fold)
 560			visibleChars++;
 561
 562		// decide what is the current state to print (depending of the script tag)
 563		StateToPrint = statePrintForState(state, inScriptType);
 564
 565		// handle script folding
 566		if (fold) {
 567			switch (scriptLanguage) {
 568			case eScriptJS:
 569			case eScriptPHP:
 570				//not currently supported				case eScriptVBS:
 571
 572				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 573				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 574				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 575					if ((ch == '{') || (ch == '}')) {
 576						levelCurrent += (ch == '{') ? 1 : -1;
 577					}
 578				}
 579				break;
 580			case eScriptPython:
 581				if (state != SCE_HP_COMMENTLINE) {
 582					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 583						levelCurrent++;
 584					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 585						// check if the number of tabs is lower than the level
 586						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 587						for (int j = 0; Findlevel > 0; j++) {
 588							char chTmp = styler.SafeGetCharAt(i + j + 1);
 589							if (chTmp == '\t') {
 590								Findlevel -= 8;
 591							} else if (chTmp == ' ') {
 592								Findlevel--;
 593							} else {
 594								break;
 595							}
 596						}
 597
 598						if (Findlevel > 0) {
 599							levelCurrent -= Findlevel / 8;
 600							if (Findlevel % 8)
 601								levelCurrent--;
 602						}
 603					}
 604				}
 605				break;
 606			default:
 607				break;
 608			}
 609		}
 610
 611		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 612			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 613			// Avoid triggering two times on Dos/Win
 614			// New line -> record any line state onto /next/ line
 615			if (fold) {
 616				int lev = levelPrev;
 617				if (visibleChars == 0)
 618					lev |= SC_FOLDLEVELWHITEFLAG;
 619				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 620					lev |= SC_FOLDLEVELHEADERFLAG;
 621
 622				styler.SetLevel(lineCurrent, lev);
 623				visibleChars = 0;
 624				levelPrev = levelCurrent;
 625			}
 626			lineCurrent++;
 627			styler.SetLineState(lineCurrent,
 628			                    ((inScriptType & 0x03) << 0) |
 629			                    ((tagOpened & 0x01) << 2) |
 630			                    ((tagClosing & 0x01) << 3) |
 631			                    ((aspScript & 0x0F) << 4) |
 632			                    ((clientScript & 0x0F) << 8) |
 633			                    ((beforePreProc & 0xFF) << 12));
 634		}
 635
 636		// generic end of script processing
 637		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 638			// Check if it's the end of the script tag (or any other HTML tag)
 639			switch (state) {
 640				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 641			case SCE_H_DOUBLESTRING:
 642			case SCE_H_SINGLESTRING:
 643			case SCE_HJ_COMMENT:
 644			case SCE_HJ_COMMENTDOC:
 645			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 646			// the end of script marker from some JS interpreters.
 647			case SCE_HJ_DOUBLESTRING:
 648			case SCE_HJ_SINGLESTRING:
 649			case SCE_HJ_REGEX:
 650			case SCE_HB_STRING:
 651			case SCE_HP_STRING:
 652			case SCE_HP_TRIPLE:
 653			case SCE_HP_TRIPLEDOUBLE:
 654				break;
 655			default :
 656				// check if the closing tag is a script tag
 657				if (state == SCE_HJ_COMMENTLINE) {
 658					char tag[7]; // room for the <script> tag
 659					char chr;	// current char
 660					int j=0;
 661					chr = styler.SafeGetCharAt(i+2);
 662					while (j < 6 && !isspacechar(chr)) {
 663						tag[j++] = static_cast<char>(MakeLowerCase(chr));
 664						chr = styler.SafeGetCharAt(i+2+j);
 665					}
 666					tag[j] = '\0';
 667					if (strcmp(tag, "script") != 0) break;
 668				}
 669				// closing tag of the script (it's a closing HTML tag anyway)
 670				styler.ColourTo(i - 1, StateToPrint);
 671				state = SCE_H_TAGUNKNOWN;
 672				inScriptType = eHtml;
 673				scriptLanguage = eScriptNone;
 674				clientScript = eScriptJS;
 675				i += 2;
 676				visibleChars += 2;
 677				tagClosing = true;
 678				continue;
 679			}
 680		}
 681
 682		/////////////////////////////////////
 683		// handle the start of PHP pre-processor = Non-HTML
 684		else if ((state != SCE_H_ASPAT) &&
 685		         !isPHPStringState(state) &&
 686		         (state != SCE_HPHP_COMMENT) &&
 687		         (ch == '<') &&
 688		         (chNext == '?') &&
 689				 !IsScriptCommentState(state) ) {
 690			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 10, eScriptPHP);
 691			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 692			styler.ColourTo(i - 1, StateToPrint);
 693			beforePreProc = state;
 694			i++;
 695			visibleChars++;
 696			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
 697			if (scriptLanguage == eScriptXML)
 698				styler.ColourTo(i, SCE_H_XMLSTART);
 699			else
 700				styler.ColourTo(i, SCE_H_QUESTION);
 701			state = StateForScript(scriptLanguage);
 702			if (inScriptType == eNonHtmlScript)
 703				inScriptType = eNonHtmlScriptPreProc;
 704			else
 705				inScriptType = eNonHtmlPreProc;
 706			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 707			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 708				levelCurrent++;
 709			}
 710			// should be better
 711			ch = styler.SafeGetCharAt(i);
 712			continue;
 713		}
 714
 715		// handle the start of ASP pre-processor = Non-HTML
 716		else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
 717			styler.ColourTo(i - 1, StateToPrint);
 718			beforePreProc = state;
 719			if (inScriptType == eNonHtmlScript)
 720				inScriptType = eNonHtmlScriptPreProc;
 721			else
 722				inScriptType = eNonHtmlPreProc;
 723
 724			if (chNext2 == '@') {
 725				i += 2; // place as if it was the second next char treated
 726				visibleChars += 2;
 727				state = SCE_H_ASPAT;
 728			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
 729				styler.ColourTo(i + 3, SCE_H_ASP);
 730				state = SCE_H_XCCOMMENT;
 731				scriptLanguage = eScriptVBS;
 732				continue;
 733			} else {
 734				if (chNext2 == '=') {
 735					i += 2; // place as if it was the second next char treated
 736					visibleChars += 2;
 737				} else {
 738					i++; // place as if it was the next char treated
 739					visibleChars++;
 740				}
 741
 742				state = StateForScript(aspScript);
 743			}
 744			scriptLanguage = eScriptVBS;
 745			styler.ColourTo(i, SCE_H_ASP);
 746			// fold whole script
 747			if (foldHTMLPreprocessor)
 748				levelCurrent++;
 749			// should be better
 750			ch = styler.SafeGetCharAt(i);
 751			continue;
 752		}
 753
 754		/////////////////////////////////////
 755		// handle the start of SGML language (DTD)
 756		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
 757				 (chPrev == '<') &&
 758				 (ch == '!') &&
 759				 (StateToPrint != SCE_H_CDATA) &&
 760				 (!IsCommentState(StateToPrint)) &&
 761				 (!IsScriptCommentState(StateToPrint)) ) {
 762			beforePreProc = state;
 763			styler.ColourTo(i - 2, StateToPrint);
 764			if ((chNext == '-') && (chNext2 == '-')) {
 765				state = SCE_H_COMMENT; // wait for a pending command
 766				styler.ColourTo(i + 2, SCE_H_COMMENT);
 767				i += 2; // follow styling after the --
 768			} else if (isWordCdata(i + 1, i + 7, styler)) {
 769				state = SCE_H_CDATA;
 770			} else {
 771				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
 772				scriptLanguage = eScriptSGML;
 773				state = SCE_H_SGML_COMMAND; // wait for a pending command
 774			}
 775			// fold whole tag (-- when closing the tag)
 776			if (foldHTMLPreprocessor)
 777				levelCurrent++;
 778			continue;
 779		}
 780
 781		// handle the end of a pre-processor = Non-HTML
 782		else if ((
 783		             ((inScriptType == eNonHtmlPreProc)
 784		              || (inScriptType == eNonHtmlScriptPreProc)) && (
 785		                 ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
 786		             ) && (chNext == '>')) ||
 787		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
 788			if (state == SCE_H_ASPAT) {
 789				aspScript = segIsScriptingIndicator(styler,
 790				                                    styler.GetStartSegment(), i - 1, aspScript);
 791			}
 792			// Bounce out of any ASP mode
 793			switch (state) {
 794			case SCE_HJ_WORD:
 795				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
 796				break;
 797			case SCE_HB_WORD:
 798				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
 799				break;
 800			case SCE_HP_WORD:
 801				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
 802				break;
 803			case SCE_HPHP_WORD:
 804				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
 805				break;
 806			case SCE_H_XCCOMMENT:
 807				styler.ColourTo(i - 1, state);
 808				break;
 809			default :
 810				styler.ColourTo(i - 1, StateToPrint);
 811				break;
 812			}
 813			if (scriptLanguage != eScriptSGML) {
 814				i++;
 815				visibleChars++;
 816			}
 817			if (ch == '%')
 818				styler.ColourTo(i, SCE_H_ASP);
 819			else if (scriptLanguage == eScriptXML)
 820				styler.ColourTo(i, SCE_H_XMLEND);
 821			else if (scriptLanguage == eScriptSGML)
 822				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 823			else
 824				styler.ColourTo(i, SCE_H_QUESTION);
 825			state = beforePreProc;
 826			if (inScriptType == eNonHtmlScriptPreProc)
 827				inScriptType = eNonHtmlScript;
 828			else
 829				inScriptType = eHtml;
 830			// Unfold all scripting languages, except for XML tag
 831			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 832				levelCurrent--;
 833			}
 834			scriptLanguage = eScriptNone;
 835			continue;
 836		}
 837		/////////////////////////////////////
 838
 839		switch (state) {
 840		case SCE_H_DEFAULT:
 841			if (ch == '<') {
 842				// in HTML, fold on tag open and unfold on tag close
 843				tagOpened = true;
 844				tagClosing = (chNext == '/');
 845				styler.ColourTo(i - 1, StateToPrint);
 846				if (chNext != '!')
 847					state = SCE_H_TAGUNKNOWN;
 848			} else if (ch == '&') {
 849				styler.ColourTo(i - 1, SCE_H_DEFAULT);
 850				state = SCE_H_ENTITY;
 851			}
 852			break;
 853		case SCE_H_SGML_DEFAULT:
 854		case SCE_H_SGML_BLOCK_DEFAULT:
 855//			if (scriptLanguage == eScriptSGMLblock)
 856//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
 857
 858			if (ch == '\"') {
 859				styler.ColourTo(i - 1, StateToPrint);
 860				state = SCE_H_SGML_DOUBLESTRING;
 861			} else if (ch == '\'') {
 862				styler.ColourTo(i - 1, StateToPrint);
 863				state = SCE_H_SGML_SIMPLESTRING;
 864			} else if ((ch == '-') && (chPrev == '-')) {
 865				styler.ColourTo(i - 2, StateToPrint);
 866				state = SCE_H_SGML_COMMENT;
 867			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
 868				styler.ColourTo(i - 2, StateToPrint);
 869				state = SCE_H_SGML_ENTITY;
 870			} else if (ch == '#') {
 871				styler.ColourTo(i - 1, StateToPrint);
 872				state = SCE_H_SGML_SPECIAL;
 873			} else if (ch == '[') {
 874				styler.ColourTo(i - 1, StateToPrint);
 875				scriptLanguage = eScriptSGMLblock;
 876				state = SCE_H_SGML_BLOCK_DEFAULT;
 877			} else if (ch == ']') {
 878				if (scriptLanguage == eScriptSGMLblock) {
 879					styler.ColourTo(i, StateToPrint);
 880					scriptLanguage = eScriptSGML;
 881				} else {
 882					styler.ColourTo(i - 1, StateToPrint);
 883					styler.ColourTo(i, SCE_H_SGML_ERROR);
 884				}
 885				state = SCE_H_SGML_DEFAULT;
 886			} else if (scriptLanguage == eScriptSGMLblock) {
 887				if ((ch == '!') && (chPrev == '<')) {
 888					styler.ColourTo(i - 2, StateToPrint);
 889					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 890					state = SCE_H_SGML_COMMAND;
 891				} else if (ch == '>') {
 892					styler.ColourTo(i - 1, StateToPrint);
 893					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 894				}
 895			}
 896			break;
 897		case SCE_H_SGML_COMMAND:
 898			if ((ch == '-') && (chPrev == '-')) {
 899				styler.ColourTo(i - 2, StateToPrint);
 900				state = SCE_H_SGML_COMMENT;
 901			} else if (!issgmlwordchar(ch)) {
 902				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
 903					styler.ColourTo(i - 1, StateToPrint);
 904					state = SCE_H_SGML_1ST_PARAM;
 905				} else {
 906					state = SCE_H_SGML_ERROR;
 907				}
 908			}
 909			break;
 910		case SCE_H_SGML_1ST_PARAM:
 911			// wait for the beginning of the word
 912			if ((ch == '-') && (chPrev == '-')) {
 913				if (scriptLanguage == eScriptSGMLblock) {
 914					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
 915				} else {
 916					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
 917				}
 918				state = SCE_H_SGML_1ST_PARAM_COMMENT;
 919			} else if (issgmlwordchar(ch)) {
 920				if (scriptLanguage == eScriptSGMLblock) {
 921					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
 922				} else {
 923					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
 924				}
 925				// find the length of the word
 926				int size = 1;
 927				while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
 928					size++;
 929				styler.ColourTo(i + size - 1, StateToPrint);
 930				i += size - 1;
 931				visibleChars += size - 1;
 932				ch = styler.SafeGetCharAt(i);
 933				if (scriptLanguage == eScriptSGMLblock) {
 934					state = SCE_H_SGML_BLOCK_DEFAULT;
 935				} else {
 936					state = SCE_H_SGML_DEFAULT;
 937				}
 938				continue;
 939			}
 940			break;
 941		case SCE_H_SGML_ERROR:
 942			if ((ch == '-') && (chPrev == '-')) {
 943				styler.ColourTo(i - 2, StateToPrint);
 944				state = SCE_H_SGML_COMMENT;
 945			}
 946		case SCE_H_SGML_DOUBLESTRING:
 947			if (ch == '\"') {
 948				styler.ColourTo(i, StateToPrint);
 949				state = SCE_H_SGML_DEFAULT;
 950			}
 951			break;
 952		case SCE_H_SGML_SIMPLESTRING:
 953			if (ch == '\'') {
 954				styler.ColourTo(i, StateToPrint);
 955				state = SCE_H_SGML_DEFAULT;
 956			}
 957			break;
 958		case SCE_H_SGML_COMMENT:
 959			if ((ch == '-') && (chPrev == '-')) {
 960				styler.ColourTo(i, StateToPrint);
 961				state = SCE_H_SGML_DEFAULT;
 962			}
 963			break;
 964		case SCE_H_CDATA:
 965			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
 966				styler.ColourTo(i, StateToPrint);
 967				state = SCE_H_DEFAULT;
 968				levelCurrent--;
 969			}
 970			break;
 971		case SCE_H_COMMENT:
 972			if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
 973				styler.ColourTo(i, StateToPrint);
 974				state = SCE_H_DEFAULT;
 975				levelCurrent--;
 976			}
 977			break;
 978		case SCE_H_SGML_1ST_PARAM_COMMENT:
 979			if ((ch == '-') && (chPrev == '-')) {
 980				styler.ColourTo(i, SCE_H_SGML_COMMENT);
 981				state = SCE_H_SGML_1ST_PARAM;
 982			}
 983			break;
 984		case SCE_H_SGML_SPECIAL:
 985			if (!(isascii(ch) && isupper(ch))) {
 986				styler.ColourTo(i - 1, StateToPrint);
 987				if (isalnum(ch)) {
 988					state = SCE_H_SGML_ERROR;
 989				} else {
 990					state = SCE_H_SGML_DEFAULT;
 991				}
 992			}
 993			break;
 994		case SCE_H_SGML_ENTITY:
 995			if (ch == ';') {
 996				styler.ColourTo(i, StateToPrint);
 997				state = SCE_H_SGML_DEFAULT;
 998			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
 999				styler.ColourTo(i, SCE_H_SGML_ERROR);
1000				state = SCE_H_SGML_DEFAULT;
1001			}
1002			break;
1003		case SCE_H_ENTITY:
1004			if (ch == ';') {
1005				styler.ColourTo(i, StateToPrint);
1006				state = SCE_H_DEFAULT;
1007			}
1008			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1009				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1010				styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1011				state = SCE_H_DEFAULT;
1012			}
1013			break;
1014		case SCE_H_TAGUNKNOWN:
1015			if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
1016				int eClass = classifyTagHTML(styler.GetStartSegment(),
1017					i - 1, keywords, styler, tagDontFold, caseSensitive);
1018				if (eClass == SCE_H_SCRIPT) {
1019					if (!tagClosing) {
1020						inScriptType = eNonHtmlScript;
1021						scriptLanguage = clientScript;
1022						eClass = SCE_H_TAG;
1023					} else {
1024						scriptLanguage = eScriptNone;
1025						eClass = SCE_H_TAG;
1026					}
1027				}
1028				if (ch == '>') {
1029					styler.ColourTo(i, eClass);
1030					if (inScriptType == eNonHtmlScript) {
1031						state = StateForScript(scriptLanguage);
1032					} else {
1033						state = SCE_H_DEFAULT;
1034					}
1035					tagOpened = false;
1036					if (!tagDontFold){
1037						if (tagClosing) {
1038							levelCurrent--;
1039						} else {
1040							levelCurrent++;
1041						}
1042					}
1043					tagClosing = false;
1044				} else if (ch == '/' && chNext == '>') {
1045					if (eClass == SCE_H_TAGUNKNOWN) {
1046						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1047					} else {
1048						styler.ColourTo(i - 1, StateToPrint);
1049						styler.ColourTo(i + 1, SCE_H_TAGEND);
1050					}
1051					i++;
1052					ch = chNext;
1053					state = SCE_H_DEFAULT;
1054					tagOpened = false;
1055				} else {
1056					if (eClass != SCE_H_TAGUNKNOWN) {
1057						if (eClass == SCE_H_SGML_DEFAULT) {
1058							state = SCE_H_SGML_DEFAULT;
1059						} else {
1060							state = SCE_H_OTHER;
1061						}
1062					}
1063				}
1064			}
1065			break;
1066		case SCE_H_ATTRIBUTE:
1067			if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
1068				if (inScriptType == eNonHtmlScript) {
1069					int scriptLanguagePrev = scriptLanguage;
1070					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1071					scriptLanguage = clientScript;
1072					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1073						inScriptType = eHtml;
1074				}
1075				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1076				if (ch == '>') {
1077					styler.ColourTo(i, SCE_H_TAG);
1078					if (inScriptType == eNonHtmlScript) {
1079						state = StateForScript(scriptLanguage);
1080					} else {
1081						state = SCE_H_DEFAULT;
1082					}
1083					tagOpened = false;
1084					if (!tagDontFold){
1085						if (tagClosing){
1086							levelCurrent--;
1087						} else {
1088							levelCurrent++;
1089						}
1090					}
1091					tagClosing = false;
1092				} else if (ch == '=') {
1093					styler.ColourTo(i, SCE_H_OTHER);
1094					state = SCE_H_VALUE;
1095				} else {
1096					state = SCE_H_OTHER;
1097				}
1098			}
1099			break;
1100		case SCE_H_OTHER:
1101			if (ch == '>') {
1102				styler.ColourTo(i - 1, StateToPrint);
1103				styler.ColourTo(i, SCE_H_TAG);
1104				if (inScriptType == eNonHtmlScript) {
1105					state = StateForScript(scriptLanguage);
1106				} else {
1107					state = SCE_H_DEFAULT;
1108				}
1109				tagOpened = false;
1110				if (!tagDontFold){
1111					if (tagClosing){
1112						levelCurrent--;
1113					} else {
1114						levelCurrent++;
1115					}
1116				}
1117				tagClosing = false;
1118			} else if (ch == '\"') {
1119				styler.ColourTo(i - 1, StateToPrint);
1120				state = SCE_H_DOUBLESTRING;
1121			} else if (ch == '\'') {
1122				styler.ColourTo(i - 1, StateToPrint);
1123				state = SCE_H_SINGLESTRING;
1124			} else if (ch == '=') {
1125				styler.ColourTo(i, StateToPrint);
1126				state = SCE_H_VALUE;
1127			} else if (ch == '/' && chNext == '>') {
1128				styler.ColourTo(i - 1, StateToPrint);
1129				styler.ColourTo(i + 1, SCE_H_TAGEND);
1130				i++;
1131				ch = chNext;
1132				state = SCE_H_DEFAULT;
1133				tagOpened = false;
1134			} else if (ch == '?' && chNext == '>') {
1135				styler.ColourTo(i - 1, StateToPrint);
1136				styler.ColourTo(i + 1, SCE_H_XMLEND);
1137				i++;
1138				ch = chNext;
1139				state = SCE_H_DEFAULT;
1140			} else if (ishtmlwordchar(ch)) {
1141				styler.ColourTo(i - 1, StateToPrint);
1142				state = SCE_H_ATTRIBUTE;
1143			}
1144			break;
1145		case SCE_H_DOUBLESTRING:
1146			if (ch == '\"') {
1147				if (inScriptType == eNonHtmlScript) {
1148					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1149				}
1150				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1151				state = SCE_H_OTHER;
1152			}
1153			break;
1154		case SCE_H_SINGLESTRING:
1155			if (ch == '\'') {
1156				if (inScriptType == eNonHtmlScript) {
1157					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1158				}
1159				styler.ColourTo(i, SCE_H_SINGLESTRING);
1160				state = SCE_H_OTHER;
1161			}
1162			break;
1163		case SCE_H_VALUE:
1164			if (!ishtmlwordchar(ch)) {
1165				if (ch == '\"' && chPrev == '=') {
1166					// Should really test for being first character
1167					state = SCE_H_DOUBLESTRING;
1168				} else if (ch == '\'' && chPrev == '=') {
1169					state = SCE_H_SINGLESTRING;
1170				} else {
1171					if (IsNumber(styler.GetStartSegment(), styler)) {
1172						styler.ColourTo(i - 1, SCE_H_NUMBER);
1173					} else {
1174						styler.ColourTo(i - 1, StateToPrint);
1175					}
1176					if (ch == '>') {
1177						styler.ColourTo(i, SCE_H_TAG);
1178						if (inScriptType == eNonHtmlScript) {
1179							state = StateForScript(scriptLanguage);
1180						} else {
1181							state = SCE_H_DEFAULT;
1182						}
1183						tagOpened = false;
1184						if (!tagDontFold){
1185							if (tagClosing){
1186								levelCurrent--;
1187							} else {
1188								levelCurrent++;
1189							}
1190						}
1191						tagClosing = false;
1192					} else {
1193						state = SCE_H_OTHER;
1194					}
1195				}
1196			}
1197			break;
1198		case SCE_HJ_DEFAULT:
1199		case SCE_HJ_START:
1200		case SCE_HJ_SYMBOLS:
1201			if (iswordstart(ch)) {
1202				styler.ColourTo(i - 1, StateToPrint);
1203				state = SCE_HJ_WORD;
1204			} else if (ch == '/' && chNext == '*') {
1205				styler.ColourTo(i - 1, StateToPrint);
1206				if (chNext2 == '*')
1207					state = SCE_HJ_COMMENTDOC;
1208				else
1209					state = SCE_HJ_COMMENT;
1210			} else if (ch == '/' && chNext == '/') {
1211				styler.ColourTo(i - 1, StateToPrint);
1212				state = SCE_HJ_COMMENTLINE;
1213			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1214				styler.ColourTo(i - 1, StateToPrint);
1215				state = SCE_HJ_REGEX;
1216			} else if (ch == '\"') {
1217				styler.ColourTo(i - 1, StateToPrint);
1218				state = SCE_HJ_DOUBLESTRING;
1219			} else if (ch == '\'') {
1220				styler.ColourTo(i - 1, StateToPrint);
1221				state = SCE_HJ_SINGLESTRING;
1222			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1223			           styler.SafeGetCharAt(i + 3) == '-') {
1224				styler.ColourTo(i - 1, StateToPrint);
1225				state = SCE_HJ_COMMENTLINE;
1226			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1227				styler.ColourTo(i - 1, StateToPrint);
1228				state = SCE_HJ_COMMENTLINE;
1229				i += 2;
1230			} else if (isoperator(ch)) {
1231				styler.ColourTo(i - 1, StateToPrint);
1232				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1233				state = SCE_HJ_DEFAULT;
1234			} else if ((ch == ' ') || (ch == '\t')) {
1235				if (state == SCE_HJ_START) {
1236					styler.ColourTo(i - 1, StateToPrint);
1237					state = SCE_HJ_DEFAULT;
1238				}
1239			}
1240			break;
1241		case SCE_HJ_WORD:
1242			if (!iswordchar(ch)) {
1243				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1244				//styler.ColourTo(i - 1, eHTJSKeyword);
1245				state = SCE_HJ_DEFAULT;
1246				if (ch == '/' && chNext == '*') {
1247					if (chNext2 == '*')
1248						state = SCE_HJ_COMMENTDOC;
1249					else
1250						state = SCE_HJ_COMMENT;
1251				} else if (ch == '/' && chNext == '/') {
1252					state = SCE_HJ_COMMENTLINE;
1253				} else if (ch == '\"') {
1254					state = SCE_HJ_DOUBLESTRING;
1255				} else if (ch == '\'') {
1256					state = SCE_HJ_SINGLESTRING;
1257				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1258					styler.ColourTo(i - 1, StateToPrint);
1259					state = SCE_HJ_COMMENTLINE;
1260					i += 2;
1261				} else if (isoperator(ch)) {
1262					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1263					state = SCE_HJ_DEFAULT;
1264				}
1265			}
1266			break;
1267		case SCE_HJ_COMMENT:
1268		case SCE_HJ_COMMENTDOC:
1269			if (ch == '/' && chPrev == '*') {
1270				styler.ColourTo(i, StateToPrint);
1271				state = SCE_HJ_DEFAULT;
1272				ch = ' ';
1273			}
1274			break;
1275		case SCE_HJ_COMMENTLINE:
1276			if (ch == '\r' || ch == '\n') {
1277				styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1278				state = SCE_HJ_DEFAULT;
1279				ch = ' ';
1280			}
1281			break;
1282		case SCE_HJ_DOUBLESTRING:
1283			if (ch == '\\') {
1284				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1285					i++;
1286				}
1287			} else if (ch == '\"') {
1288				styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1289				state = SCE_HJ_DEFAULT;
1290			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1291				styler.ColourTo(i - 1, StateToPrint);
1292				state = SCE_HJ_COMMENTLINE;
1293				i += 2;
1294			} else if (isLineEnd(ch)) {
1295				styler.ColourTo(i - 1, StateToPrint);
1296				state = SCE_HJ_STRINGEOL;
1297			}
1298			break;
1299		case SCE_HJ_SINGLESTRING:
1300			if (ch == '\\') {
1301				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1302					i++;
1303				}
1304			} else if (ch == '\'') {
1305				styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1306				state = SCE_HJ_DEFAULT;
1307			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1308				styler.ColourTo(i - 1, StateToPrint);
1309				state = SCE_HJ_COMMENTLINE;
1310				i += 2;
1311			} else if (isLineEnd(ch)) {
1312				styler.ColourTo(i - 1, StateToPrint);
1313				state = SCE_HJ_STRINGEOL;
1314			}
1315			break;
1316		case SCE_HJ_STRINGEOL:
1317			if (!isLineEnd(ch)) {
1318				styler.ColourTo(i - 1, StateToPrint);
1319				state = SCE_HJ_DEFAULT;
1320			} else if (!isLineEnd(chNext)) {
1321				styler.ColourTo(i, StateToPrint);
1322				state = SCE_HJ_DEFAULT;
1323			}
1324			break;
1325		case SCE_HJ_REGEX:
1326			if (ch == '\r' || ch == '\n' || ch == '/') {
1327				if (ch == '/') {
1328					while (isascii(chNext) && islower(chNext)) {   // gobble regex flags
1329						i++;
1330						ch = chNext;
1331						chNext = styler.SafeGetCharAt(i + 1);
1332					}
1333				}
1334				styler.ColourTo(i, StateToPrint);
1335				state = SCE_HJ_DEFAULT;
1336			} else if (ch == '\\') {
1337				// Gobble up the quoted character
1338				if (chNext == '\\' || chNext == '/') {
1339					i++;
1340					ch = chNext;
1341					chNext = styler.SafeGetCharAt(i + 1);
1342				}
1343			}
1344			break;
1345		case SCE_HB_DEFAULT:
1346		case SCE_HB_START:
1347			if (iswordstart(ch)) {
1348				styler.ColourTo(i - 1, StateToPrint);
1349				state = SCE_HB_WORD;
1350			} else if (ch == '\'') {
1351				styler.ColourTo(i - 1, StateToPrint);
1352				state = SCE_HB_COMMENTLINE;
1353			} else if (ch == '\"') {
1354				styler.ColourTo(i - 1, StateToPrint);
1355				state = SCE_HB_STRING;
1356			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1357			           styler.SafeGetCharAt(i + 3) == '-') {
1358				styler.ColourTo(i - 1, StateToPrint);
1359				state = SCE_HB_COMMENTLINE;
1360			} else if (isoperator(ch)) {
1361				styler.ColourTo(i - 1, StateToPrint);
1362				styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1363				state = SCE_HB_DEFAULT;
1364			} else if ((ch == ' ') || (ch == '\t')) {
1365				if (state == SCE_HB_START) {
1366					styler.ColourTo(i - 1, StateToPrint);
1367					state = SCE_HB_DEFAULT;
1368				}
1369			}
1370			break;
1371		case SCE_HB_WORD:
1372			if (!iswordchar(ch)) {
1373				state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1374				if (state == SCE_HB_DEFAULT) {
1375					if (ch == '\"') {
1376						state = SCE_HB_STRING;
1377					} else if (ch == '\'') {
1378						state = SCE_HB_COMMENTLINE;
1379					} else if (isoperator(ch)) {
1380						styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1381						state = SCE_HB_DEFAULT;
1382					}
1383				}
1384			}
1385			break;
1386		case SCE_HB_STRING:
1387			if (ch == '\"') {
1388				styler.ColourTo(i, StateToPrint);
1389				state = SCE_HB_DEFAULT;
1390			} else if (ch == '\r' || ch == '\n') {
1391				styler.ColourTo(i - 1, StateToPrint);
1392				state = SCE_HB_STRINGEOL;
1393			}
1394			break;
1395		case SCE_HB_COMMENTLINE:
1396			if (ch == '\r' || ch == '\n') {
1397				styler.ColourTo(i - 1, StateToPrint);
1398				state = SCE_HB_DEFAULT;
1399			}
1400			break;
1401		case SCE_HB_STRINGEOL:
1402			if (!isLineEnd(ch)) {
1403				styler.ColourTo(i - 1, StateToPrint);
1404				state = SCE_HB_DEFAULT;
1405			} else if (!isLineEnd(chNext)) {
1406				styler.ColourTo(i, StateToPrint);
1407				state = SCE_HB_DEFAULT;
1408			}
1409			break;
1410		case SCE_HP_DEFAULT:
1411		case SCE_HP_START:
1412			if (iswordstart(ch)) {
1413				styler.ColourTo(i - 1, StateToPrint);
1414				state = SCE_HP_WORD;
1415			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1416			           styler.SafeGetCharAt(i + 3) == '-') {
1417				styler.ColourTo(i - 1, StateToPrint);
1418				state = SCE_HP_COMMENTLINE;
1419			} else if (ch == '#') {
1420				styler.ColourTo(i - 1, StateToPrint);
1421				state = SCE_HP_COMMENTLINE;
1422			} else if (ch == '\"') {
1423				styler.ColourTo(i - 1, StateToPrint);
1424				if (chNext == '\"' && chNext2 == '\"') {
1425					i += 2;
1426					state = SCE_HP_TRIPLEDOUBLE;
1427					ch = ' ';
1428					chPrev = ' ';
1429					chNext = styler.SafeGetCharAt(i + 1);
1430				} else {
1431					//					state = statePrintForState(SCE_HP_STRING,inScriptType);
1432					state = SCE_HP_STRING;
1433				}
1434			} else if (ch == '\'') {
1435				styler.ColourTo(i - 1, StateToPrint);
1436				if (chNext == '\'' && chNext2 == '\'') {
1437					i += 2;
1438					state = SCE_HP_TRIPLE;
1439					ch = ' ';
1440					chPrev = ' ';
1441					chNext = styler.SafeGetCharAt(i + 1);
1442				} else {
1443					state = SCE_HP_CHARACTER;
1444				}
1445			} else if (isoperator(ch)) {
1446				styler.ColourTo(i - 1, StateToPrint);
1447				styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1448			} else if ((ch == ' ') || (ch == '\t')) {
1449				if (state == SCE_HP_START) {
1450					styler.ColourTo(i - 1, StateToPrint);
1451					state = SCE_HP_DEFAULT;
1452				}
1453			}
1454			break;
1455		case SCE_HP_WORD:
1456			if (!iswordchar(ch)) {
1457				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1458				state = SCE_HP_DEFAULT;
1459				if (ch == '#') {
1460					state = SCE_HP_COMMENTLINE;
1461				} else if (ch == '\"') {
1462					if (chNext == '\"' && chNext2 == '\"') {
1463						i += 2;
1464						state = SCE_HP_TRIPLEDOUBLE;
1465						ch = ' ';
1466						chPrev = ' ';
1467						chNext = styler.SafeGetCharAt(i + 1);
1468					} else {
1469						state = SCE_HP_STRING;
1470					}
1471				} else if (ch == '\'') {
1472					if (chNext == '\'' && chNext2 == '\'') {
1473						i += 2;
1474						state = SCE_HP_TRIPLE;
1475						ch = ' ';
1476						chPrev = ' ';
1477						chNext = styler.SafeGetCharAt(i + 1);
1478					} else {
1479						state = SCE_HP_CHARACTER;
1480					}
1481				} else if (isoperator(ch)) {
1482					styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1483				}
1484			}
1485			break;
1486		case SCE_HP_COMMENTLINE:
1487			if (ch == '\r' || ch == '\n') {
1488				styler.ColourTo(i - 1, StateToPrint);
1489				state = SCE_HP_DEFAULT;
1490			}
1491			break;
1492		case SCE_HP_STRING:
1493			if (ch == '\\') {
1494				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1495					i++;
1496					ch = chNext;
1497					chNext = styler.SafeGetCharAt(i + 1);
1498				}
1499			} else if (ch == '\"') {
1500				styler.ColourTo(i, StateToPrint);
1501				state = SCE_HP_DEFAULT;
1502			}
1503			break;
1504		case SCE_HP_CHARACTER:
1505			if (ch == '\\') {
1506				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1507					i++;
1508					ch = chNext;
1509					chNext = styler.SafeGetCharAt(i + 1);
1510				}
1511			} else if (ch == '\'') {
1512				styler.ColourTo(i, StateToPrint);
1513				state = SCE_HP_DEFAULT;
1514			}
1515			break;
1516		case SCE_HP_TRIPLE:
1517			if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1518				styler.ColourTo(i, StateToPrint);
1519				state = SCE_HP_DEFAULT;
1520			}
1521			break;
1522		case SCE_HP_TRIPLEDOUBLE:
1523			if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1524				styler.ColourTo(i, StateToPrint);
1525				state = SCE_HP_DEFAULT;
1526			}
1527			break;
1528			///////////// start - PHP state handling
1529		case SCE_HPHP_WORD:
1530			if (!iswordchar(ch)) {
1531				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1532				if (ch == '/' && chNext == '*') {
1533					i++;
1534					state = SCE_HPHP_COMMENT;
1535				} else if (ch == '/' && chNext == '/') {
1536					i++;
1537					state = SCE_HPHP_COMMENTLINE;
1538				} else if (ch == '#') {
1539					state = SCE_HPHP_COMMENTLINE;
1540				} else if (ch == '\"') {
1541					state = SCE_HPHP_HSTRING;
1542					strcpy(phpStringDelimiter, "\"");
1543				} else if (styler.Match(i, "<<<")) {
1544					state = SCE_HPHP_HSTRING;
1545					i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1546				} else if (ch == '\'') {
1547					state = SCE_HPHP_SIMPLESTRING;
1548				} else if (ch == '$' && IsPhpWordStart(chNext)) {
1549					state = SCE_HPHP_VARIABLE;
1550				} else if (isoperator(ch)) {
1551					state = SCE_HPHP_OPERATOR;
1552				} else {
1553					state = SCE_HPHP_DEFAULT;
1554				}
1555			}
1556			break;
1557		case SCE_HPHP_NUMBER:
1558			// recognize bases 8,10 or 16 integers OR floating-point numbers
1559			if (!IsADigit(ch)
1560				&& strchr(".xXabcdefABCDEF", ch) == NULL
1561				&& ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1562				styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1563				if (isoperator(ch))
1564					state = SCE_HPHP_OPERATOR;
1565				else
1566					state = SCE_HPHP_DEFAULT;
1567			}
1568			break;
1569		case SCE_HPHP_VARIABLE:
1570			if (!IsPhpWordChar(ch)) {
1571				styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1572				if (isoperator(ch))
1573					state = SCE_HPHP_OPERATOR;
1574				else
1575					state = SCE_HPHP_DEFAULT;
1576			}
1577			break;
1578		case SCE_HPHP_COMMENT:
1579			if (ch == '/' && chPrev == '*') {
1580				styler.ColourTo(i, StateToPrint);
1581				state = SCE_HPHP_DEFAULT;
1582			}
1583			break;
1584		case SCE_HPHP_COMMENTLINE:
1585			if (ch == '\r' || ch == '\n') {
1586				styler.ColourTo(i - 1, StateToPrint);
1587				state = SCE_HPHP_DEFAULT;
1588			}
1589			break;
1590		case SCE_HPHP_HSTRING:
1591			if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext

Large files are truncated, but you can click here to view the full file