PageRenderTime 1353ms CodeModel.GetById 161ms app.highlight 779ms RepoModel.GetById 121ms app.codeStats 1ms

/code/ryzom/client/src/lua_ide_dll_nevrax/source/scintilla/LexHTML.cxx

https://bitbucket.org/mattraykowski/ryzomcore_demoshard
C++ | 1885 lines | 1714 code | 86 blank | 85 comment | 1211 complexity | 4dae2370de3927b81e2bdaa6b6ad49d8 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Ryzom - MMORPG Framework <http://dev.ryzom.com/projects/ryzom/>
   2// Copyright (C) 2010  Winch Gate Property Limited
   3//
   4// This program is free software: you can redistribute it and/or modify
   5// it under the terms of the GNU Affero General Public License as
   6// published by the Free Software Foundation, either version 3 of the
   7// License, or (at your option) any later version.
   8//
   9// This program is distributed in the hope that it will be useful,
  10// but WITHOUT ANY WARRANTY; without even the implied warranty of
  11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12// GNU Affero General Public License for more details.
  13//
  14// You should have received a copy of the GNU Affero General Public License
  15// along with this program.  If not, see <http://www.gnu.org/licenses/>.
  16
  17// Scintilla source code edit control
  18/** @file LexHTML.cxx
  19 ** Lexer for HTML.
  20 **/
  21// Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
  22// The License.txt file describes the conditions under which this software may be distributed.
  23
  24#include <stdlib.h>

  25#include <string.h>

  26#include <ctype.h>

  27#include <stdio.h>

  28#include <stdarg.h>

  29
  30#include "Platform.h"

  31
  32#include "PropSet.h"

  33#include "Accessor.h"

  34#include "StyleContext.h"

  35#include "KeyWords.h"

  36#include "Scintilla.h"

  37#include "SciLexer.h"

  38
  39#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)

  40#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)

  41#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  42
  // Languages that can be embedded in (or wrapped around) the HTML being lexed.
  // The numeric values are packed into 4-bit fields of the per-line state, so
  // they must stay in the 0..15 range and keep their current order.
  enum script_type {
  	eScriptNone = 0,
  	eScriptJS,
  	eScriptVBS,
  	eScriptPython,
  	eScriptPHP,
  	eScriptXML,
  	eScriptSGML,
  	eScriptSGMLblock
  };

  // Where the lexer currently is relative to plain HTML. The value is packed
  // into a 2-bit field of the per-line state, so it must stay in the 0..3 range.
  enum script_mode {
  	eHtml = 0,
  	eNonHtmlScript,
  	eNonHtmlPreProc,
  	eNonHtmlScriptPreProc
  };
  45
  // Reports whether ch can appear inside a word: an ASCII letter or digit,
  // '.' or '_'. Every value outside the 0..0x7F range is rejected.
  static inline bool IsAWordChar(const int ch) {
  	// isalnum() is undefined for negative values other than EOF, which is what
  	// a sign-extended high-bit char turns into, so filter those out first.
  	if (ch < 0 || ch >= 0x80)
  		return false;
  	return isalnum(ch) || ch == '.' || ch == '_';
  }
  49
  // Reports whether ch can begin a word: an ASCII letter or digit, or '_'.
  // Every value outside the 0..0x7F range is rejected.
  static inline bool IsAWordStart(const int ch) {
  	// isalnum() is undefined for negative values other than EOF, which is what
  	// a sign-extended high-bit char turns into, so filter those out first.
  	if (ch < 0 || ch >= 0x80)
  		return false;
  	return isalnum(ch) || ch == '_';
  }
  53
  54static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  55	char s[30 + 1];
  56	unsigned int i = 0;
  57	for (; i < end - start + 1 && i < 30; i++) {
  58		s[i] = static_cast<char>(tolower(styler[start + i]));
  59	}
  60	s[i] = '\0';
  61	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  62	if (strstr(s, "src"))	// External script
  63		return eScriptNone;
  64	if (strstr(s, "vbs"))
  65		return eScriptVBS;
  66	if (strstr(s, "pyth"))
  67		return eScriptPython;
  68	if (strstr(s, "javas"))
  69		return eScriptJS;
  70	if (strstr(s, "jscr"))
  71		return eScriptJS;
  72	if (strstr(s, "php"))
  73		return eScriptPHP;
  74	if (strstr(s, "xml"))
  75		return eScriptXML;
  76
  77	return prevValue;
  78}
  79
  80static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
  81	int iResult = 0;
  82	char s[30 + 1];
  83	unsigned int i = 0;
  84	for (; i < end - start + 1 && i < 30; i++) {
  85		s[i] = static_cast<char>(tolower(styler[start + i]));
  86	}
  87	s[i] = '\0';
  88	if (0 == strncmp(s, "php", 3)) {
  89		iResult = 3;
  90	}
  91
  92	return iResult;
  93}
  94
  95static script_type ScriptOfState(int state) {
  96	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
  97		return eScriptPython;
  98	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
  99		return eScriptVBS;
 100	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 101		return eScriptJS;
 102	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 103		return eScriptPHP;
 104	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 105		return eScriptSGML;
 106	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 107		return eScriptSGMLblock;
 108	} else {
 109		return eScriptNone;
 110	}
 111}
 112
 113static int statePrintForState(int state, script_mode inScriptType) {
 114	int StateToPrint;
 115
 116	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 117		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 118	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 119		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 120	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 121		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 122	} else {
 123		StateToPrint = state;
 124	}
 125
 126	return StateToPrint;
 127}
 128
 129static int stateForPrintState(int StateToPrint) {
 130	int state;
 131
 132	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 133		state = StateToPrint - SCE_HA_PYTHON;
 134	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 135		state = StateToPrint - SCE_HA_VBS;
 136	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 137		state = StateToPrint - SCE_HA_JS;
 138	} else {
 139		state = StateToPrint;
 140	}
 141
 142	return state;
 143}
 144
 145static inline bool IsNumber(unsigned int start, Accessor &styler) {
 146	return isdigit(styler[start]) || (styler[start] == '.') ||
 147	       (styler[start] == '-') || (styler[start] == '#');
 148}
 149
 150static inline bool isStringState(int state) {
 151	bool bResult;
 152
 153	switch (state) {
 154	case SCE_HJ_DOUBLESTRING:
 155	case SCE_HJ_SINGLESTRING:
 156	case SCE_HJA_DOUBLESTRING:
 157	case SCE_HJA_SINGLESTRING:
 158	case SCE_HB_STRING:
 159	case SCE_HBA_STRING:
 160	case SCE_HP_STRING:
 161	case SCE_HPA_STRING:
 162	case SCE_HPHP_HSTRING:
 163	case SCE_HPHP_SIMPLESTRING:
 164		bResult = true;
 165		break;
 166	default :
 167		bResult = false;
 168		break;
 169	}
 170	return bResult;
 171}
 172
 173// not really well done, since it's only comments that should lex the %> and <%
 174static inline bool isCommentASPState(int state) {
 175	bool bResult;
 176
 177	switch (state) {
 178	case SCE_HJ_COMMENT:
 179	case SCE_HJ_COMMENTLINE:
 180	case SCE_HJ_COMMENTDOC:
 181	case SCE_HB_COMMENTLINE:
 182	case SCE_HP_COMMENTLINE:
 183	case SCE_HPHP_COMMENT:
 184	case SCE_HPHP_COMMENTLINE:
 185		bResult = true;
 186		break;
 187	default :
 188		bResult = false;
 189		break;
 190	}
 191	return bResult;
 192}
 193
 194static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 195	bool wordIsNumber = IsNumber(start, styler);
 196	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 197	if (wordIsNumber) {
 198		chAttr = SCE_H_NUMBER;
 199	} else {
 200		char s[30 + 1];
 201		unsigned int i = 0;
 202		for (; i < end - start + 1 && i < 30; i++) {
 203			s[i] = static_cast<char>(tolower(styler[start + i]));
 204		}
 205		s[i] = '\0';
 206		if (keywords.InList(s))
 207			chAttr = SCE_H_ATTRIBUTE;
 208	}
 209	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 210		// No keywords -> all are known
 211		chAttr = SCE_H_ATTRIBUTE;
 212	styler.ColourTo(end, chAttr);
 213}
 214
 215static int classifyTagHTML(unsigned int start, unsigned int end,
 216                           WordList &keywords, Accessor &styler) {
 217	char s[30 + 1];
 218	// Copy after the '<'
 219	unsigned int i = 0;
 220	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 221		char ch = styler[cPos];
 222		if ((ch != '<') && (ch != '/'))
 223			s[i++] = static_cast<char>(tolower(ch));
 224	}
 225	s[i] = '\0';
 226	bool isScript = false;
 227	char chAttr = SCE_H_TAGUNKNOWN;
 228	if (s[0] == '!') {
 229		chAttr = SCE_H_SGML_DEFAULT;
 230	} else if (s[0] == '/') {	// Closing tag
 231		if (keywords.InList(s + 1))
 232			chAttr = SCE_H_TAG;
 233	} else {
 234		if (keywords.InList(s)) {
 235			chAttr = SCE_H_TAG;
 236			isScript = 0 == strcmp(s, "script");
 237		}
 238	}
 239	if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
 240		// No keywords -> all are known
 241		chAttr = SCE_H_TAG;
 242		isScript = 0 == strcmp(s, "script");
 243	}
 244	styler.ColourTo(end, chAttr);
 245	return isScript ? SCE_H_SCRIPT : chAttr;
 246}
 247
 248static void classifyWordHTJS(unsigned int start, unsigned int end,
 249                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 250	char chAttr = SCE_HJ_WORD;
 251	bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
 252	if (wordIsNumber)
 253		chAttr = SCE_HJ_NUMBER;
 254	else {
 255		char s[30 + 1];
 256		unsigned int i = 0;
 257		for (; i < end - start + 1 && i < 30; i++) {
 258			s[i] = styler[start + i];
 259		}
 260		s[i] = '\0';
 261		if (keywords.InList(s))
 262			chAttr = SCE_HJ_KEYWORD;
 263	}
 264	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 265}
 266
 267static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 268	char chAttr = SCE_HB_IDENTIFIER;
 269	bool wordIsNumber = isdigit(styler[start]) || (styler[start] == '.');
 270	if (wordIsNumber)
 271		chAttr = SCE_HB_NUMBER;
 272	else {
 273		char s[30 + 1];
 274		unsigned int i = 0;
 275		for (; i < end - start + 1 && i < 30; i++) {
 276			s[i] = static_cast<char>(tolower(styler[start + i]));
 277		}
 278		s[i] = '\0';
 279		if (keywords.InList(s)) {
 280			chAttr = SCE_HB_WORD;
 281			if (strcmp(s, "rem") == 0)
 282				chAttr = SCE_HB_COMMENTLINE;
 283		}
 284	}
 285	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 286	if (chAttr == SCE_HB_COMMENTLINE)
 287		return SCE_HB_COMMENTLINE;
 288	else
 289		return SCE_HB_DEFAULT;
 290}
 291
 292static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 293	bool wordIsNumber = isdigit(styler[start]) != 0;
 294	char s[30 + 1];
 295	unsigned int i = 0;
 296	for (; i < end - start + 1 && i < 30; i++) {
 297		s[i] = styler[start + i];
 298	}
 299	s[i] = '\0';
 300	char chAttr = SCE_HP_IDENTIFIER;
 301	if (0 == strcmp(prevWord, "class"))
 302		chAttr = SCE_HP_CLASSNAME;
 303	else if (0 == strcmp(prevWord, "def"))
 304		chAttr = SCE_HP_DEFNAME;
 305	else if (wordIsNumber)
 306		chAttr = SCE_HP_NUMBER;
 307	else if (keywords.InList(s))
 308		chAttr = SCE_HP_WORD;
 309	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 310	strcpy(prevWord, s);
 311}
 312
 313// Update the word colour to default or keyword
 314// Called when in a PHP word
 315static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 316	char chAttr = SCE_HPHP_DEFAULT;
 317	bool wordIsNumber = isdigit(styler[start]) != 0;
 318	if (wordIsNumber)
 319		chAttr = SCE_HPHP_NUMBER;
 320	else {
 321		char s[100 + 1];
 322		unsigned int i = 0;
 323		for (; i < end - start + 1 && i < 100; i++) {
 324			s[i] = static_cast<char>(tolower(styler[start + i]));
 325		}
 326		s[i] = '\0';
 327		if (keywords.InList(s))
 328			chAttr = SCE_HPHP_WORD;
 329	}
 330	styler.ColourTo(end, chAttr);
 331}
 332
 333static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 334	char s[30 + 1];
 335	unsigned int i = 0;
 336	for (; i < end - start + 1 && i < 30; i++) {
 337		s[i] = styler[start + i];
 338	}
 339	s[i] = '\0';
 340	return keywords.InList(s);
 341}
 342
 343static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 344	char s[30 + 1];
 345	unsigned int i = 0;
 346	for (; i < end - start + 1 && i < 30; i++) {
 347		s[i] = styler[start + i];
 348	}
 349	s[i] = '\0';
 350	return (0 == strcmp(s, "[CDATA["));
 351}
 352
 353// Return the first state to reach when entering a scripting language
 354static int StateForScript(script_type scriptLanguage) {
 355	int Result;
 356	switch (scriptLanguage) {
 357	case eScriptVBS:
 358		Result = SCE_HB_START;
 359		break;
 360	case eScriptPython:
 361		Result = SCE_HP_START;
 362		break;
 363	case eScriptPHP:
 364		Result = SCE_HPHP_DEFAULT;
 365		break;
 366	case eScriptXML:
 367		Result = SCE_H_TAGUNKNOWN;
 368		break;
 369	case eScriptSGML:
 370		Result = SCE_H_SGML_DEFAULT;
 371		break;
 372	default :
 373		Result = SCE_HJ_START;
 374		break;
 375	}
 376	return Result;
 377}
 378
 // Reports whether ch may appear in an HTML tag or attribute name:
 // a letter, a digit, or one of . - _ : ! #
 static inline bool ishtmlwordchar(char ch) {
 	// isalnum() is only defined for unsigned char values (or EOF); a plain
 	// char holding a high-bit byte may be negative, so convert it first.
 	return isalnum(static_cast<unsigned char>(ch)) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#';
 }
 382
 // Reports whether ch may appear in an SGML word:
 // a letter, a digit, or one of . _ : ! # [
 static inline bool issgmlwordchar(char ch) {
 	// isalnum() is only defined for unsigned char values (or EOF); a plain
 	// char holding a high-bit byte may be negative, so convert it first.
 	return isalnum(static_cast<unsigned char>(ch)) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[';
 }
 386
 387static bool InTagState(int state) {
 388	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 389	       state == SCE_H_SCRIPT ||
 390	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 391	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 392	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 393}
 394
 395static bool IsCommentState(const int state) {
 396	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 397}
 398
 // Reports whether ch is a carriage return or a line feed.
 static bool isLineEnd(char ch) {
 	switch (ch) {
 	case '\r':
 	case '\n':
 		return true;
 	default:
 		return false;
 	}
 }
 402
 // Reports whether ch is one of the characters '(', '=' or ','.
 static bool isOKBeforeRE(char ch) {
 	switch (ch) {
 	case '(':
 	case '=':
 	case ',':
 		return true;
 	default:
 		return false;
 	}
 }
 406
 407static bool isPHPStringState(int state) {
 408	return
 409	    (state == SCE_HPHP_HSTRING) ||
 410	    (state == SCE_HPHP_SIMPLESTRING) ||
 411	    (state == SCE_HPHP_HSTRING_VARIABLE);
 412}
 413
 414static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 415                                  Accessor &styler) {
 416	WordList &keywords = *keywordlists[0];
 417	WordList &keywords2 = *keywordlists[1];
 418	WordList &keywords3 = *keywordlists[2];
 419	WordList &keywords4 = *keywordlists[3];
 420	WordList &keywords5 = *keywordlists[4];
 421	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 422
 423	// Lexer for HTML requires more lexical states (7 bits worth) than most lexers
 424	styler.StartAt(startPos, STYLE_MAX);
 425	char prevWord[200];
 426	prevWord[0] = '\0';
 427	int StateToPrint = initStyle;
 428	int state = stateForPrintState(StateToPrint);
 429
 430	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 431	if (InTagState(state)) {
 432		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 433			startPos--;
 434			length++;
 435		}
 436		state = SCE_H_DEFAULT;
 437	}
 438	styler.StartAt(startPos, STYLE_MAX);
 439
 440	int lineCurrent = styler.GetLine(startPos);
 441	int lineState;
 442	if (lineCurrent > 0) {
 443		lineState = styler.GetLineState(lineCurrent);
 444	} else {
 445		// Default client and ASP scripting language is JavaScript
 446		lineState = eScriptJS << 8;
 447		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 448	}
 449	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 450	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 451	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 452	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 453	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 454	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 455
 456	script_type scriptLanguage = ScriptOfState(state);
 457
 458	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 459	const bool fold = foldHTML && styler.GetPropertyInt("fold");
 460	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 461
 462	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 463	int levelCurrent = levelPrev;
 464	int visibleChars = 0;
 465
 466	char chPrev = ' ';
 467	char ch = ' ';
 468	char chPrevNonWhite = ' ';
 469	styler.StartSegment(startPos);
 470	const int lengthDoc = startPos + length;
 471	for (int i = startPos; i < lengthDoc; i++) {
 472		const char chPrev2 = chPrev;
 473		chPrev = ch;
 474		if (ch != ' ' && ch != '\t')
 475			chPrevNonWhite = ch;
 476		ch = styler[i];
 477		char chNext = styler.SafeGetCharAt(i + 1);
 478		const char chNext2 = styler.SafeGetCharAt(i + 2);
 479
 480		// Handle DBCS codepages
 481		if (styler.IsLeadByte(ch)) {
 482			chPrev = ' ';
 483			i += 1;
 484			continue;
 485		}
 486
 487		if ((!isspacechar(ch) || !foldCompact) && fold)
 488			visibleChars++;
 489
 490		// decide what is the current state to print (depending of the script tag)
 491		StateToPrint = statePrintForState(state, inScriptType);
 492
 493		// handle script folding
 494		if (fold) {
 495			switch (scriptLanguage) {
 496			case eScriptJS:
 497			case eScriptPHP:
 498				//not currently supported				case eScriptVBS:
 499
 500				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC)) {
 501					if ((ch == '{') || (ch == '}')) {
 502						levelCurrent += (ch == '{') ? 1 : -1;
 503					}
 504				}
 505				break;
 506			case eScriptPython:
 507				if (state != SCE_HP_COMMENTLINE) {
 508					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 509						levelCurrent++;
 510					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 511						// check if the number of tabs is lower than the level
 512						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 513						for (int j = 0; Findlevel > 0; j++) {
 514							char chTmp = styler.SafeGetCharAt(i + j + 1);
 515							if (chTmp == '\t') {
 516								Findlevel -= 8;
 517							} else if (chTmp == ' ') {
 518								Findlevel--;
 519							} else {
 520								break;
 521							}
 522						}
 523
 524						if (Findlevel > 0) {
 525							levelCurrent -= Findlevel / 8;
 526							if (Findlevel % 8)
 527								levelCurrent--;
 528						}
 529					}
 530				}
 531				break;
 532			default:
 533				break;
 534			}
 535		}
 536
 537		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 538			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 539			// Avoid triggering two times on Dos/Win
 540			// New line -> record any line state onto /next/ line
 541			if (fold) {
 542				int lev = levelPrev;
 543				if (visibleChars == 0)
 544					lev |= SC_FOLDLEVELWHITEFLAG;
 545				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 546					lev |= SC_FOLDLEVELHEADERFLAG;
 547
 548				styler.SetLevel(lineCurrent, lev);
 549				visibleChars = 0;
 550				levelPrev = levelCurrent;
 551			}
 552			lineCurrent++;
 553			styler.SetLineState(lineCurrent,
 554			                    ((inScriptType & 0x03) << 0) |
 555			                    ((tagOpened & 0x01) << 2) |
 556			                    ((tagClosing & 0x01) << 3) |
 557			                    ((aspScript & 0x0F) << 4) |
 558			                    ((clientScript & 0x0F) << 8) |
 559			                    ((beforePreProc & 0xFF) << 12));
 560		}
 561
 562		// generic end of script processing
 563		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 564			// Check if it's the end of the script tag (or any other HTML tag)
 565			switch (state) {
 566				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 567			case SCE_H_DOUBLESTRING:
 568			case SCE_H_SINGLESTRING:
 569			case SCE_HJ_COMMENT:
 570			case SCE_HJ_COMMENTDOC:
 571				// SCE_HJ_COMMENTLINE removed as this is a common thing done to hide
 572				// the end of script marker from some JS interpreters.
 573				//case SCE_HJ_COMMENTLINE:
 574			case SCE_HJ_DOUBLESTRING:
 575			case SCE_HJ_SINGLESTRING:
 576			case SCE_HB_STRING:
 577			case SCE_HP_STRING:
 578			case SCE_HP_TRIPLE:
 579			case SCE_HP_TRIPLEDOUBLE:
 580				break;
 581			default :
 582				// closing tag of the script (it's a closing HTML tag anyway)
 583				styler.ColourTo(i - 1, StateToPrint);
 584				state = SCE_H_TAGUNKNOWN;
 585				inScriptType = eHtml;
 586				scriptLanguage = eScriptNone;
 587				clientScript = eScriptJS;
 588				i += 2;
 589				visibleChars += 2;
 590				tagClosing = true;
 591				continue;
 592			}
 593		}
 594
 595		/////////////////////////////////////
 596		// handle the start of PHP pre-processor = Non-HTML
 597		else if ((state != SCE_H_ASPAT) &&
 598		         !isPHPStringState(state) &&
 599		         (state != SCE_HPHP_COMMENT) &&
 600		         (ch == '<') &&
 601		         (chNext == '?')) {
 602			styler.ColourTo(i - 1, StateToPrint);
 603			beforePreProc = state;
 604			scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment() + 2, i + 10, eScriptPHP);
 605			i++;
 606			visibleChars++;
 607			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
 608			if (scriptLanguage == eScriptXML)
 609				styler.ColourTo(i, SCE_H_XMLSTART);
 610			else
 611				styler.ColourTo(i, SCE_H_QUESTION);
 612			state = StateForScript(scriptLanguage);
 613			if (inScriptType == eNonHtmlScript)
 614				inScriptType = eNonHtmlScriptPreProc;
 615			else
 616				inScriptType = eNonHtmlPreProc;
 617			// fold whole script
 618			levelCurrent++;
 619			if (scriptLanguage == eScriptXML)
 620				levelCurrent--; // no folding of the XML first tag (all XML-like tags in this case)
 621			// should be better
 622			ch = styler.SafeGetCharAt(i);
 623			continue;
 624		}
 625
 626		// handle the start of ASP pre-processor = Non-HTML
 627		else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%')) {
 628			styler.ColourTo(i - 1, StateToPrint);
 629			beforePreProc = state;
 630			if (inScriptType == eNonHtmlScript)
 631				inScriptType = eNonHtmlScriptPreProc;
 632			else
 633				inScriptType = eNonHtmlPreProc;
 634
 635			if (chNext2 == '@') {
 636				i += 2; // place as if it was the second next char treated
 637				visibleChars += 2;
 638				state = SCE_H_ASPAT;
 639			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
 640				styler.ColourTo(i + 3, SCE_H_ASP);
 641				state = SCE_H_XCCOMMENT;
 642				scriptLanguage = eScriptVBS;
 643				continue;
 644			} else {
 645				if (chNext2 == '=') {
 646					i += 2; // place as if it was the second next char treated
 647					visibleChars += 2;
 648				} else {
 649					i++; // place as if it was the next char treated
 650					visibleChars++;
 651				}
 652
 653				state = StateForScript(aspScript);
 654			}
 655			scriptLanguage = eScriptVBS;
 656			styler.ColourTo(i, SCE_H_ASP);
 657			// fold whole script
 658			levelCurrent++;
 659			// should be better
 660			ch = styler.SafeGetCharAt(i);
 661			continue;
 662		}
 663
 664		/////////////////////////////////////
 665		// handle the start of SGML language (DTD)
 666		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
 667		         (chPrev == '<') &&
 668		         (ch == '!') &&
 669		         (StateToPrint != SCE_H_CDATA) && (!IsCommentState(StateToPrint))) {
 670			beforePreProc = state;
 671			styler.ColourTo(i - 2, StateToPrint);
 672			if ((chNext == '-') && (chNext2 == '-')) {
 673				state = SCE_H_COMMENT; // wait for a pending command
 674			}
 675			else if (isWordCdata(i + 1, i + 7, styler)) {
 676				state = SCE_H_CDATA;
 677			} else {
 678				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
 679				scriptLanguage = eScriptSGML;
 680				state = SCE_H_SGML_COMMAND; // wait for a pending command
 681			}
 682			// fold whole tag (-- when closing the tag)
 683
 684			levelCurrent++;
 685			continue;
 686		}
 687
 688		// handle the end of a pre-processor = Non-HTML
 689		else if ((
 690		             ((inScriptType == eNonHtmlPreProc)
 691		              || (inScriptType == eNonHtmlScriptPreProc)) && (
 692		                 ((scriptLanguage == eScriptPHP) && (ch == '?') && !isPHPStringState(state) && (state != SCE_HPHP_COMMENT)) ||
 693		                 ((scriptLanguage != eScriptNone) && !isStringState(state) &&
 694		                  (ch == '%'))
 695		             ) && (chNext == '>')) ||
 696		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
 697			if (state == SCE_H_ASPAT) {
 698				aspScript = segIsScriptingIndicator(styler,
 699				                                    styler.GetStartSegment(), i - 1, aspScript);
 700			}
 701			// Bounce out of any ASP mode
 702			switch (state) {
 703			case SCE_HJ_WORD:
 704				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
 705				break;
 706			case SCE_HB_WORD:
 707				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
 708				break;
 709			case SCE_HP_WORD:
 710				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
 711				break;
 712			case SCE_HPHP_WORD:
 713				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
 714				break;
 715			case SCE_H_XCCOMMENT:
 716				styler.ColourTo(i - 1, state);
 717				break;
 718			default :
 719				styler.ColourTo(i - 1, StateToPrint);
 720				break;
 721			}
 722			if (scriptLanguage != eScriptSGML) {
 723				i++;
 724				visibleChars++;
 725			}
 726			if (ch == '%')
 727				styler.ColourTo(i, SCE_H_ASP);
 728			else if (scriptLanguage == eScriptXML)
 729				styler.ColourTo(i, SCE_H_XMLEND);
 730			else if (scriptLanguage == eScriptSGML)
 731				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 732			else
 733				styler.ColourTo(i, SCE_H_QUESTION);
 734			state = beforePreProc;
 735			if (inScriptType == eNonHtmlScriptPreProc)
 736				inScriptType = eNonHtmlScript;
 737			else
 738				inScriptType = eHtml;
 739			scriptLanguage = eScriptNone;
 740			// unfold all scripting languages
 741			levelCurrent--;
 742			continue;
 743		}
 744		/////////////////////////////////////
 745
 746		switch (state) {
 747		case SCE_H_DEFAULT:
 748			if (ch == '<') {
 749				// in HTML, fold on tag open and unfold on tag close
 750				tagOpened = true;
 751				tagClosing = (chNext == '/');
 752				styler.ColourTo(i - 1, StateToPrint);
 753				if (chNext != '!')
 754					state = SCE_H_TAGUNKNOWN;
 755			} else if (ch == '&') {
 756				styler.ColourTo(i - 1, SCE_H_DEFAULT);
 757				state = SCE_H_ENTITY;
 758			}
 759			break;
 760		case SCE_H_SGML_DEFAULT:
 761		case SCE_H_SGML_BLOCK_DEFAULT:
 762//			if (scriptLanguage == eScriptSGMLblock)
 763//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
 764
 765			if (ch == '\"') {
 766				styler.ColourTo(i - 1, StateToPrint);
 767				state = SCE_H_SGML_DOUBLESTRING;
 768			} else if (ch == '\'') {
 769				styler.ColourTo(i - 1, StateToPrint);
 770				state = SCE_H_SGML_SIMPLESTRING;
 771			} else if ((ch == '-') && (chPrev == '-')) {
 772				styler.ColourTo(i - 2, StateToPrint);
 773				state = SCE_H_SGML_COMMENT;
 774			} else if (isalpha(ch) && (chPrev == '%')) {
 775				styler.ColourTo(i - 2, StateToPrint);
 776				state = SCE_H_SGML_ENTITY;
 777			} else if (ch == '#') {
 778				styler.ColourTo(i - 1, StateToPrint);
 779				state = SCE_H_SGML_SPECIAL;
 780			} else if (ch == '[') {
 781				styler.ColourTo(i - 1, StateToPrint);
 782				scriptLanguage = eScriptSGMLblock;
 783				state = SCE_H_SGML_BLOCK_DEFAULT;
 784			} else if (ch == ']') {
 785				if (scriptLanguage == eScriptSGMLblock) {
 786					styler.ColourTo(i, StateToPrint);
 787					scriptLanguage = eScriptSGML;
 788				} else {
 789					styler.ColourTo(i - 1, StateToPrint);
 790					styler.ColourTo(i, SCE_H_SGML_ERROR);
 791				}
 792				state = SCE_H_SGML_DEFAULT;
 793			} else if (scriptLanguage == eScriptSGMLblock) {
 794				if ((ch == '!') && (chPrev == '<')) {
 795					styler.ColourTo(i - 2, StateToPrint);
 796					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 797					state = SCE_H_SGML_COMMAND;
 798				} else if (ch == '>') {
 799					styler.ColourTo(i - 1, StateToPrint);
 800					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
 801				}
 802			}
 803			break;
 804		case SCE_H_SGML_COMMAND:
 805			if ((ch == '-') && (chPrev == '-')) {
 806				styler.ColourTo(i - 2, StateToPrint);
 807				state = SCE_H_SGML_COMMENT;
 808			} else if (!issgmlwordchar(ch)) {
 809				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
 810					styler.ColourTo(i - 1, StateToPrint);
 811					state = SCE_H_SGML_1ST_PARAM;
 812				} else {
 813					state = SCE_H_SGML_ERROR;
 814				}
 815			}
 816			break;
 817		case SCE_H_SGML_1ST_PARAM:
 818			// wait for the beginning of the word
 819			if ((ch == '-') && (chPrev == '-')) {
 820				if (scriptLanguage == eScriptSGMLblock) {
 821					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
 822				} else {
 823					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
 824				}
 825				state = SCE_H_SGML_1ST_PARAM_COMMENT;
 826			} else if (issgmlwordchar(ch)) {
 827				if (scriptLanguage == eScriptSGMLblock) {
 828					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
 829				} else {
 830					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
 831				}
 832				// find the length of the word
 833				int size = 1;
 834				while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
 835					size++;
 836				styler.ColourTo(i + size - 1, StateToPrint);
 837				i += size - 1;
 838				visibleChars += size - 1;
 839				ch = styler.SafeGetCharAt(i);
 840				if (scriptLanguage == eScriptSGMLblock) {
 841					state = SCE_H_SGML_BLOCK_DEFAULT;
 842				} else {
 843					state = SCE_H_SGML_DEFAULT;
 844				}
 845				continue;
 846			}
 847			break;
 848		case SCE_H_SGML_ERROR:
 849			if ((ch == '-') && (chPrev == '-')) {
 850				styler.ColourTo(i - 2, StateToPrint);
 851				state = SCE_H_SGML_COMMENT;
 852			}
 853		case SCE_H_SGML_DOUBLESTRING:
 854			if (ch == '\"') {
 855				styler.ColourTo(i, StateToPrint);
 856				state = SCE_H_SGML_DEFAULT;
 857			}
 858			break;
 859		case SCE_H_SGML_SIMPLESTRING:
 860			if (ch == '\'') {
 861				styler.ColourTo(i, StateToPrint);
 862				state = SCE_H_SGML_DEFAULT;
 863			}
 864			break;
 865		case SCE_H_SGML_COMMENT:
 866			if ((ch == '-') && (chPrev == '-')) {
 867				styler.ColourTo(i, StateToPrint);
 868				state = SCE_H_SGML_DEFAULT;
 869			}
 870			break;
 871		case SCE_H_CDATA:
 872			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
 873				styler.ColourTo(i, StateToPrint);
 874				state = SCE_H_DEFAULT;
 875				levelCurrent--;
 876			}
 877			break;
 878		case SCE_H_COMMENT:
 879			if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
 880				styler.ColourTo(i, StateToPrint);
 881				state = SCE_H_DEFAULT;
 882				levelCurrent--;
 883			}
 884			break;
 885		case SCE_H_SGML_1ST_PARAM_COMMENT:
 886			if ((ch == '-') && (chPrev == '-')) {
 887				styler.ColourTo(i, SCE_H_SGML_COMMENT);
 888				state = SCE_H_SGML_1ST_PARAM;
 889			}
 890			break;
 891		case SCE_H_SGML_SPECIAL:
 892			if (!isupper(ch)) {
 893				styler.ColourTo(i - 1, StateToPrint);
 894				if (isalnum(ch)) {
 895					state = SCE_H_SGML_ERROR;
 896				} else {
 897					state = SCE_H_SGML_DEFAULT;
 898				}
 899			}
 900			break;
 901		case SCE_H_SGML_ENTITY:
 902			if (ch == ';') {
 903				styler.ColourTo(i, StateToPrint);
 904				state = SCE_H_SGML_DEFAULT;
 905			} else if (!isalnum(ch) && ch != '-' && ch != '.') {
 906				styler.ColourTo(i, SCE_H_SGML_ERROR);
 907				state = SCE_H_SGML_DEFAULT;
 908			}
 909			break;
 910		case SCE_H_ENTITY:
 911			if (ch == ';') {
 912				styler.ColourTo(i, StateToPrint);
 913				state = SCE_H_DEFAULT;
 914			}
 915			if (ch != '#' && !isalnum(ch)) {	// Should check that '#' follows '&', but it is unlikely anyway...
 916				styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 917				state = SCE_H_DEFAULT;
 918			}
 919			break;
 920		case SCE_H_TAGUNKNOWN:
 921			if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
 922				int eClass = classifyTagHTML(styler.GetStartSegment(), i - 1, keywords, styler);
 923				if (eClass == SCE_H_SCRIPT) {
 924					if (!tagClosing) {
 925						inScriptType = eNonHtmlScript;
 926						scriptLanguage = clientScript;
 927						eClass = SCE_H_TAG;
 928					} else {
 929						scriptLanguage = eScriptNone;
 930						eClass = SCE_H_TAG;
 931					}
 932				}
 933				if (ch == '>') {
 934					styler.ColourTo(i, eClass);
 935					if (inScriptType == eNonHtmlScript) {
 936						state = StateForScript(scriptLanguage);
 937					} else {
 938						state = SCE_H_DEFAULT;
 939					}
 940					tagOpened = false;
 941					if (tagClosing) {
 942						levelCurrent--;
 943					} else {
 944						levelCurrent++;
 945					}
 946					tagClosing = false;
 947				} else if (ch == '/' && chNext == '>') {
 948					if (eClass == SCE_H_TAGUNKNOWN) {
 949						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
 950					} else {
 951						styler.ColourTo(i - 1, StateToPrint);
 952						styler.ColourTo(i + 1, SCE_H_TAGEND);
 953					}
 954					i++;
 955					ch = chNext;
 956					state = SCE_H_DEFAULT;
 957					tagOpened = false;
 958				} else {
 959					if (eClass != SCE_H_TAGUNKNOWN) {
 960						if (eClass == SCE_H_SGML_DEFAULT) {
 961							state = SCE_H_SGML_DEFAULT;
 962						} else {
 963							state = SCE_H_OTHER;
 964						}
 965					}
 966				}
 967			}
 968			break;
 969		case SCE_H_ATTRIBUTE:
 970			if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
 971				if (inScriptType == eNonHtmlScript) {
 972					int scriptLanguagePrev = scriptLanguage;
 973					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
 974					scriptLanguage = clientScript;
 975					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
 976						inScriptType = eHtml;
 977				}
 978				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
 979				if (ch == '>') {
 980					styler.ColourTo(i, SCE_H_TAG);
 981					if (inScriptType == eNonHtmlScript) {
 982						state = StateForScript(scriptLanguage);
 983					} else {
 984						state = SCE_H_DEFAULT;
 985					}
 986					tagOpened = false;
 987					if (tagClosing)
 988						levelCurrent--;
 989					else
 990						levelCurrent++;
 991					tagClosing = false;
 992				} else if (ch == '=') {
 993					styler.ColourTo(i, SCE_H_OTHER);
 994					state = SCE_H_VALUE;
 995				} else {
 996					state = SCE_H_OTHER;
 997				}
 998			}
 999			break;
1000		case SCE_H_OTHER:
1001			if (ch == '>') {
1002				styler.ColourTo(i - 1, StateToPrint);
1003				styler.ColourTo(i, SCE_H_TAG);
1004				if (inScriptType == eNonHtmlScript) {
1005					state = StateForScript(scriptLanguage);
1006				} else {
1007					state = SCE_H_DEFAULT;
1008				}
1009				tagOpened = false;
1010				if (tagClosing)
1011					levelCurrent--;
1012				else
1013					levelCurrent++;
1014				tagClosing = false;
1015			} else if (ch == '\"') {
1016				styler.ColourTo(i - 1, StateToPrint);
1017				state = SCE_H_DOUBLESTRING;
1018			} else if (ch == '\'') {
1019				styler.ColourTo(i - 1, StateToPrint);
1020				state = SCE_H_SINGLESTRING;
1021			} else if (ch == '=') {
1022				styler.ColourTo(i, StateToPrint);
1023				state = SCE_H_VALUE;
1024			} else if (ch == '/' && chNext == '>') {
1025				styler.ColourTo(i - 1, StateToPrint);
1026				styler.ColourTo(i + 1, SCE_H_TAGEND);
1027				i++;
1028				ch = chNext;
1029				state = SCE_H_DEFAULT;
1030				tagOpened = false;
1031			} else if (ch == '?' && chNext == '>') {
1032				styler.ColourTo(i - 1, StateToPrint);
1033				styler.ColourTo(i + 1, SCE_H_XMLEND);
1034				i++;
1035				ch = chNext;
1036				state = SCE_H_DEFAULT;
1037			} else if (ishtmlwordchar(ch)) {
1038				styler.ColourTo(i - 1, StateToPrint);
1039				state = SCE_H_ATTRIBUTE;
1040			}
1041			break;
1042		case SCE_H_DOUBLESTRING:
1043			if (ch == '\"') {
1044				if (inScriptType == eNonHtmlScript) {
1045					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1046				}
1047				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1048				state = SCE_H_OTHER;
1049			}
1050			break;
1051		case SCE_H_SINGLESTRING:
1052			if (ch == '\'') {
1053				if (inScriptType == eNonHtmlScript) {
1054					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1055				}
1056				styler.ColourTo(i, SCE_H_SINGLESTRING);
1057				state = SCE_H_OTHER;
1058			}
1059			break;
1060		case SCE_H_VALUE:
1061			if (!ishtmlwordchar(ch)) {
1062				if (ch == '\"') {
1063					// Should really test for being first character
1064					state = SCE_H_DOUBLESTRING;
1065				} else if (ch == '\'') {
1066					state = SCE_H_SINGLESTRING;
1067				} else {
1068					if (IsNumber(styler.GetStartSegment(), styler)) {
1069						styler.ColourTo(i - 1, SCE_H_NUMBER);
1070					} else {
1071						styler.ColourTo(i - 1, StateToPrint);
1072					}
1073					if (ch == '>') {
1074						styler.ColourTo(i, SCE_H_TAG);
1075						if (inScriptType == eNonHtmlScript) {
1076							state = StateForScript(scriptLanguage);
1077						} else {
1078							state = SCE_H_DEFAULT;
1079						}
1080						tagOpened = false;
1081						if (tagClosing)
1082							levelCurrent--;
1083						else
1084							levelCurrent++;
1085						tagClosing = false;
1086					} else {
1087						state = SCE_H_OTHER;
1088					}
1089				}
1090			}
1091			break;
1092		case SCE_HJ_DEFAULT:
1093		case SCE_HJ_START:
1094		case SCE_HJ_SYMBOLS:
1095			if (iswordstart(ch)) {
1096				styler.ColourTo(i - 1, StateToPrint);
1097				state = SCE_HJ_WORD;
1098			} else if (ch == '/' && chNext == '*') {
1099				styler.ColourTo(i - 1, StateToPrint);
1100				if (chNext2 == '*')
1101					state = SCE_HJ_COMMENTDOC;
1102				else
1103					state = SCE_HJ_COMMENT;
1104			} else if (ch == '/' && chNext == '/') {
1105				styler.ColourTo(i - 1, StateToPrint);
1106				state = SCE_HJ_COMMENTLINE;
1107			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1108				styler.ColourTo(i - 1, StateToPrint);
1109				state = SCE_HJ_REGEX;
1110			} else if (ch == '\"') {
1111				styler.ColourTo(i - 1, StateToPrint);
1112				state = SCE_HJ_DOUBLESTRING;
1113			} else if (ch == '\'') {
1114				styler.ColourTo(i - 1, StateToPrint);
1115				state = SCE_HJ_SINGLESTRING;
1116			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1117			           styler.SafeGetCharAt(i + 3) == '-') {
1118				styler.ColourTo(i - 1, StateToPrint);
1119				state = SCE_HJ_COMMENTLINE;
1120			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1121				styler.ColourTo(i - 1, StateToPrint);
1122				state = SCE_HJ_COMMENTLINE;
1123				i += 2;
1124			} else if (isoperator(ch)) {
1125				styler.ColourTo(i - 1, StateToPrint);
1126				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1127				state = SCE_HJ_DEFAULT;
1128			} else if ((ch == ' ') || (ch == '\t')) {
1129				if (state == SCE_HJ_START) {
1130					styler.ColourTo(i - 1, StateToPrint);
1131					state = SCE_HJ_DEFAULT;
1132				}
1133			}
1134			break;
1135		case SCE_HJ_WORD:
1136			if (!iswordchar(ch)) {
1137				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1138				//styler.ColourTo(i - 1, eHTJSKeyword);
1139				state = SCE_HJ_DEFAULT;
1140				if (ch == '/' && chNext == '*') {
1141					if (chNext2 == '*')
1142						state = SCE_HJ_COMMENTDOC;
1143					else
1144						state = SCE_HJ_COMMENT;
1145				} else if (ch == '/' && chNext == '/') {
1146					state = SCE_HJ_COMMENTLINE;
1147				} else if (ch == '\"') {
1148					state = SCE_HJ_DOUBLESTRING;
1149				} else if (ch == '\'') {
1150					state = SCE_HJ_SINGLESTRING;
1151				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1152					styler.ColourTo(i - 1, StateToPrint);
1153					state = SCE_HJ_COMMENTLINE;
1154					i += 2;
1155				} else if (isoperator(ch)) {
1156					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1157					state = SCE_HJ_DEFAULT;
1158				}
1159			}
1160			break;
1161		case SCE_HJ_COMMENT:
1162		case SCE_HJ_COMMENTDOC:
1163			if (ch == '/' && chPrev == '*') {
1164				styler.ColourTo(i, StateToPrint);
1165				state = SCE_HJ_DEFAULT;
1166			}
1167			break;
1168		case SCE_HJ_COMMENTLINE:
1169			if (ch == '\r' || ch == '\n') {
1170				styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1171				state = SCE_HJ_DEFAULT;
1172			}
1173			break;
1174		case SCE_HJ_DOUBLESTRING:
1175			if (ch == '\\') {
1176				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1177					i++;
1178				}
1179			} else if (ch == '\"') {
1180				styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1181				state = SCE_HJ_DEFAULT;
1182			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1183				styler.ColourTo(i - 1, StateToPrint);
1184				state = SCE_HJ_COMMENTLINE;
1185				i += 2;
1186			} else if (isLineEnd(ch)) {
1187				styler.ColourTo(i - 1, StateToPrint);
1188				state = SCE_HJ_STRINGEOL;
1189			}
1190			break;
1191		case SCE_HJ_SINGLESTRING:
1192			if (ch == '\\') {
1193				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1194					i++;
1195				}
1196			} else if (ch == '\'') {
1197				styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1198				state = SCE_HJ_DEFAULT;
1199			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1200				styler.ColourTo(i - 1, StateToPrint);
1201				state = SCE_HJ_COMMENTLINE;
1202				i += 2;
1203			} else if (isLineEnd(ch)) {
1204				styler.ColourTo(i - 1, StateToPrint);
1205				state = SCE_HJ_STRINGEOL;
1206			}
1207			break;
1208		case SCE_HJ_STRINGEOL:
1209			if (!isLineEnd(ch)) {
1210				styler.ColourTo(i - 1, StateToPrint);
1211				state = SCE_HJ_DEFAULT;
1212			} else if (!isLineEnd(chNext)) {
1213				styler.ColourTo(i, StateToPrint);
1214				state = SCE_HJ_DEFAULT;
1215			}
1216			break;
1217		case SCE_HJ_REGEX:
1218			if (ch == '\r' || ch == '\n' || ch == '/') {
1219				styler.ColourTo(i, StateToPrint);
1220				state = SCE_HJ_DEFAULT;
1221			} else if (ch == '\\') {
1222				// Gobble up the quoted character
1223				if (chNext == '\\' || chNext == '/') {
1224					i++;
1225					ch = chNext;
1226					chNext = styler.SafeGetCharAt(i + 1);
1227				}
1228			}
1229			break;
1230		case SCE_HB_DEFAULT:
1231		case SCE_HB_START:
1232			if (iswordstart(ch)) {
1233				styler.ColourTo(i - 1, StateToPrint);
1234				state = SCE_HB_WORD;
1235			} else if (ch == '\'') {
1236				styler.ColourTo(i - 1, StateToPrint);
1237				state = SCE_HB_COMMENTLINE;
1238			} else if (ch == '\"') {
1239				styler.ColourTo(i - 1, StateToPrint);
1240				state = SCE_HB_STRING;
1241			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1242			           styler.SafeGetCharAt(i + 3) == '-') {
1243				styler.ColourTo(i - 1, StateToPrint);
1244				state = SCE_HB_COMMENTLINE;
1245			} else if (isoperator(ch)) {
1246				styler.ColourTo(i - 1, StateToPrint);
1247				styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1248				state = SCE_HB_DEFAULT;
1249			} else if ((ch == ' ') || (ch == '\t')) {
1250				if (state == SCE_HB_START) {
1251					styler.ColourTo(i - 1, StateToPrint);
1252					state = SCE_HB_DEFAULT;
1253				}
1254			}
1255			break;
1256		case SCE_HB_WORD:
1257			if (!iswordchar(ch)) {
1258				state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1259				if (state == SCE_HB_DEFAULT) {
1260					if (ch == '\"') {
1261						state = SCE_HB_STRING;
1262					} else if (ch == '\'') {
1263						state = SCE_HB_COMMENTLINE;
1264					} else if (isoperator(ch)) {
1265						styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1266						state = SCE_HB_DEFAULT;
1267					}
1268				}
1269			}
1270			break;
1271		case SCE_HB_STRING:
1272			if (ch == '\"') {
1273				styler.ColourTo(i, StateToPrint);
1274				state = SCE_HB_DEFAULT;
1275			} else if (ch == '\r' || ch == '\n') {
1276				styler.ColourTo(i - 1, StateToPrint);
1277				state = SCE_HB_STRINGEOL;
1278			}
1279			break;
1280		case SCE_HB_COMMENTLINE:
1281			if (ch == '\r' || ch == '\n') {
1282				styler.ColourTo(i - 1, StateToPrint);
1283				state = SCE_HB_DEFAULT;
1284			}
1285			break;
1286		case SCE_HB_STRINGEOL:
1287			if (!isLineEnd(ch)) {
1288				styler.ColourTo(i - 1, StateToPrint);
1289				state = SCE_HB_DEFAULT;
1290			} else if (!isLineEnd(chNext)) {
1291				styler.ColourTo(i, StateToPrint);
1292				state = SCE_HB_DEFAULT;
1293			}
1294			break;
1295		case SCE_HP_DEFAULT:
1296		case SCE_HP_START:
1297			if (iswordstart(ch)) {
1298				styler.ColourTo(i - 1, StateToPrint);
1299				state = SCE_HP_WORD;
1300			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1301			           styler.SafeGetCharAt(i + 3) == '-') {
1302				styler.ColourTo(i - 1, StateToPrint);
1303				state = SCE_HP_COMMENTLINE;
1304			} else if (ch == '#') {
1305				styler.ColourTo(i - 1, StateToPrint);
1306				state = SCE_HP_COMMENTLINE;
1307			} else if (ch == '\"') {
1308				styler.ColourTo(i - 1, StateToPrint);
1309				if (chNext == '\"' && chNext2 == '\"') {
1310					i += 2;
1311					state = SCE_HP_TRIPLEDOUBLE;
1312					ch = ' ';
1313					chPrev = ' ';
1314					chNext = styler.SafeGetCharAt(i + 1);
1315				} else {
1316					//					state = statePrintForState(SCE_HP_STRING,inScriptType);
1317					state = SCE_HP_STRING;
1318				}
1319			} else if (ch == '\'') {
1320				styler.ColourTo(i - 1, StateToPrint);
1321				if (chNext == '\'' && chNext2 == '\'') {
1322					i += 2;
1323					state = SCE_HP_TRIPLE;
1324					ch = ' ';
1325					chPrev = ' ';
1326					chNext = styler.SafeGetCharAt(i + 1);
1327				} else {
1328					state = SCE_HP_CHARACTER;
1329				}
1330			} else if (isoperator(ch)) {
1331				styler.ColourTo(i - 1, StateToPrint);
1332				styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1333			} else if ((ch == ' ') || (ch == '\t')) {
1334				if (state == SCE_HP_START) {
1335					styler.ColourTo(i - 1, StateToPrint);
1336					state = SCE_HP_DEFAULT;
1337				}
1338			}
1339			break;
1340		case SCE_HP_WORD:
1341			if (!iswordchar(ch)) {
1342				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1343				state = SCE_HP_DEFAULT;
1344				if (ch == '#') {
1345					state = SCE_HP_COMMENTLINE;
1346				} else if (ch == '\"') {
1347					if (chNext == '\"' && chNext2 == '\"') {
1348						i += 2;
1349						state = SCE_HP_TRIPLEDOUBLE;
1350						ch = ' ';
1351						chPrev = ' ';
1352						chNext = styler.SafeGetCharAt(i + 1);
1353					} else {
1354						state = SCE_HP_STRING;
1355					}
1356				} else if (ch == '\'') {
1357					if (chNext == '\'' && chNext2 == '\'') {
1358						i += 2;
1359						state = SCE_HP_TRIPLE;
1360						ch = ' ';
1361						chPrev = ' ';
1362						chNext = styler.SafeGetCharAt(i + 1);
1363					} else {
1364						state = SCE_HP_CHARACTER;
1365					}
1366				} else if (isoperator(ch)) {
1367					styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1368				}
1369			}
1370			break;
1371		case SCE_HP_COMMENTLINE:
1372			if (ch == '\r' || ch == '\n') {
1373				styler.ColourTo(i - 1, StateToPrint);
1374				state = SCE_HP_DEFAULT;
1375			}
1376			break;
1377		case SCE_HP_STRING:
1378			if (ch == '\\') {
1379				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1380					i++;
1381					ch = chNext;
1382					chNext = styler.SafeGetCharAt(i + 1);
1383				}
1384			} else if (ch == '\"') {
1385				styler.ColourTo(i, StateToPrint);
1386				state = SCE_HP_DEFAULT;
1387			}
1388			break;
1389		case SCE_HP_CHARACTER:
1390			if (ch == '\\') {
1391				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1392					i++;
1393					ch = chNext;
1394					chNext = styler.SafeGetCharAt(i + 1);
1395				}
1396			} else if (ch == '\'') {
1397				styler.ColourTo(i, StateToPrint);
1398				state = SCE_HP_DEFAULT;
1399			}
1400			break;
1401		case SCE_HP_TRIPLE:
1402			if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1403				styler.ColourTo(i, StateToPrint);
1404				state = SCE_HP_DEFAULT;
1405			}
1406			break;
1407		case SCE_HP_TRIPLEDOUBLE:
1408			if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1409				styler.ColourTo(i, StateToPrint);
1410				state = SCE_HP_DEFAULT;
1411			}
1412			break;
1413			///////////// start - PHP state handling
1414		case SCE_HPHP_WORD:
1415			if (!iswordstart(ch)) {
1416				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1417				if (ch == '/' && chNext == '*') {
1418					i++;
1419					state = SCE_HPHP_COMMENT;
1420				} else if (ch == '/' && chNext == '/') {
1421					i++;
1422					state = SCE_HPHP_COMMENTLINE;
1423				} else if (ch == '#') {
1424					state = SCE_HPHP_COMMENTLINE;
1425				} else if (ch == '\"') {
1426					state = SCE_HPHP_HSTRING;
1427				} else if (ch == '\'') {
1428					state = SCE_HPHP_SIMPLESTRING;
1429				} else if (ch == '$') {
1430					state = SCE_HPHP_VARIABLE;
1431				} else if (isoperator(ch)) {
1432					state = SCE_HPHP_OPERATOR;
1433				} else {
1434					state = SCE_HPHP_DEFAULT;
1435				}
1436			}
1437			break;
1438		case SCE_HPHP_NUMBER:
1439			if (!isdigit(ch)) {
1440				styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1441				if (isoperator(ch))
1442					state = SCE_HPHP_OPERATOR;
1443				else
1444					state = SCE_HPHP_DEFAULT;
1445			}
1446			break;
1447		case SCE_HPHP_VARIABLE:
1448			if (!iswordstart(ch)) {
1449				styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1450				if (isoperator(ch))
1451					state = SCE_HPHP_OPERATOR;
1452				else
1453					state = SCE_HPHP_DEFAULT;
1454			}
1455			break;
1456		case SCE_HPHP_COMMENT:
1457			if (ch == '/' && chPrev == '*') {
1458				styler.ColourTo(i, StateToPrint);
1459				state = SCE_HPHP_DEFAULT;
1460			}
1461			break;
1462		case SCE_HPHP_COMMENTLINE:
1463			if (ch == '\r' || ch == '\n') {
1464				styler.ColourTo(i - 1, StateToPrint);
1465				state = SCE_HPHP_DEFAULT;
1466			}
1467			break;
1468		case SCE_HPHP_HSTRING:
1469			if (ch == '\\') {
1470				// skip the next char
1471				i++;
1472			} else if (ch == '$') {
1473				styler.ColourTo(i - 1, StateToPrint);
1474				state = SCE_HPHP_HSTRING_VARIABLE;
1475			} else if (ch == '\"') {
1476				styler.ColourTo(i, StateToPrint);
1477				state = SCE_HPHP_DEFAULT;
1478			}
1479			break;
1480		case SCE_HPHP_SIMPLESTRING:
1481			if (ch == '\\') {
1482				// skip the next char
1483				i++;
1484			} else if (ch == '\'') {
1485				styler.ColourTo(i, StateToPrint);
1486				state = SCE_HPHP_DEFAULT;
1487			}
1488			break;
1489		case SCE_HPHP_HSTRING_VARIABLE:
1490			if (!iswordstart(ch)) {
1491				styler.ColourTo(i - 1, StateToPrint);
1492				i--; // strange but it works
1493				state = SCE_HPHP_HSTRING;
1494			}
1495			break;
1496		case SCE_HPHP_OPERATOR:
1497		case SCE_HPHP_DEFAULT:
1498			styler.ColourTo(i - 1, StateToPrint);
1499			if (isdigit(ch)) {
1500				state = SCE_HPHP_NUMBER;
1501			} else if (iswordstart(ch)) {
1502				state = SCE_HPHP_WORD;
1503			} else if (ch == '/' && chNext == '*') {
1504				i++;
1505				state = SCE_HPHP_COMMENT;
1506			} else if (ch == '/' && chNext == '/') {
1507				i++;
1508				state = SCE_HPHP_COMMENTLINE;
1509			} else if (ch == '#') {
1510				state = SCE_HPHP_COMMENTLINE;
1511			} else if (ch == '\"') {
1512				state = SCE_HPHP_HSTRING;
1513			} else if (ch == '\'') {
1514				state = SCE_HPHP_SIMPLESTRING;
1515			} else if (ch == '$') {
1516				state = SCE_HPHP_VARIABLE;
1517			} else if (isoperator(ch)) {
1518				state = SCE_HPHP_OPERATOR;
1519			} else if ((state == SCE_HPHP_OPERATOR) && (isspacechar(ch))) {
1520				state = SCE_HPHP_DEFAULT;
1521			}
1522			break;
1523			///////////// end - PHP state handling
1524		}
1525
1526		// Some of the above terminated their lexeme but since the same character starts
1527		// the same class again, only reenter if non empty segment.
1528
1529		bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1530		if (state == SCE_HB_DEFAULT) {    // One of the above succeeded
1531			if ((ch == '\"') && (nonEmptySegment)) {
1532				state = SCE_HB_STRING;
1533			} else if (ch == '\'') {
1534				state = SCE_HB_COMMENTLINE;
1535			} else if (iswordstart(ch)) {
1536				state = SCE_HB_WORD;
1537			} else if (isoperator(ch)) {
1538				styler.ColourTo(i, SCE_HB_DEFAULT);
1539			}
1540		} else if (state == SCE_HBA_DEFAULT) {    // One of the above succeeded
1541			if ((ch == '\"') && (nonEmptySegment)) {
1542				state = SCE_HBA_STRING;
1543			} else if (ch == '\'') {
1544				state = SCE_HBA_COMMENTLINE;
1545			} else if (iswordstart(ch)) {
1546				state = SCE_HBA_WORD;
1547			} else if (isoperator(ch)) {
1548				styler.ColourTo(i, SCE_HBA_DEFAULT);
1549			}
1550		} else if (state == SCE_HJ_DEFAULT) {    // One of the above succeeded
1551			if (ch == '/' && chNext == '*') {
1552				if (styler.SafeGetCharAt(i + 2) == '*')
1553					state = SCE_HJ_COMMENTDOC;
1554				else
1555					state = SCE_HJ_COMMENT;
1556			} else if (ch == '/' && chNext == '/') {
1557				state = SCE_HJ_COMMENTLINE;
1558			} else if ((ch == '\"') && (nonEmptySegment)) {
1559				state = SCE_HJ_DOUBLESTRING;
1560			} else if ((ch == '\'') && (nonEmptySegment)) {
1561				state = SCE_HJ_SINGLESTRING;
1562			} else if (iswordstart(ch)) {
1563				state = SCE_HJ_WORD;
1564			} else if (isoperator(ch)) {
1565				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1566			}
1567		}
1568	}
1569
1570	StateToPrint = statePrintForState(state, inScriptType);
1571	styler.ColourTo(lengthDoc - 1, StateToPrint);
1572
1573	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1574	if (fold) {
1575		int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1576		styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1577	}
1578}
1579
1580static bool isASPScript(int state) {
1581	return
1582		(state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1583		(state >= SCE_HBA_START && state <= SCE_HBA_STRINGEOL) ||
1584		(state >= SCE_HPA_DEFAULT && state <= SCE_HPA_IDENTIFIER);
1585}
1586
1587static void ColouriseHBAPiece(StyleContext &sc, WordList *keywordlists[]) {
1588	WordList &keywordsVBS = *keywordlists[2];
1589	if (sc.state == SCE_HBA_WORD) {
1590		if (!IsAWordChar(sc.ch)) {
1591			char s[100];
1592			sc.GetCurrentLowered(s, sizeof(s));
1593			if (keywordsVBS.InList(s)) {
1594				if (strcmp(s, "rem") == 0) {
1595					sc.ChangeState(SCE_HBA_COMMENTLINE);
1596					if (sc.atLineEnd) {
1597						sc.SetState(SCE_HBA_DEFAULT);
1598					}
1599				} else {
1600					sc.SetState(SCE_HBA_DEFAULT);
1601				}
1602			} else {
1603				sc.ChangeState(SCE_HBA_IDENTIFIER);
1604				sc.SetState(SCE_HBA_DEFAULT);
1605			}
1606		}
1607	} else if (sc.state == SCE_HBA_NUMBER) {
1608		if (!IsAWordChar(sc.ch)) {
1609			sc.SetState(SCE_HBA_DEFAULT);
1610		}
1611	} else if (sc.state == SCE_HBA_STRING) {
1612		if (sc.ch == '\"') {
1613			sc.For

Large files are truncated, but you can click here to view the full file