PageRenderTime 762ms CodeModel.GetById 50ms app.highlight 285ms RepoModel.GetById 17ms app.codeStats 1ms

/lexers/LexHTML.cxx

https://bitbucket.org/kpozn/scintilla
C++ | 2190 lines | 1956 code | 121 blank | 113 comment | 1460 complexity | 4bf6ed7a874be7d7e29f9b6460f88291 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>
   9#include <string.h>
  10#include <stdio.h>
  11#include <stdarg.h>
  12#include <assert.h>
  13#include <ctype.h>
  14
  15#include "ILexer.h"
  16#include "Scintilla.h"
  17#include "SciLexer.h"
  18
  19#include "WordList.h"
  20#include "LexAccessor.h"
  21#include "Accessor.h"
  22#include "StyleContext.h"
  23#include "CharacterSet.h"
  24#include "LexerModule.h"
  25
  26#ifdef SCI_NAMESPACE
  27using namespace Scintilla;
  28#endif
  29
  30#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  31#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  32#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  33
  34enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  35enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  36
  37static inline bool IsAWordChar(const int ch) {
  38	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  39}
  40
  41static inline bool IsAWordStart(const int ch) {
  42	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  43}
  44
  45inline bool IsOperator(int ch) {
  46	if (isascii(ch) && isalnum(ch))
  47		return false;
  48	// '.' left out as it is used to make up numbers
  49	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  50	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  51	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  52	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  53	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  54	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
  55		return true;
  56	return false;
  57}
  58
  59static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  60	unsigned int i = 0;
  61	for (; (i < end - start + 1) && (i < len-1); i++) {
  62		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  63	}
  64	s[i] = '\0';
  65}
  66
  67static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  68
  69	unsigned int i = 0;
  70	for (; i < sLen-1; i++) {
  71		char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  72		if ((i == 0) && !IsAWordStart(ch))
  73			break;
  74		if ((i > 0) && !IsAWordChar(ch))
  75			break;
  76		s[i] = ch;
  77	}
  78	s[i] = '\0';
  79
  80	return s;
  81}
  82
  83static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  84	char s[100];
  85	GetTextSegment(styler, start, end, s, sizeof(s));
  86	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  87	if (strstr(s, "src"))	// External script
  88		return eScriptNone;
  89	if (strstr(s, "vbs"))
  90		return eScriptVBS;
  91	if (strstr(s, "pyth"))
  92		return eScriptPython;
  93	if (strstr(s, "javas"))
  94		return eScriptJS;
  95	if (strstr(s, "jscr"))
  96		return eScriptJS;
  97	if (strstr(s, "php"))
  98		return eScriptPHP;
  99	if (strstr(s, "xml")) {
 100		const char *xml = strstr(s, "xml");
 101		for (const char *t=s; t<xml; t++) {
 102			if (!IsASpace(*t)) {
 103				return prevValue;
 104			}
 105		}
 106		return eScriptXML;
 107	}
 108
 109	return prevValue;
 110}
 111
 112static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 113	int iResult = 0;
 114	char s[100];
 115	GetTextSegment(styler, start, end, s, sizeof(s));
 116	if (0 == strncmp(s, "php", 3)) {
 117		iResult = 3;
 118	}
 119
 120	return iResult;
 121}
 122
 123static script_type ScriptOfState(int state) {
 124	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 125		return eScriptPython;
 126	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 127		return eScriptVBS;
 128	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 129		return eScriptJS;
 130	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 131		return eScriptPHP;
 132	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 133		return eScriptSGML;
 134	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 135		return eScriptSGMLblock;
 136	} else {
 137		return eScriptNone;
 138	}
 139}
 140
 141static int statePrintForState(int state, script_mode inScriptType) {
 142	int StateToPrint = state;
 143
 144	if (state >= SCE_HJ_START) {
 145		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 146			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 147		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 148			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 149		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 150			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 151		}
 152	}
 153
 154	return StateToPrint;
 155}
 156
 157static int stateForPrintState(int StateToPrint) {
 158	int state;
 159
 160	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 161		state = StateToPrint - SCE_HA_PYTHON;
 162	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 163		state = StateToPrint - SCE_HA_VBS;
 164	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 165		state = StateToPrint - SCE_HA_JS;
 166	} else {
 167		state = StateToPrint;
 168	}
 169
 170	return state;
 171}
 172
 173static inline bool IsNumber(unsigned int start, Accessor &styler) {
 174	return IsADigit(styler[start]) || (styler[start] == '.') ||
 175	       (styler[start] == '-') || (styler[start] == '#');
 176}
 177
 178static inline bool isStringState(int state) {
 179	bool bResult;
 180
 181	switch (state) {
 182	case SCE_HJ_DOUBLESTRING:
 183	case SCE_HJ_SINGLESTRING:
 184	case SCE_HJA_DOUBLESTRING:
 185	case SCE_HJA_SINGLESTRING:
 186	case SCE_HB_STRING:
 187	case SCE_HBA_STRING:
 188	case SCE_HP_STRING:
 189	case SCE_HP_CHARACTER:
 190	case SCE_HP_TRIPLE:
 191	case SCE_HP_TRIPLEDOUBLE:
 192	case SCE_HPA_STRING:
 193	case SCE_HPA_CHARACTER:
 194	case SCE_HPA_TRIPLE:
 195	case SCE_HPA_TRIPLEDOUBLE:
 196	case SCE_HPHP_HSTRING:
 197	case SCE_HPHP_SIMPLESTRING:
 198	case SCE_HPHP_HSTRING_VARIABLE:
 199	case SCE_HPHP_COMPLEX_VARIABLE:
 200		bResult = true;
 201		break;
 202	default :
 203		bResult = false;
 204		break;
 205	}
 206	return bResult;
 207}
 208
 209static inline bool stateAllowsTermination(int state) {
 210	bool allowTermination = !isStringState(state);
 211	if (allowTermination) {
 212		switch (state) {
 213		case SCE_HB_COMMENTLINE:
 214		case SCE_HPHP_COMMENT:
 215		case SCE_HP_COMMENTLINE:
 216		case SCE_HPA_COMMENTLINE:
 217			allowTermination = false;
 218		}
 219	}
 220	return allowTermination;
 221}
 222
 223// not really well done, since it's only comments that should lex the %> and <%
 224static inline bool isCommentASPState(int state) {
 225	bool bResult;
 226
 227	switch (state) {
 228	case SCE_HJ_COMMENT:
 229	case SCE_HJ_COMMENTLINE:
 230	case SCE_HJ_COMMENTDOC:
 231	case SCE_HB_COMMENTLINE:
 232	case SCE_HP_COMMENTLINE:
 233	case SCE_HPHP_COMMENT:
 234	case SCE_HPHP_COMMENTLINE:
 235		bResult = true;
 236		break;
 237	default :
 238		bResult = false;
 239		break;
 240	}
 241	return bResult;
 242}
 243
 244static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 245	bool wordIsNumber = IsNumber(start, styler);
 246	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 247	if (wordIsNumber) {
 248		chAttr = SCE_H_NUMBER;
 249	} else {
 250		char s[100];
 251		GetTextSegment(styler, start, end, s, sizeof(s));
 252		if (keywords.InList(s))
 253			chAttr = SCE_H_ATTRIBUTE;
 254	}
 255	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 256		// No keywords -> all are known
 257		chAttr = SCE_H_ATTRIBUTE;
 258	styler.ColourTo(end, chAttr);
 259}
 260
 261static int classifyTagHTML(unsigned int start, unsigned int end,
 262                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 263			   bool caseSensitive, bool isXml, bool allowScripts) {
 264	char withSpace[30 + 2] = " ";
 265	const char *s = withSpace + 1;
 266	// Copy after the '<'
 267	unsigned int i = 1;
 268	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 269		char ch = styler[cPos];
 270		if ((ch != '<') && (ch != '/')) {
 271			withSpace[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 272		}
 273	}
 274
 275	//The following is only a quick hack, to see if this whole thing would work
 276	//we first need the tagname with a trailing space...
 277	withSpace[i] = ' ';
 278	withSpace[i+1] = '\0';
 279
 280	// if the current language is XML, I can fold any tag
 281	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 282	//...to find it in the list of no-container-tags
 283	tagDontFold = (!isXml) && (NULL != strstr(" area base basefont br col command embed frame hr img input isindex keygen link meta param source track wbr ", withSpace));
 284
 285	//now we can remove the trailing space
 286	withSpace[i] = '\0';
 287
 288	// No keywords -> all are known
 289	char chAttr = SCE_H_TAGUNKNOWN;
 290	if (s[0] == '!') {
 291		chAttr = SCE_H_SGML_DEFAULT;
 292	} else if (!keywords || keywords.InList(s)) {
 293		chAttr = SCE_H_TAG;
 294	}
 295	styler.ColourTo(end, chAttr);
 296	if (chAttr == SCE_H_TAG) {
 297		if (allowScripts && 0 == strcmp(s, "script")) {
 298			// check to see if this is a self-closing tag by sniffing ahead
 299			bool isSelfClose = false;
 300			for (unsigned int cPos = end; cPos <= end + 200; cPos++) {
 301				char ch = styler.SafeGetCharAt(cPos, '\0');
 302				if (ch == '\0' || ch == '>')
 303					break;
 304				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
 305					isSelfClose = true;
 306					break;
 307				}
 308			}
 309
 310			// do not enter a script state if the tag self-closed
 311			if (!isSelfClose)
 312				chAttr = SCE_H_SCRIPT;
 313		} else if (!isXml && 0 == strcmp(s, "comment")) {
 314			chAttr = SCE_H_COMMENT;
 315		}
 316	}
 317	return chAttr;
 318}
 319
 320static void classifyWordHTJS(unsigned int start, unsigned int end,
 321                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 322	char s[30 + 1];
 323	unsigned int i = 0;
 324	for (; i < end - start + 1 && i < 30; i++) {
 325		s[i] = styler[start + i];
 326	}
 327	s[i] = '\0';
 328
 329	char chAttr = SCE_HJ_WORD;
 330	bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
 331	if (wordIsNumber) {
 332		chAttr = SCE_HJ_NUMBER;
 333	} else if (keywords.InList(s)) {
 334		chAttr = SCE_HJ_KEYWORD;
 335	}
 336	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 337}
 338
 339static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 340	char chAttr = SCE_HB_IDENTIFIER;
 341	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 342	if (wordIsNumber)
 343		chAttr = SCE_HB_NUMBER;
 344	else {
 345		char s[100];
 346		GetTextSegment(styler, start, end, s, sizeof(s));
 347		if (keywords.InList(s)) {
 348			chAttr = SCE_HB_WORD;
 349			if (strcmp(s, "rem") == 0)
 350				chAttr = SCE_HB_COMMENTLINE;
 351		}
 352	}
 353	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 354	if (chAttr == SCE_HB_COMMENTLINE)
 355		return SCE_HB_COMMENTLINE;
 356	else
 357		return SCE_HB_DEFAULT;
 358}
 359
 360static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType, bool isMako) {
 361	bool wordIsNumber = IsADigit(styler[start]);
 362	char s[30 + 1];
 363	unsigned int i = 0;
 364	for (; i < end - start + 1 && i < 30; i++) {
 365		s[i] = styler[start + i];
 366	}
 367	s[i] = '\0';
 368	char chAttr = SCE_HP_IDENTIFIER;
 369	if (0 == strcmp(prevWord, "class"))
 370		chAttr = SCE_HP_CLASSNAME;
 371	else if (0 == strcmp(prevWord, "def"))
 372		chAttr = SCE_HP_DEFNAME;
 373	else if (wordIsNumber)
 374		chAttr = SCE_HP_NUMBER;
 375	else if (keywords.InList(s))
 376		chAttr = SCE_HP_WORD;
 377	else if (isMako && 0 == strcmp(s, "block"))
 378		chAttr = SCE_HP_WORD;
 379	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 380	strcpy(prevWord, s);
 381}
 382
 383// Update the word colour to default or keyword
 384// Called when in a PHP word
 385static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 386	char chAttr = SCE_HPHP_DEFAULT;
 387	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 388	if (wordIsNumber)
 389		chAttr = SCE_HPHP_NUMBER;
 390	else {
 391		char s[100];
 392		GetTextSegment(styler, start, end, s, sizeof(s));
 393		if (keywords.InList(s))
 394			chAttr = SCE_HPHP_WORD;
 395	}
 396	styler.ColourTo(end, chAttr);
 397}
 398
 399static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 400	char s[30 + 1];
 401	unsigned int i = 0;
 402	for (; i < end - start + 1 && i < 30; i++) {
 403		s[i] = styler[start + i];
 404	}
 405	s[i] = '\0';
 406	return keywords.InList(s);
 407}
 408
 409static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 410	char s[30 + 1];
 411	unsigned int i = 0;
 412	for (; i < end - start + 1 && i < 30; i++) {
 413		s[i] = styler[start + i];
 414	}
 415	s[i] = '\0';
 416	return (0 == strcmp(s, "[CDATA["));
 417}
 418
 419// Return the first state to reach when entering a scripting language
 420static int StateForScript(script_type scriptLanguage) {
 421	int Result;
 422	switch (scriptLanguage) {
 423	case eScriptVBS:
 424		Result = SCE_HB_START;
 425		break;
 426	case eScriptPython:
 427		Result = SCE_HP_START;
 428		break;
 429	case eScriptPHP:
 430		Result = SCE_HPHP_DEFAULT;
 431		break;
 432	case eScriptXML:
 433		Result = SCE_H_TAGUNKNOWN;
 434		break;
 435	case eScriptSGML:
 436		Result = SCE_H_SGML_DEFAULT;
 437		break;
 438	case eScriptComment:
 439		Result = SCE_H_COMMENT;
 440		break;
 441	default :
 442		Result = SCE_HJ_START;
 443		break;
 444	}
 445	return Result;
 446}
 447
 448static inline bool ishtmlwordchar(int ch) {
 449	return !isascii(ch) ||
 450		(isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
 451}
 452
 453static inline bool issgmlwordchar(int ch) {
 454	return !isascii(ch) ||
 455		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 456}
 457
 458static inline bool IsPhpWordStart(int ch) {
 459	return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 460}
 461
 462static inline bool IsPhpWordChar(int ch) {
 463	return IsADigit(ch) || IsPhpWordStart(ch);
 464}
 465
 466static bool InTagState(int state) {
 467	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 468	       state == SCE_H_SCRIPT ||
 469	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 470	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 471	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 472}
 473
 474static bool IsCommentState(const int state) {
 475	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 476}
 477
 478static bool IsScriptCommentState(const int state) {
 479	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 480		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 481}
 482
 483static bool isLineEnd(int ch) {
 484	return ch == '\r' || ch == '\n';
 485}
 486
 487static bool isOKBeforeRE(int ch) {
 488	return (ch == '(') || (ch == '=') || (ch == ',');
 489}
 490
 491static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
 492	if (strlen(blockType) == 0) {
 493		return ((ch == '%') && (chNext == '>'));
 494	} else if ((0 == strcmp(blockType, "inherit")) ||
 495			   (0 == strcmp(blockType, "namespace")) ||
 496			   (0 == strcmp(blockType, "include")) ||
 497			   (0 == strcmp(blockType, "page"))) {
 498		return ((ch == '/') && (chNext == '>'));
 499	} else if (0 == strcmp(blockType, "%")) {
 500		if (ch == '/' && isLineEnd(chNext))
 501			return 1;
 502		else
 503		    return isLineEnd(ch);
 504	} else if (0 == strcmp(blockType, "{")) {
 505		return ch == '}';
 506	} else {
 507		return (ch == '>');
 508	}
 509}
 510
 511static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
 512	if (strlen(blockType) == 0) {
 513		return 0;
 514	} else if (0 == strcmp(blockType, "%")) {
 515		return ((ch == '%') && (chNext == '}'));
 516	} else if (0 == strcmp(blockType, "{")) {
 517		return ((ch == '}') && (chNext == '}'));
 518	} else {
 519		return 0;
 520	}
 521}
 522
 523static bool isPHPStringState(int state) {
 524	return
 525	    (state == SCE_HPHP_HSTRING) ||
 526	    (state == SCE_HPHP_SIMPLESTRING) ||
 527	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 528	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 529}
 530
 531static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
 532	int j;
 533	const int beginning = i - 1;
 534	bool isValidSimpleString = false;
 535
 536	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 537		i++;
 538
 539	char ch = styler.SafeGetCharAt(i);
 540	const char chNext = styler.SafeGetCharAt(i + 1);
 541	if (!IsPhpWordStart(ch)) {
 542		if (ch == '\'' && IsPhpWordStart(chNext)) {
 543			i++;
 544			ch = chNext;
 545			isSimpleString = true;
 546		} else {
 547			phpStringDelimiter[0] = '\0';
 548			return beginning;
 549		}
 550	}
 551	phpStringDelimiter[0] = ch;
 552	i++;
 553
 554	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
 555		if (!IsPhpWordChar(styler[j])) {
 556			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
 557				isValidSimpleString = true;
 558				j++;
 559				break;
 560			} else {
 561				phpStringDelimiter[0] = '\0';
 562				return beginning;
 563			}
 564		}
 565		if (j - i < phpStringDelimiterSize - 2)
 566			phpStringDelimiter[j-i+1] = styler[j];
 567		else
 568			i++;
 569	}
 570	if (isSimpleString && !isValidSimpleString) {
 571		phpStringDelimiter[0] = '\0';
 572		return beginning;
 573	}
 574	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
 575	return j - 1;
 576}
 577
 578static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 579                                  Accessor &styler, bool isXml) {
 580	WordList &keywords = *keywordlists[0];
 581	WordList &keywords2 = *keywordlists[1];
 582	WordList &keywords3 = *keywordlists[2];
 583	WordList &keywords4 = *keywordlists[3];
 584	WordList &keywords5 = *keywordlists[4];
 585	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 586
 587	// Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 588	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 589	char prevWord[200];
 590	prevWord[0] = '\0';
 591	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 592	phpStringDelimiter[0] = '\0';
 593	int StateToPrint = initStyle;
 594	int state = stateForPrintState(StateToPrint);
 595	char makoBlockType[200];
 596	makoBlockType[0] = '\0';
 597	int makoComment = 0;
 598	char djangoBlockType[2];
 599	djangoBlockType[0] = '\0';
 600
 601	// If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
 602	if (InTagState(state)) {
 603		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 604			int backLineStart = styler.LineStart(styler.GetLine(startPos-1));
 605			length += startPos - backLineStart;
 606			startPos = backLineStart;
 607		}
 608		state = SCE_H_DEFAULT;
 609	}
 610	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 611	if (isPHPStringState(state)) {
 612		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 613			startPos--;
 614			length++;
 615			state = styler.StyleAt(startPos);
 616		}
 617		if (startPos == 0)
 618			state = SCE_H_DEFAULT;
 619	}
 620	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 621
 622	int lineCurrent = styler.GetLine(startPos);
 623	int lineState;
 624	if (lineCurrent > 0) {
 625		lineState = styler.GetLineState(lineCurrent-1);
 626	} else {
 627		// Default client and ASP scripting language is JavaScript
 628		lineState = eScriptJS << 8;
 629
 630		// property asp.default.language
 631		//	Script in ASP code is initially assumed to be in JavaScript.
 632		//	To change this to VBScript set asp.default.language to 2. Python is 3.
 633		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 634	}
 635	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 636	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 637	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 638	bool tagDontFold = false; //some HTML tags should not be folded
 639	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 640	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 641	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 642
 643	script_type scriptLanguage = ScriptOfState(state);
 644	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 645	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 646		scriptLanguage = eScriptComment;
 647	}
 648	script_type beforeLanguage = ScriptOfState(beforePreProc);
 649
 650	// property fold.html
 651	//	Folding is turned on or off for HTML and XML files with this option.
 652	//	The fold option must also be on for folding to occur.
 653	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 654
 655	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 656
 657	// property fold.html.preprocessor
 658	//	Folding is turned on or off for scripts embedded in HTML files with this option.
 659	//	The default is on.
 660	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 661
 662	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 663
 664	// property fold.hypertext.comment
 665	//	Allow folding for comments in scripts embedded in HTML.
 666	//	The default is off.
 667	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 668
 669	// property fold.hypertext.heredoc
 670	//	Allow folding for heredocs in scripts embedded in HTML.
 671	//	The default is off.
 672	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 673
 674	// property html.tags.case.sensitive
 675	//	For XML and HTML, setting this property to 1 will make tags match in a case
 676	//	sensitive way which is the expected behaviour for XML and XHTML.
 677	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 678
 679	// property lexer.xml.allow.scripts
 680	//	Set to 0 to disable scripts in XML.
 681	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 682
 683	// property lexer.html.mako
 684	//	Set to 1 to enable the mako template language.
 685	const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
 686
 687	// property lexer.html.django
 688	//	Set to 1 to enable the django template language.
 689	const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
 690
 691	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 692	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 693	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 694
 695	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 696	int levelCurrent = levelPrev;
 697	int visibleChars = 0;
 698	int lineStartVisibleChars = 0;
 699
 700	int chPrev = ' ';
 701	int ch = ' ';
 702	int chPrevNonWhite = ' ';
 703	// look back to set chPrevNonWhite properly for better regex colouring
 704	if (scriptLanguage == eScriptJS && startPos > 0) {
 705		int back = startPos;
 706		int style = 0;
 707		while (--back) {
 708			style = styler.StyleAt(back);
 709			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 710				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 711				break;
 712		}
 713		if (style == SCE_HJ_SYMBOLS) {
 714			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 715		}
 716	}
 717
 718	styler.StartSegment(startPos);
 719	const int lengthDoc = startPos + length;
 720	for (int i = startPos; i < lengthDoc; i++) {
 721		const int chPrev2 = chPrev;
 722		chPrev = ch;
 723		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 724			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 725			chPrevNonWhite = ch;
 726		ch = static_cast<unsigned char>(styler[i]);
 727		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 728		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 729
 730		// Handle DBCS codepages
 731		if (styler.IsLeadByte(static_cast<char>(ch))) {
 732			chPrev = ' ';
 733			i += 1;
 734			continue;
 735		}
 736
 737		if ((!IsASpace(ch) || !foldCompact) && fold)
 738			visibleChars++;
 739		if (!IsASpace(ch))
 740			lineStartVisibleChars++;
 741
 742		// decide what is the current state to print (depending of the script tag)
 743		StateToPrint = statePrintForState(state, inScriptType);
 744
 745		// handle script folding
 746		if (fold) {
 747			switch (scriptLanguage) {
 748			case eScriptJS:
 749			case eScriptPHP:
 750				//not currently supported				case eScriptVBS:
 751
 752				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 753				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 754				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 755					if (ch == '#') {
 756						int j = i + 1;
 757						while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
 758							j++;
 759						}
 760						if (styler.Match(j, "region") || styler.Match(j, "if")) {
 761							levelCurrent++;
 762						} else if (styler.Match(j, "end")) {
 763							levelCurrent--;
 764						}
 765					} else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 766						levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
 767					}
 768				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 769					levelCurrent--;
 770				}
 771				break;
 772			case eScriptPython:
 773				if (state != SCE_HP_COMMENTLINE && !isMako) {
 774					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 775						levelCurrent++;
 776					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 777						// check if the number of tabs is lower than the level
 778						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 779						for (int j = 0; Findlevel > 0; j++) {
 780							char chTmp = styler.SafeGetCharAt(i + j + 1);
 781							if (chTmp == '\t') {
 782								Findlevel -= 8;
 783							} else if (chTmp == ' ') {
 784								Findlevel--;
 785							} else {
 786								break;
 787							}
 788						}
 789
 790						if (Findlevel > 0) {
 791							levelCurrent -= Findlevel / 8;
 792							if (Findlevel % 8)
 793								levelCurrent--;
 794						}
 795					}
 796				}
 797				break;
 798			default:
 799				break;
 800			}
 801		}
 802
 803		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 804			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 805			// Avoid triggering two times on Dos/Win
 806			// New line -> record any line state onto /next/ line
 807			if (fold) {
 808				int lev = levelPrev;
 809				if (visibleChars == 0)
 810					lev |= SC_FOLDLEVELWHITEFLAG;
 811				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 812					lev |= SC_FOLDLEVELHEADERFLAG;
 813
 814				styler.SetLevel(lineCurrent, lev);
 815				visibleChars = 0;
 816				levelPrev = levelCurrent;
 817			}
 818			styler.SetLineState(lineCurrent,
 819			                    ((inScriptType & 0x03) << 0) |
 820			                    ((tagOpened & 0x01) << 2) |
 821			                    ((tagClosing & 0x01) << 3) |
 822			                    ((aspScript & 0x0F) << 4) |
 823			                    ((clientScript & 0x0F) << 8) |
 824			                    ((beforePreProc & 0xFF) << 12));
 825			lineCurrent++;
 826			lineStartVisibleChars = 0;
 827		}
 828
 829		// handle start of Mako comment line
 830		if (isMako && ch == '#' && chNext == '#') {
 831			makoComment = 1;
 832		}
 833		
 834		// handle end of Mako comment line
 835		else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
 836			makoComment = 0;
 837			styler.ColourTo(i, SCE_HP_COMMENTLINE);
 838			state = SCE_HP_DEFAULT;
 839		}
 840		
 841		// Allow falling through to mako handling code if newline is going to end a block
 842		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
 843			(!isMako || (0 != strcmp(makoBlockType, "%")))) {
 844		}
 845
 846		// generic end of script processing
 847		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 848			// Check if it's the end of the script tag (or any other HTML tag)
 849			switch (state) {
 850				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 851			case SCE_H_DOUBLESTRING:
 852			case SCE_H_SINGLESTRING:
 853			case SCE_HJ_COMMENT:
 854			case SCE_HJ_COMMENTDOC:
 855			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 856			// the end of script marker from some JS interpreters.
 857			case SCE_HB_COMMENTLINE:
 858			case SCE_HBA_COMMENTLINE:
 859			case SCE_HJ_DOUBLESTRING:
 860			case SCE_HJ_SINGLESTRING:
 861			case SCE_HJ_REGEX:
 862			case SCE_HB_STRING:
 863			case SCE_HBA_STRING:
 864			case SCE_HP_STRING:
 865			case SCE_HP_TRIPLE:
 866			case SCE_HP_TRIPLEDOUBLE:
 867			case SCE_HPHP_HSTRING:
 868			case SCE_HPHP_SIMPLESTRING:
 869			case SCE_HPHP_COMMENT:
 870			case SCE_HPHP_COMMENTLINE:
 871				break;
 872			default :
 873				// check if the closing tag is a script tag
 874				if (const char *tag =
 875						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 876						state == SCE_H_COMMENT ? "comment" : 0) {
 877					int j = i + 2;
 878					int chr;
 879					do {
 880						chr = static_cast<int>(*tag++);
 881					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 882					if (chr != 0) break;
 883				}
 884				// closing tag of the script (it's a closing HTML tag anyway)
 885				styler.ColourTo(i - 1, StateToPrint);
 886				state = SCE_H_TAGUNKNOWN;
 887				inScriptType = eHtml;
 888				scriptLanguage = eScriptNone;
 889				clientScript = eScriptJS;
 890				i += 2;
 891				visibleChars += 2;
 892				tagClosing = true;
 893				continue;
 894			}
 895		}
 896
 897		/////////////////////////////////////
 898		// handle the start of PHP pre-processor = Non-HTML
 899		else if ((state != SCE_H_ASPAT) &&
 900		         !isPHPStringState(state) &&
 901		         (state != SCE_HPHP_COMMENT) &&
 902		         (state != SCE_HPHP_COMMENTLINE) &&
 903		         (ch == '<') &&
 904		         (chNext == '?') &&
 905				 !IsScriptCommentState(state)) {
 906 			beforeLanguage = scriptLanguage;
 907			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
 908			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 909			styler.ColourTo(i - 1, StateToPrint);
 910			beforePreProc = state;
 911			i++;
 912			visibleChars++;
 913			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 914			if (scriptLanguage == eScriptXML)
 915				styler.ColourTo(i, SCE_H_XMLSTART);
 916			else
 917				styler.ColourTo(i, SCE_H_QUESTION);
 918			state = StateForScript(scriptLanguage);
 919			if (inScriptType == eNonHtmlScript)
 920				inScriptType = eNonHtmlScriptPreProc;
 921			else
 922				inScriptType = eNonHtmlPreProc;
 923			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 924			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 925				levelCurrent++;
 926			}
 927			// should be better
 928			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 929			continue;
 930		}
 931
 932		// handle the start of Mako template Python code
 933		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
 934															 (lineStartVisibleChars == 1 && ch == '%') ||
 935															 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
 936															 (ch == '$' && chNext == '{') ||
 937															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
 938			if (ch == '%' || ch == '/')
 939				strcpy(makoBlockType, "%");
 940			else if (ch == '$')
 941				strcpy(makoBlockType, "{");
 942			else if (chNext == '/')
 943				GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
 944			else
 945				GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
 946			styler.ColourTo(i - 1, StateToPrint);
 947			beforePreProc = state;
 948			if (inScriptType == eNonHtmlScript)
 949				inScriptType = eNonHtmlScriptPreProc;
 950			else
 951				inScriptType = eNonHtmlPreProc;
 952
 953			if (chNext == '/') {
 954				i += 2;
 955				visibleChars += 2;
 956			} else if (ch != '%') {
 957				i++;
 958				visibleChars++;
 959			}
 960			state = SCE_HP_START;
 961			scriptLanguage = eScriptPython;
 962			styler.ColourTo(i, SCE_H_ASP);
 963
 964			if (ch != '%' && ch != '$' && ch != '/') {
 965				i += static_cast<int>(strlen(makoBlockType));
 966				visibleChars += static_cast<int>(strlen(makoBlockType));
 967				if (keywords4.InList(makoBlockType))
 968					styler.ColourTo(i, SCE_HP_WORD);
 969				else
 970					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 971			}
 972
 973			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 974			continue;
 975		}
 976
 977		// handle the start/end of Django comment
 978		else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
 979			styler.ColourTo(i - 1, StateToPrint);
 980			beforePreProc = state;
 981			beforeLanguage = scriptLanguage;
 982			if (inScriptType == eNonHtmlScript)
 983				inScriptType = eNonHtmlScriptPreProc;
 984			else
 985				inScriptType = eNonHtmlPreProc;
 986			i += 1;
 987			visibleChars += 1;
 988			scriptLanguage = eScriptComment;
 989			state = SCE_H_COMMENT;
 990			styler.ColourTo(i, SCE_H_ASP);
 991			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 992			continue;
 993		} else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
 994			styler.ColourTo(i - 1, StateToPrint);
 995			i += 1;
 996			visibleChars += 1;
 997			styler.ColourTo(i, SCE_H_ASP);
 998			state = beforePreProc;
 999			if (inScriptType == eNonHtmlScriptPreProc)
1000				inScriptType = eNonHtmlScript;
1001			else
1002				inScriptType = eHtml;
1003			scriptLanguage = beforeLanguage;
1004			continue;
1005		}
1006
1007		// handle the start of Django template code
1008		else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
1009			if (chNext == '%')
1010				strcpy(djangoBlockType, "%");
1011			else
1012				strcpy(djangoBlockType, "{");
1013			styler.ColourTo(i - 1, StateToPrint);
1014			beforePreProc = state;
1015			if (inScriptType == eNonHtmlScript)
1016				inScriptType = eNonHtmlScriptPreProc;
1017			else
1018				inScriptType = eNonHtmlPreProc;
1019
1020			i += 1;
1021			visibleChars += 1;
1022			state = SCE_HP_START;
1023			beforeLanguage = scriptLanguage;
1024			scriptLanguage = eScriptPython;
1025			styler.ColourTo(i, SCE_H_ASP);
1026
1027			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1028			continue;
1029		}
1030
1031		// handle the start of ASP pre-processor = Non-HTML
1032		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1033			styler.ColourTo(i - 1, StateToPrint);
1034			beforePreProc = state;
1035			if (inScriptType == eNonHtmlScript)
1036				inScriptType = eNonHtmlScriptPreProc;
1037			else
1038				inScriptType = eNonHtmlPreProc;
1039
1040			if (chNext2 == '@') {
1041				i += 2; // place as if it was the second next char treated
1042				visibleChars += 2;
1043				state = SCE_H_ASPAT;
1044			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1045				styler.ColourTo(i + 3, SCE_H_ASP);
1046				state = SCE_H_XCCOMMENT;
1047				scriptLanguage = eScriptVBS;
1048				continue;
1049			} else {
1050				if (chNext2 == '=') {
1051					i += 2; // place as if it was the second next char treated
1052					visibleChars += 2;
1053				} else {
1054					i++; // place as if it was the next char treated
1055					visibleChars++;
1056				}
1057
1058				state = StateForScript(aspScript);
1059			}
1060			scriptLanguage = eScriptVBS;
1061			styler.ColourTo(i, SCE_H_ASP);
1062			// fold whole script
1063			if (foldHTMLPreprocessor)
1064				levelCurrent++;
1065			// should be better
1066			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1067			continue;
1068		}
1069
1070		/////////////////////////////////////
1071		// handle the start of SGML language (DTD)
1072		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1073				 (chPrev == '<') &&
1074				 (ch == '!') &&
1075				 (StateToPrint != SCE_H_CDATA) &&
1076				 (!IsCommentState(StateToPrint)) &&
1077				 (!IsScriptCommentState(StateToPrint))) {
1078			beforePreProc = state;
1079			styler.ColourTo(i - 2, StateToPrint);
1080			if ((chNext == '-') && (chNext2 == '-')) {
1081				state = SCE_H_COMMENT; // wait for a pending command
1082				styler.ColourTo(i + 2, SCE_H_COMMENT);
1083				i += 2; // follow styling after the --
1084			} else if (isWordCdata(i + 1, i + 7, styler)) {
1085				state = SCE_H_CDATA;
1086			} else {
1087				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1088				scriptLanguage = eScriptSGML;
1089				state = SCE_H_SGML_COMMAND; // wait for a pending command
1090			}
1091			// fold whole tag (-- when closing the tag)
1092			if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1093				levelCurrent++;
1094			continue;
1095		}
1096
1097		// handle the end of Mako Python code
1098		else if (isMako &&
1099			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1100				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1101				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1102			if (state == SCE_H_ASPAT) {
1103				aspScript = segIsScriptingIndicator(styler,
1104				                                    styler.GetStartSegment(), i - 1, aspScript);
1105			}
1106			if (state == SCE_HP_WORD) {
1107				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1108			} else {
1109				styler.ColourTo(i - 1, StateToPrint);
1110			}
1111			if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1112				i++;
1113				visibleChars++;
1114		    }
1115			else if (0 == strcmp(makoBlockType, "%") && ch == '/') {
1116				i++;
1117				visibleChars++;
1118			}
1119			if (0 != strcmp(makoBlockType, "%") || ch == '/') {
1120				styler.ColourTo(i, SCE_H_ASP);
1121			}
1122			state = beforePreProc;
1123			if (inScriptType == eNonHtmlScriptPreProc)
1124				inScriptType = eNonHtmlScript;
1125			else
1126				inScriptType = eHtml;
1127			scriptLanguage = eScriptNone;
1128			continue;
1129		}
1130
1131		// handle the end of Django template code
1132		else if (isDjango &&
1133			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1134				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1135				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1136			if (state == SCE_H_ASPAT) {
1137				aspScript = segIsScriptingIndicator(styler,
1138				                                    styler.GetStartSegment(), i - 1, aspScript);
1139			}
1140			if (state == SCE_HP_WORD) {
1141				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1142			} else {
1143				styler.ColourTo(i - 1, StateToPrint);
1144			}
1145			i += 1;
1146			visibleChars += 1;
1147			styler.ColourTo(i, SCE_H_ASP);
1148			state = beforePreProc;
1149			if (inScriptType == eNonHtmlScriptPreProc)
1150				inScriptType = eNonHtmlScript;
1151			else
1152				inScriptType = eHtml;
1153			scriptLanguage = beforeLanguage;
1154			continue;
1155		}
1156
1157		// handle the end of a pre-processor = Non-HTML
1158		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1159				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1160				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1161		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1162			if (state == SCE_H_ASPAT) {
1163				aspScript = segIsScriptingIndicator(styler,
1164				                                    styler.GetStartSegment(), i - 1, aspScript);
1165			}
1166			// Bounce out of any ASP mode
1167			switch (state) {
1168			case SCE_HJ_WORD:
1169				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1170				break;
1171			case SCE_HB_WORD:
1172				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1173				break;
1174			case SCE_HP_WORD:
1175				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1176				break;
1177			case SCE_HPHP_WORD:
1178				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1179				break;
1180			case SCE_H_XCCOMMENT:
1181				styler.ColourTo(i - 1, state);
1182				break;
1183			default :
1184				styler.ColourTo(i - 1, StateToPrint);
1185				break;
1186			}
1187			if (scriptLanguage != eScriptSGML) {
1188				i++;
1189				visibleChars++;
1190			}
1191			if (ch == '%')
1192				styler.ColourTo(i, SCE_H_ASP);
1193			else if (scriptLanguage == eScriptXML)
1194				styler.ColourTo(i, SCE_H_XMLEND);
1195			else if (scriptLanguage == eScriptSGML)
1196				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1197			else
1198				styler.ColourTo(i, SCE_H_QUESTION);
1199			state = beforePreProc;
1200			if (inScriptType == eNonHtmlScriptPreProc)
1201				inScriptType = eNonHtmlScript;
1202			else
1203				inScriptType = eHtml;
1204			// Unfold all scripting languages, except for XML tag
1205			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1206				levelCurrent--;
1207			}
1208			scriptLanguage = beforeLanguage;
1209			continue;
1210		}
1211		/////////////////////////////////////
1212
1213		switch (state) {
1214		case SCE_H_DEFAULT:
1215			if (ch == '<') {
1216				// in HTML, fold on tag open and unfold on tag close
1217				tagOpened = true;
1218				tagClosing = (chNext == '/');
1219				styler.ColourTo(i - 1, StateToPrint);
1220				if (chNext != '!')
1221					state = SCE_H_TAGUNKNOWN;
1222			} else if (ch == '&') {
1223				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1224				state = SCE_H_ENTITY;
1225			}
1226			break;
1227		case SCE_H_SGML_DEFAULT:
1228		case SCE_H_SGML_BLOCK_DEFAULT:
1229//			if (scriptLanguage == eScriptSGMLblock)
1230//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1231
1232			if (ch == '\"') {
1233				styler.ColourTo(i - 1, StateToPrint);
1234				state = SCE_H_SGML_DOUBLESTRING;
1235			} else if (ch == '\'') {
1236				styler.ColourTo(i - 1, StateToPrint);
1237				state = SCE_H_SGML_SIMPLESTRING;
1238			} else if ((ch == '-') && (chPrev == '-')) {
1239				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1240					styler.ColourTo(i - 2, StateToPrint);
1241				}
1242				state = SCE_H_SGML_COMMENT;
1243			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1244				styler.ColourTo(i - 2, StateToPrint);
1245				state = SCE_H_SGML_ENTITY;
1246			} else if (ch == '#') {
1247				styler.ColourTo(i - 1, StateToPrint);
1248				state = SCE_H_SGML_SPECIAL;
1249			} else if (ch == '[') {
1250				styler.ColourTo(i - 1, StateToPrint);
1251				scriptLanguage = eScriptSGMLblock;
1252				state = SCE_H_SGML_BLOCK_DEFAULT;
1253			} else if (ch == ']') {
1254				if (scriptLanguage == eScriptSGMLblock) {
1255					styler.ColourTo(i, StateToPrint);
1256					scriptLanguage = eScriptSGML;
1257				} else {
1258					styler.ColourTo(i - 1, StateToPrint);
1259					styler.ColourTo(i, SCE_H_SGML_ERROR);
1260				}
1261				state = SCE_H_SGML_DEFAULT;
1262			} else if (scriptLanguage == eScriptSGMLblock) {
1263				if ((ch == '!') && (chPrev == '<')) {
1264					styler.ColourTo(i - 2, StateToPrint);
1265					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1266					state = SCE_H_SGML_COMMAND;
1267				} else if (ch == '>') {
1268					styler.ColourTo(i - 1, StateToPrint);
1269					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1270				}
1271			}
1272			break;
1273		case SCE_H_SGML_COMMAND:
1274			if ((ch == '-') && (chPrev == '-')) {
1275				styler.ColourTo(i - 2, StateToPrint);
1276				state = SCE_H_SGML_COMMENT;
1277			} else if (!issgmlwordchar(ch)) {
1278				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1279					styler.ColourTo(i - 1, StateToPrint);
1280					state = SCE_H_SGML_1ST_PARAM;
1281				} else {
1282					state = SCE_H_SGML_ERROR;
1283				}
1284			}
1285			break;
1286		case SCE_H_SGML_1ST_PARAM:
1287			// wait for the beginning of the word
1288			if ((ch == '-') && (chPrev == '-')) {
1289				if (scriptLanguage == eScriptSGMLblock) {
1290					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1291				} else {
1292					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1293				}
1294				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1295			} else if (issgmlwordchar(ch)) {
1296				if (scriptLanguage == eScriptSGMLblock) {
1297					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1298				} else {
1299					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1300				}
1301				// find the length of the word
1302				int size = 1;
1303				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1304					size++;
1305				styler.ColourTo(i + size - 1, StateToPrint);
1306				i += size - 1;
1307				visibleChars += size - 1;
1308				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1309				if (scriptLanguage == eScriptSGMLblock) {
1310					state = SCE_H_SGML_BLOCK_DEFAULT;
1311				} else {
1312					state = SCE_H_SGML_DEFAULT;
1313				}
1314				continue;
1315			}
1316			break;
1317		case SCE_H_SGML_ERROR:
1318			if ((ch == '-') && (chPrev == '-')) {
1319				styler.ColourTo(i - 2, StateToPrint);
1320				state = SCE_H_SGML_COMMENT;
1321			}
1322		case SCE_H_SGML_DOUBLESTRING:
1323			if (ch == '\"') {
1324				styler.ColourTo(i, StateToPrint);
1325				state = SCE_H_SGML_DEFAULT;
1326			}
1327			break;
1328		case SCE_H_SGML_SIMPLESTRING:
1329			if (ch == '\'') {
1330				styler.ColourTo(i, StateToPrint);
1331				state = SCE_H_SGML_DEFAULT;
1332			}
1333			break;
1334		case SCE_H_SGML_COMMENT:
1335			if ((ch == '-') && (chPrev == '-')) {
1336				styler.ColourTo(i, StateToPrint);
1337				state = SCE_H_SGML_DEFAULT;
1338			}
1339			break;
1340		case SCE_H_CDATA:
1341			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1342				styler.ColourTo(i, StateToPrint);
1343				state = SCE_H_DEFAULT;
1344				levelCurrent--;
1345			}
1346			break;
1347		case SCE_H_COMMENT:
1348			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1349				styler.ColourTo(i, StateToPrint);
1350				state = SCE_H_DEFAULT;
1351				levelCurrent--;
1352			}
1353			break;
1354		case SCE_H_SGML_1ST_PARAM_COMMENT:
1355			if ((ch == '-') && (chPrev == '-')) {
1356				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1357				state = SCE_H_SGML_1ST_PARAM;
1358			}
1359			break;
1360		case SCE_H_SGML_SPECIAL:
1361			if (!(isascii(ch) && isupper(ch))) {
1362				styler.ColourTo(i - 1, StateToPrint);
1363				if (isalnum(ch)) {
1364					state = SCE_H_SGML_ERROR;
1365				} else {
1366					state = SCE_H_SGML_DEFAULT;
1367				}
1368			}
1369			break;
1370		case SCE_H_SGML_ENTITY:
1371			if (ch == ';') {
1372				styler.ColourTo(i, StateToPrint);
1373				state = SCE_H_SGML_DEFAULT;
1374			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1375				styler.ColourTo(i, SCE_H_SGML_ERROR);
1376				state = SCE_H_SGML_DEFAULT;
1377			}
1378			break;
1379		case SCE_H_ENTITY:
1380			if (ch == ';') {
1381				styler.ColourTo(i, StateToPrint);
1382				state = SCE_H_DEFAULT;
1383			}
1384			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1385				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1386				if (!isascii(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1387					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1388				else
1389					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1390				state = SCE_H_DEFAULT;
1391			}
1392			break;
1393		case SCE_H_TAGUNKNOWN:
1394			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1395				int eClass = classifyTagHTML(styler.GetStartSegment(),
1396					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1397				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1398					if (!tagClosing) {
1399						inScriptType = eNonHtmlScript;
1400						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1401					} else {
1402						scriptLanguage = eScriptNone;
1403					}
1404					eClass = SCE_H_TAG;
1405				}
1406				if (ch == '>') {
1407					styler.ColourTo(i, eClass);
1408					if (inScriptType == eNonHtmlScript) {
1409						state = StateForScript(scriptLanguage);
1410					} else {
1411						state = SCE_H_DEFAULT;
1412					}
1413					tagOpened = false;
1414					if (!tagDontFold) {
1415						if (tagClosing) {
1416							levelCurrent--;
1417						} else {
1418							levelCurrent++;
1419						}
1420					}
1421					tagClosing = false;
1422				} else if (ch == '/' && chNext == '>') {
1423					if (eClass == SCE_H_TAGUNKNOWN) {
1424						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1425					} else {
1426						styler.ColourTo(i - 1, StateToPrint);
1427						styler.ColourTo(i + 1, SCE_H_TAGEND);
1428					}
1429					i++;
1430					ch = chNext;
1431					state = SCE_H_DEFAULT;
1432					tagOpened = false;
1433				} else {
1434					if (eClass != SCE_H_TAGUNKNOWN) {
1435						if (eClass == SCE_H_SGML_DEFAULT) {
1436							state = SCE_H_SGML_DEFAULT;
1437						} else {
1438							state = SCE_H_OTHER;
1439						}
1440					}
1441				}
1442			}
1443			break;
1444		case SCE_H_ATTRIBUTE:
1445			if (!setAttributeContinue.Contains(ch)) {
1446				if (inScriptType == eNonHtmlScript) {
1447					int scriptLanguagePrev = scriptLanguage;
1448					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1449					scriptLanguage = clientScript;
1450					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1451						inScriptType = eHtml;
1452				}
1453				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1454				if (ch == '>') {
1455					styler.ColourTo(i, SCE_H_TAG);
1456					if (inScriptType == eNonHtmlScript) {
1457						state = StateForScript(scriptLanguage);
1458					} else {
1459						state = SCE_H_DEFAULT;
1460					}
1461					tagOpened = false;
1462					if (!tagDontFold) {
1463						if (tagClosing) {
1464							levelCurrent--;
1465						} else {
1466							levelCurrent++;
1467						}
1468					}
1469					tagClosing = false;
1470				} else if (ch == '=') {
1471					styler.ColourTo(i, SCE_H_OTHER);
1472					state = SCE_H_VALUE;
1473				} else {
1474					state = SCE_H_OTHER;
1475				}
1476			}
1477			break;
1478		case SCE_H_OTHER:
1479			if (ch == '>') {
1480				styler.ColourTo(i - 1, StateToPrint);
1481				styler.ColourTo(i, SCE_H_TAG);
1482				if (inScriptType == eNonHtmlScript) {
1483					state = StateForScript(scriptLanguage);
1484				} else {
1485					state = SCE_H_DEFAULT;
1486				}
1487				tagOpened = false;
1488				if (!tagDontFold) {
1489					if (tagClosing) {
1490						levelCurrent--;
1491					} else {
1492						levelCurrent++;
1493					}
1494				}
1495				tagClosing = false;
1496			} else if (ch == '\"') {
1497				styler.ColourTo(i - 1, StateToPrint);
1498				state = SCE_H_DOUBLESTRING;
1499			} else if (ch == '\'') {
1500				styler.ColourTo(i - 1, StateToPrint);
1501				state = SCE_H_SINGLESTRING;
1502			} else if (ch == '=') {
1503				styler.ColourTo(i, StateToPrint);
1504				state = SCE_H_VALUE;
1505			} else if (ch == '/' && chNext == '>') {
1506				styler.ColourTo(i - 1, StateToPrint);
1507				styler.ColourTo(i + 1, SCE_H_TAGEND);
1508				i++;
1509				ch = chNext;
1510				state = SCE_H_DEFAULT;
1511				tagOpened = false;
1512			} else if (ch == '?' && chNext == '>') {
1513				styler.ColourTo(i - 1, StateToPrint);
1514				styler.ColourTo(i + 1, SCE_H_XMLEND);
1515				i++;
1516				ch = chNext;
1517				state = SCE_H_DEFAULT;
1518			} else if (setHTMLWord.Contains(ch)) {
1519				styler.ColourTo(i - 1, StateToPrint);
1520				state = SCE_H_ATTRIBUTE;
1521			}
1522			break;
1523		case SCE_H_DOUBLESTRING:
1524			if (ch == '\"') {
1525				if (inScriptType == eNonHtmlScript) {
1526					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1527				}
1528				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1529				state = SCE_H_OTHER;
1530			}
1531			break;
1532		case SCE_H_SINGLESTRING:
1533			if (ch == '\'') {
1534				if (inScriptType == eNonHtmlScript) {
1535					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1536				}
1537				styler.ColourTo(i, SCE_H_SINGLESTRING);
1538				state = SCE_H_OTHER;
1539			}
1540			break;
1541		case SCE_H_VALUE:
1542			if (!setHTMLWord.Contains(ch)) {
1543				if (ch == '\"' && chPrev == '=') {
1544					// Should really test for being first character
1545					state = SCE_H_DOUBLESTRING;
1546				} else if (ch == '\'' && chPrev == '=') {
1547					state = SCE_H_SINGLESTRING;
1548				} else {
1549					if (IsNumber(styler.GetStartSegment(), styler)) {
1550						styler.ColourTo(i - 1, SCE_H_NUMBER);
1551					} else {
1552						styler.ColourTo(i - 1, StateToPrint);
1553					}
1554					if (ch == '>') {
1555						styler.ColourTo(i, SCE_H_TAG);
1556						if (inScriptType == eNonHtmlScript) {
1557							state = StateForScript(scriptLanguage);
1558						} else {
1559							state = SCE_H_DEFAULT;
1560						}
1561						tagOpened = false;
1562						if (!tagDontFold) {
1563							if (tagClosing) {
1564								levelCurrent--;
1565							} else {
1566								levelCurrent++;
1567							}
1568						}
1569						tagClosing = false;
1570					} else {
1571						state = SCE_H_OTHER;
1572					}
1573				}
1574			}
1575			break;
1576		case SCE_HJ_DEFAULT:
1577		case SCE_HJ_START:
1578		case SCE_HJ_SYMBOLS:
1579			if (IsAWordStart(ch)) {
1580				styler.ColourTo(i - 1, StateToPrint);
1581				state = SCE_HJ_WORD;
1582			} else if (ch == '/' && chNext == '*') {
1583				styler.ColourT

Large files are truncated, but you can click here to view the full file