PageRenderTime 464ms CodeModel.GetById 121ms app.highlight 224ms RepoModel.GetById 87ms app.codeStats 2ms

/src/gtkscintilla2/scintilla/lexers/LexHTML.cxx

https://github.com/anoopjohn/gphpedit
C++ | 2125 lines | 1899 code | 117 blank | 109 comment | 1397 complexity | 35037bae42cc225b9778d8f2e5cc68c5 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>
   9#include <string.h>
  10#include <stdio.h>
  11#include <stdarg.h>
  12#include <assert.h>
  13#include <ctype.h>
  14
  15#include "ILexer.h"
  16#include "Scintilla.h"
  17#include "SciLexer.h"
  18
  19#include "PropSetSimple.h"
  20#include "WordList.h"
  21#include "LexAccessor.h"
  22#include "Accessor.h"
  23#include "StyleContext.h"
  24#include "CharacterSet.h"
  25#include "LexerModule.h"
  26
  27#ifdef SCI_NAMESPACE
  28using namespace Scintilla;
  29#endif
  30
  31#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  32#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  33#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
  34
  35enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock, eScriptComment };
  36enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
  37
  38static inline bool IsAWordChar(const int ch) {
  39	return (ch < 0x80) && (isalnum(ch) || ch == '.' || ch == '_');
  40}
  41
  42static inline bool IsAWordStart(const int ch) {
  43	return (ch < 0x80) && (isalnum(ch) || ch == '_');
  44}
  45
  46inline bool IsOperator(int ch) {
  47	if (isascii(ch) && isalnum(ch))
  48		return false;
  49	// '.' left out as it is used to make up numbers
  50	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
  51	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
  52	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
  53	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
  54	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
  55	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
  56		return true;
  57	return false;
  58}
  59
  60static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  61	size_t i = 0;
  62	for (; (i < end - start + 1) && (i < len-1); i++) {
  63		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  64	}
  65	s[i] = '\0';
  66}
  67
  68static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  69
  70	size_t i = 0;
  71	for (; i < sLen-1; i++) {
  72		char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  73		if ((i == 0) && !IsAWordStart(ch))
  74			break;
  75		if ((i > 0) && !IsAWordChar(ch))
  76			break;
  77		s[i] = ch;
  78	}
  79	s[i] = '\0';
  80
  81	return s;
  82}
  83
  84static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  85	char s[100];
  86	GetTextSegment(styler, start, end, s, sizeof(s));
  87	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  88	if (strstr(s, "src"))	// External script
  89		return eScriptNone;
  90	if (strstr(s, "vbs"))
  91		return eScriptVBS;
  92	if (strstr(s, "pyth"))
  93		return eScriptPython;
  94	if (strstr(s, "javas"))
  95		return eScriptJS;
  96	if (strstr(s, "jscr"))
  97		return eScriptJS;
  98	if (strstr(s, "php"))
  99		return eScriptPHP;
 100	if (strstr(s, "xml")) {
 101		const char *xml = strstr(s, "xml");
 102		for (const char *t=s; t<xml; t++) {
 103			if (!IsASpace(*t)) {
 104				return prevValue;
 105			}
 106		}
 107		return eScriptXML;
 108	}
 109
 110	return prevValue;
 111}
 112
 113static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 114	int iResult = 0;
 115	char s[100];
 116	GetTextSegment(styler, start, end, s, sizeof(s));
 117	if (0 == strncmp(s, "php", 3)) {
 118		iResult = 3;
 119	}
 120
 121	return iResult;
 122}
 123
 124static script_type ScriptOfState(int state) {
 125	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 126		return eScriptPython;
 127	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 128		return eScriptVBS;
 129	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 130		return eScriptJS;
 131	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 132		return eScriptPHP;
 133	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 134		return eScriptSGML;
 135	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 136		return eScriptSGMLblock;
 137	} else {
 138		return eScriptNone;
 139	}
 140}
 141
 142static int statePrintForState(int state, script_mode inScriptType) {
 143	int StateToPrint = state;
 144
 145	if (state >= SCE_HJ_START) {
 146		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 147			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 148		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 149			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 150		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 151			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 152		}
 153	}
 154
 155	return StateToPrint;
 156}
 157
 158static int stateForPrintState(int StateToPrint) {
 159	int state;
 160
 161	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 162		state = StateToPrint - SCE_HA_PYTHON;
 163	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 164		state = StateToPrint - SCE_HA_VBS;
 165	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 166		state = StateToPrint - SCE_HA_JS;
 167	} else {
 168		state = StateToPrint;
 169	}
 170
 171	return state;
 172}
 173
 174static inline bool IsNumber(unsigned int start, Accessor &styler) {
 175	return IsADigit(styler[start]) || (styler[start] == '.') ||
 176	       (styler[start] == '-') || (styler[start] == '#');
 177}
 178
 179static inline bool isStringState(int state) {
 180	bool bResult;
 181
 182	switch (state) {
 183	case SCE_HJ_DOUBLESTRING:
 184	case SCE_HJ_SINGLESTRING:
 185	case SCE_HJA_DOUBLESTRING:
 186	case SCE_HJA_SINGLESTRING:
 187	case SCE_HB_STRING:
 188	case SCE_HBA_STRING:
 189	case SCE_HP_STRING:
 190	case SCE_HP_CHARACTER:
 191	case SCE_HP_TRIPLE:
 192	case SCE_HP_TRIPLEDOUBLE:
 193	case SCE_HPA_STRING:
 194	case SCE_HPA_CHARACTER:
 195	case SCE_HPA_TRIPLE:
 196	case SCE_HPA_TRIPLEDOUBLE:
 197	case SCE_HPHP_HSTRING:
 198	case SCE_HPHP_SIMPLESTRING:
 199	case SCE_HPHP_HSTRING_VARIABLE:
 200	case SCE_HPHP_COMPLEX_VARIABLE:
 201		bResult = true;
 202		break;
 203	default :
 204		bResult = false;
 205		break;
 206	}
 207	return bResult;
 208}
 209
 210static inline bool stateAllowsTermination(int state) {
 211	bool allowTermination = !isStringState(state);
 212	if (allowTermination) {
 213		switch (state) {
 214		case SCE_HB_COMMENTLINE:
 215		case SCE_HPHP_COMMENT:
 216		case SCE_HP_COMMENTLINE:
 217		case SCE_HPA_COMMENTLINE:
 218			allowTermination = false;
 219		}
 220	}
 221	return allowTermination;
 222}
 223
 224// not really well done, since it's only comments that should lex the %> and <%
 225static inline bool isCommentASPState(int state) {
 226	bool bResult;
 227
 228	switch (state) {
 229	case SCE_HJ_COMMENT:
 230	case SCE_HJ_COMMENTLINE:
 231	case SCE_HJ_COMMENTDOC:
 232	case SCE_HB_COMMENTLINE:
 233	case SCE_HP_COMMENTLINE:
 234	case SCE_HPHP_COMMENT:
 235	case SCE_HPHP_COMMENTLINE:
 236		bResult = true;
 237		break;
 238	default :
 239		bResult = false;
 240		break;
 241	}
 242	return bResult;
 243}
 244
 245static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 246	bool wordIsNumber = IsNumber(start, styler);
 247	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 248	if (wordIsNumber) {
 249		chAttr = SCE_H_NUMBER;
 250	} else {
 251		char s[100];
 252		GetTextSegment(styler, start, end, s, sizeof(s));
 253		if (keywords.InList(s))
 254			chAttr = SCE_H_ATTRIBUTE;
 255	}
 256	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 257		// No keywords -> all are known
 258		chAttr = SCE_H_ATTRIBUTE;
 259	styler.ColourTo(end, chAttr);
 260}
 261
 262static int classifyTagHTML(unsigned int start, unsigned int end,
 263                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 264			   bool caseSensitive, bool isXml, bool allowScripts) {
 265	char s[30 + 2];
 266	// Copy after the '<'
 267	unsigned int i = 0;
 268	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 269		char ch = styler[cPos];
 270		if ((ch != '<') && (ch != '/')) {
 271			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 272		}
 273	}
 274
 275	//The following is only a quick hack, to see if this whole thing would work
 276	//we first need the tagname with a trailing space...
 277	s[i] = ' ';
 278	s[i+1] = '\0';
 279
 280	// if the current language is XML, I can fold any tag
 281	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 282	//...to find it in the list of no-container-tags
 283	tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
 284
 285	//now we can remove the trailing space
 286	s[i] = '\0';
 287
 288	// No keywords -> all are known
 289	char chAttr = SCE_H_TAGUNKNOWN;
 290	if (s[0] == '!') {
 291		chAttr = SCE_H_SGML_DEFAULT;
 292	} else if (!keywords || keywords.InList(s)) {
 293		chAttr = SCE_H_TAG;
 294	}
 295	styler.ColourTo(end, chAttr);
 296	if (chAttr == SCE_H_TAG) {
 297		if (allowScripts && 0 == strcmp(s, "script")) {
 298			// check to see if this is a self-closing tag by sniffing ahead
 299			bool isSelfClose = false;
 300			for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
 301				char ch = styler.SafeGetCharAt(cPos, '\0');
 302				if (ch == '\0' || ch == '>')
 303					break;
 304				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
 305					isSelfClose = true;
 306					break;
 307				}
 308			}
 309
 310			// do not enter a script state if the tag self-closed
 311			if (!isSelfClose)
 312				chAttr = SCE_H_SCRIPT;
 313		} else if (!isXml && 0 == strcmp(s, "comment")) {
 314			chAttr = SCE_H_COMMENT;
 315		}
 316	}
 317	return chAttr;
 318}
 319
 320static void classifyWordHTJS(unsigned int start, unsigned int end,
 321                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 322	char chAttr = SCE_HJ_WORD;
 323	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 324	if (wordIsNumber)
 325		chAttr = SCE_HJ_NUMBER;
 326	else {
 327		char s[30 + 1];
 328		unsigned int i = 0;
 329		for (; i < end - start + 1 && i < 30; i++) {
 330			s[i] = styler[start + i];
 331		}
 332		s[i] = '\0';
 333		if (keywords.InList(s))
 334			chAttr = SCE_HJ_KEYWORD;
 335	}
 336	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 337}
 338
 339static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 340	char chAttr = SCE_HB_IDENTIFIER;
 341	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 342	if (wordIsNumber)
 343		chAttr = SCE_HB_NUMBER;
 344	else {
 345		char s[100];
 346		GetTextSegment(styler, start, end, s, sizeof(s));
 347		if (keywords.InList(s)) {
 348			chAttr = SCE_HB_WORD;
 349			if (strcmp(s, "rem") == 0)
 350				chAttr = SCE_HB_COMMENTLINE;
 351		}
 352	}
 353	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 354	if (chAttr == SCE_HB_COMMENTLINE)
 355		return SCE_HB_COMMENTLINE;
 356	else
 357		return SCE_HB_DEFAULT;
 358}
 359
 360static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 361	bool wordIsNumber = IsADigit(styler[start]);
 362	char s[30 + 1];
 363	unsigned int i = 0;
 364	for (; i < end - start + 1 && i < 30; i++) {
 365		s[i] = styler[start + i];
 366	}
 367	s[i] = '\0';
 368	char chAttr = SCE_HP_IDENTIFIER;
 369	if (0 == strcmp(prevWord, "class"))
 370		chAttr = SCE_HP_CLASSNAME;
 371	else if (0 == strcmp(prevWord, "def"))
 372		chAttr = SCE_HP_DEFNAME;
 373	else if (wordIsNumber)
 374		chAttr = SCE_HP_NUMBER;
 375	else if (keywords.InList(s))
 376		chAttr = SCE_HP_WORD;
 377	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 378	strcpy(prevWord, s);
 379}
 380
 381// Update the word colour to default or keyword
 382// Called when in a PHP word
 383static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 384	char chAttr = SCE_HPHP_DEFAULT;
 385	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 386	if (wordIsNumber)
 387		chAttr = SCE_HPHP_NUMBER;
 388	else {
 389		char s[100];
 390		GetTextSegment(styler, start, end, s, sizeof(s));
 391		if (keywords.InList(s))
 392			chAttr = SCE_HPHP_WORD;
 393	}
 394	styler.ColourTo(end, chAttr);
 395}
 396
 397static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 398	char s[30 + 1];
 399	unsigned int i = 0;
 400	for (; i < end - start + 1 && i < 30; i++) {
 401		s[i] = styler[start + i];
 402	}
 403	s[i] = '\0';
 404	return keywords.InList(s);
 405}
 406
 407static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 408	char s[30 + 1];
 409	unsigned int i = 0;
 410	for (; i < end - start + 1 && i < 30; i++) {
 411		s[i] = styler[start + i];
 412	}
 413	s[i] = '\0';
 414	return (0 == strcmp(s, "[CDATA["));
 415}
 416
 417// Return the first state to reach when entering a scripting language
 418static int StateForScript(script_type scriptLanguage) {
 419	int Result;
 420	switch (scriptLanguage) {
 421	case eScriptVBS:
 422		Result = SCE_HB_START;
 423		break;
 424	case eScriptPython:
 425		Result = SCE_HP_START;
 426		break;
 427	case eScriptPHP:
 428		Result = SCE_HPHP_DEFAULT;
 429		break;
 430	case eScriptXML:
 431		Result = SCE_H_TAGUNKNOWN;
 432		break;
 433	case eScriptSGML:
 434		Result = SCE_H_SGML_DEFAULT;
 435		break;
 436	case eScriptComment:
 437		Result = SCE_H_COMMENT;
 438		break;
 439	default :
 440		Result = SCE_HJ_START;
 441		break;
 442	}
 443	return Result;
 444}
 445
 446static inline bool ishtmlwordchar(int ch) {
 447	return !isascii(ch) ||
 448		(isalnum(ch) || ch == '.' || ch == '-' || ch == '_' || ch == ':' || ch == '!' || ch == '#');
 449}
 450
 451static inline bool issgmlwordchar(int ch) {
 452	return !isascii(ch) ||
 453		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
 454}
 455
 456static inline bool IsPhpWordStart(int ch) {
 457	return (isascii(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
 458}
 459
 460static inline bool IsPhpWordChar(int ch) {
 461	return IsADigit(ch) || IsPhpWordStart(ch);
 462}
 463
 464static bool InTagState(int state) {
 465	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 466	       state == SCE_H_SCRIPT ||
 467	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 468	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 469	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 470}
 471
 472static bool IsCommentState(const int state) {
 473	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 474}
 475
 476static bool IsScriptCommentState(const int state) {
 477	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 478		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 479}
 480
 481static bool isLineEnd(int ch) {
 482	return ch == '\r' || ch == '\n';
 483}
 484
 485static bool isOKBeforeRE(int ch) {
 486	return (ch == '(') || (ch == '=') || (ch == ',');
 487}
 488
 489static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
 490	if (strlen(blockType) == 0) {
 491		return ((ch == '%') && (chNext == '>'));
 492	} else if ((0 == strcmp(blockType, "inherit")) ||
 493			   (0 == strcmp(blockType, "namespace")) ||
 494			   (0 == strcmp(blockType, "include")) ||
 495			   (0 == strcmp(blockType, "page"))) {
 496		return ((ch == '/') && (chNext == '>'));
 497	} else if (0 == strcmp(blockType, "%")) {
 498		return isLineEnd(ch);
 499	} else if (0 == strcmp(blockType, "{")) {
 500		return ch == '}';
 501	} else {
 502		return (ch == '>');
 503	}
 504}
 505
 506static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
 507	if (strlen(blockType) == 0) {
 508		return 0;
 509	} else if (0 == strcmp(blockType, "%")) {
 510		return ((ch == '%') && (chNext == '}'));
 511	} else if (0 == strcmp(blockType, "{")) {
 512		return ((ch == '}') && (chNext == '}'));
 513	} else {
 514		return 0;
 515	}
 516}
 517
 518static bool isPHPStringState(int state) {
 519	return
 520	    (state == SCE_HPHP_HSTRING) ||
 521	    (state == SCE_HPHP_SIMPLESTRING) ||
 522	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 523	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 524}
 525
 526static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
 527	int j;
 528	const int beginning = i - 1;
 529	bool isValidSimpleString = false;
 530
 531	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 532		i++;
 533
 534	char ch = styler.SafeGetCharAt(i);
 535	const char chNext = styler.SafeGetCharAt(i + 1);
 536	if (!IsPhpWordStart(ch)) {
 537		if (ch == '\'' && IsPhpWordStart(chNext)) {
 538			i++;
 539			ch = chNext;
 540			isSimpleString = true;
 541		} else {
 542			phpStringDelimiter[0] = '\0';
 543			return beginning;
 544		}
 545	}
 546	phpStringDelimiter[0] = ch;
 547	i++;
 548
 549	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
 550		if (!IsPhpWordChar(styler[j])) {
 551			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
 552				isValidSimpleString = true;
 553				j++;
 554				break;
 555			} else {
 556				phpStringDelimiter[0] = '\0';
 557				return beginning;
 558			}
 559		}
 560		if (j - i < phpStringDelimiterSize - 2)
 561			phpStringDelimiter[j-i+1] = styler[j];
 562		else
 563			i++;
 564	}
 565	if (isSimpleString && !isValidSimpleString) {
 566		phpStringDelimiter[0] = '\0';
 567		return beginning;
 568	}
 569	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
 570	return j - 1;
 571}
 572
 573static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 574                                  Accessor &styler, bool isXml) {
 575	WordList &keywords = *keywordlists[0];
 576	WordList &keywords2 = *keywordlists[1];
 577	WordList &keywords3 = *keywordlists[2];
 578	WordList &keywords4 = *keywordlists[3];
 579	WordList &keywords5 = *keywordlists[4];
 580	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 581
 582	// Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 583	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 584	char prevWord[200];
 585	prevWord[0] = '\0';
 586	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 587	phpStringDelimiter[0] = '\0';
 588	int StateToPrint = initStyle;
 589	int state = stateForPrintState(StateToPrint);
 590	char makoBlockType[200];
 591	makoBlockType[0] = '\0';
 592	char djangoBlockType[2];
 593	djangoBlockType[0] = '\0';
 594
 595	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 596	if (InTagState(state)) {
 597		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 598			startPos--;
 599			length++;
 600		}
 601		state = SCE_H_DEFAULT;
 602	}
 603	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 604	if (isPHPStringState(state)) {
 605		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 606			startPos--;
 607			length++;
 608			state = styler.StyleAt(startPos);
 609		}
 610		if (startPos == 0)
 611			state = SCE_H_DEFAULT;
 612	}
 613	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 614
 615	int lineCurrent = styler.GetLine(startPos);
 616	int lineState;
 617	if (lineCurrent > 0) {
 618		lineState = styler.GetLineState(lineCurrent-1);
 619	} else {
 620		// Default client and ASP scripting language is JavaScript
 621		lineState = eScriptJS << 8;
 622
 623		// property asp.default.language
 624		//	Script in ASP code is initially assumed to be in JavaScript.
 625		//	To change this to VBScript set asp.default.language to 2. Python is 3.
 626		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
 627	}
 628	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 629	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 630	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 631	bool tagDontFold = false; //some HTML tags should not be folded
 632	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 633	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 634	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 635
 636	script_type scriptLanguage = ScriptOfState(state);
 637	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 638	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 639		scriptLanguage = eScriptComment;
 640	}
 641	script_type beforeLanguage = ScriptOfState(beforePreProc);
 642
 643	// property fold.html
 644	//	Folding is turned on or off for HTML and XML files with this option.
 645	//	The fold option must also be on for folding to occur.
 646	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 647
 648	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 649
 650	// property fold.html.preprocessor
 651	//	Folding is turned on or off for scripts embedded in HTML files with this option.
 652	//	The default is on.
 653	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 654
 655	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 656
 657	// property fold.hypertext.comment
 658	//	Allow folding for comments in scripts embedded in HTML.
 659	//	The default is off.
 660	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 661
 662	// property fold.hypertext.heredoc
 663	//	Allow folding for heredocs in scripts embedded in HTML.
 664	//	The default is off.
 665	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 666
 667	// property html.tags.case.sensitive
 668	//	For XML and HTML, setting this property to 1 will make tags match in a case
 669	//	sensitive way which is the expected behaviour for XML and XHTML.
 670	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 671
 672	// property lexer.xml.allow.scripts
 673	//	Set to 0 to disable scripts in XML.
 674	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 675
 676	// property lexer.html.mako
 677	//	Set to 1 to enable the mako template language.
 678	const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
 679
 680	// property lexer.html.django
 681	//	Set to 1 to enable the django template language.
 682	const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
 683
 684	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 685	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 686	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 687
 688	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 689	int levelCurrent = levelPrev;
 690	int visibleChars = 0;
 691	int lineStartVisibleChars = 0;
 692
 693	int chPrev = ' ';
 694	int ch = ' ';
 695	int chPrevNonWhite = ' ';
 696	// look back to set chPrevNonWhite properly for better regex colouring
 697	if (scriptLanguage == eScriptJS && startPos > 0) {
 698		int back = startPos;
 699		int style = 0;
 700		while (--back) {
 701			style = styler.StyleAt(back);
 702			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 703				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 704				break;
 705		}
 706		if (style == SCE_HJ_SYMBOLS) {
 707			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 708		}
 709	}
 710
 711	styler.StartSegment(startPos);
 712	const int lengthDoc = startPos + length;
 713	for (int i = startPos; i < lengthDoc; i++) {
 714		const int chPrev2 = chPrev;
 715		chPrev = ch;
 716		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 717			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 718			chPrevNonWhite = ch;
 719		ch = static_cast<unsigned char>(styler[i]);
 720		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 721		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 722
 723		// Handle DBCS codepages
 724		if (styler.IsLeadByte(static_cast<char>(ch))) {
 725			chPrev = ' ';
 726			i += 1;
 727			continue;
 728		}
 729
 730		if ((!IsASpace(ch) || !foldCompact) && fold)
 731			visibleChars++;
 732		if (!IsASpace(ch))
 733			lineStartVisibleChars++;
 734
 735		// decide what is the current state to print (depending of the script tag)
 736		StateToPrint = statePrintForState(state, inScriptType);
 737
 738		// handle script folding
 739		if (fold) {
 740			switch (scriptLanguage) {
 741			case eScriptJS:
 742			case eScriptPHP:
 743				//not currently supported				case eScriptVBS:
 744
 745				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 746				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 747				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 748					if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 749						levelCurrent += ((ch == '{') || (ch == '/')) ? 1 : -1;
 750					}
 751				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 752					levelCurrent--;
 753				}
 754				break;
 755			case eScriptPython:
 756				if (state != SCE_HP_COMMENTLINE) {
 757					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 758						levelCurrent++;
 759					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 760						// check if the number of tabs is lower than the level
 761						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 762						for (int j = 0; Findlevel > 0; j++) {
 763							char chTmp = styler.SafeGetCharAt(i + j + 1);
 764							if (chTmp == '\t') {
 765								Findlevel -= 8;
 766							} else if (chTmp == ' ') {
 767								Findlevel--;
 768							} else {
 769								break;
 770							}
 771						}
 772
 773						if (Findlevel > 0) {
 774							levelCurrent -= Findlevel / 8;
 775							if (Findlevel % 8)
 776								levelCurrent--;
 777						}
 778					}
 779				}
 780				break;
 781			default:
 782				break;
 783			}
 784		}
 785
 786		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 787			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 788			// Avoid triggering two times on Dos/Win
 789			// New line -> record any line state onto /next/ line
 790			if (fold) {
 791				int lev = levelPrev;
 792				if (visibleChars == 0)
 793					lev |= SC_FOLDLEVELWHITEFLAG;
 794				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 795					lev |= SC_FOLDLEVELHEADERFLAG;
 796
 797				styler.SetLevel(lineCurrent, lev);
 798				visibleChars = 0;
 799				levelPrev = levelCurrent;
 800			}
 801			styler.SetLineState(lineCurrent,
 802			                    ((inScriptType & 0x03) << 0) |
 803			                    ((tagOpened & 0x01) << 2) |
 804			                    ((tagClosing & 0x01) << 3) |
 805			                    ((aspScript & 0x0F) << 4) |
 806			                    ((clientScript & 0x0F) << 8) |
 807			                    ((beforePreProc & 0xFF) << 12));
 808			lineCurrent++;
 809			lineStartVisibleChars = 0;
 810		}
 811
 812		// Allow falling through to mako handling code if newline is going to end a block
 813		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
 814			(!isMako || (0 != strcmp(makoBlockType, "%")))) {
 815		}
 816
 817		// generic end of script processing
 818		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 819			// Check if it's the end of the script tag (or any other HTML tag)
 820			switch (state) {
 821				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 822			case SCE_H_DOUBLESTRING:
 823			case SCE_H_SINGLESTRING:
 824			case SCE_HJ_COMMENT:
 825			case SCE_HJ_COMMENTDOC:
 826			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 827			// the end of script marker from some JS interpreters.
 828			case SCE_HB_COMMENTLINE:
 829			case SCE_HBA_COMMENTLINE:
 830			case SCE_HJ_DOUBLESTRING:
 831			case SCE_HJ_SINGLESTRING:
 832			case SCE_HJ_REGEX:
 833			case SCE_HB_STRING:
 834			case SCE_HBA_STRING:
 835			case SCE_HP_STRING:
 836			case SCE_HP_TRIPLE:
 837			case SCE_HP_TRIPLEDOUBLE:
 838			case SCE_HPHP_HSTRING:
 839			case SCE_HPHP_SIMPLESTRING:
 840			case SCE_HPHP_COMMENT:
 841			case SCE_HPHP_COMMENTLINE:
 842				break;
 843			default :
 844				// check if the closing tag is a script tag
 845				if (const char *tag =
 846						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 847						state == SCE_H_COMMENT ? "comment" : 0) {
 848					int j = i + 2;
 849					int chr;
 850					do {
 851						chr = static_cast<int>(*tag++);
 852					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 853					if (chr != 0) break;
 854				}
 855				// closing tag of the script (it's a closing HTML tag anyway)
 856				styler.ColourTo(i - 1, StateToPrint);
 857				state = SCE_H_TAGUNKNOWN;
 858				inScriptType = eHtml;
 859				scriptLanguage = eScriptNone;
 860				clientScript = eScriptJS;
 861				i += 2;
 862				visibleChars += 2;
 863				tagClosing = true;
 864				continue;
 865			}
 866		}
 867
 868		/////////////////////////////////////
 869		// handle the start of PHP pre-processor = Non-HTML
 870		else if ((state != SCE_H_ASPAT) &&
 871		         !isPHPStringState(state) &&
 872		         (state != SCE_HPHP_COMMENT) &&
 873		         (state != SCE_HPHP_COMMENTLINE) &&
 874		         (ch == '<') &&
 875		         (chNext == '?') &&
 876				 !IsScriptCommentState(state) ) {
 877			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, eScriptPHP);
 878			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 879			styler.ColourTo(i - 1, StateToPrint);
 880			beforePreProc = state;
 881			i++;
 882			visibleChars++;
 883			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 884			if (scriptLanguage == eScriptXML)
 885				styler.ColourTo(i, SCE_H_XMLSTART);
 886			else
 887				styler.ColourTo(i, SCE_H_QUESTION);
 888			state = StateForScript(scriptLanguage);
 889			if (inScriptType == eNonHtmlScript)
 890				inScriptType = eNonHtmlScriptPreProc;
 891			else
 892				inScriptType = eNonHtmlPreProc;
 893			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 894			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 895				levelCurrent++;
 896			}
 897			// should be better
 898			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 899			continue;
 900		}
 901
 902		// handle the start Mako template Python code
 903		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
 904															 (lineStartVisibleChars == 1 && ch == '%') ||
 905															 (ch == '$' && chNext == '{') ||
 906															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
 907			if (ch == '%')
 908				strcpy(makoBlockType, "%");
 909			else if (ch == '$')
 910				strcpy(makoBlockType, "{");
 911			else if (chNext == '/')
 912				GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
 913			else
 914				GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
 915			styler.ColourTo(i - 1, StateToPrint);
 916			beforePreProc = state;
 917			if (inScriptType == eNonHtmlScript)
 918				inScriptType = eNonHtmlScriptPreProc;
 919			else
 920				inScriptType = eNonHtmlPreProc;
 921
 922			if (chNext == '/') {
 923				i += 2;
 924				visibleChars += 2;
 925			} else if (ch != '%') {
 926				i++;
 927				visibleChars++;
 928			}
 929			state = SCE_HP_START;
 930			scriptLanguage = eScriptPython;
 931			styler.ColourTo(i, SCE_H_ASP);
 932			if (foldHTMLPreprocessor && ch == '<')
 933				levelCurrent++;
 934
 935			if (ch != '%' && ch != '$') {
 936				i += strlen(makoBlockType);
 937				visibleChars += strlen(makoBlockType);
 938				if (keywords4.InList(makoBlockType))
 939					styler.ColourTo(i, SCE_HP_WORD);
 940				else
 941					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 942			}
 943
 944			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 945			continue;
 946		}
 947
 948		// handle the start Django template code
 949		else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
 950			if (chNext == '%')
 951				strcpy(djangoBlockType, "%");
 952			else
 953				strcpy(djangoBlockType, "{");
 954			styler.ColourTo(i - 1, StateToPrint);
 955			beforePreProc = state;
 956			if (inScriptType == eNonHtmlScript)
 957				inScriptType = eNonHtmlScriptPreProc;
 958			else
 959				inScriptType = eNonHtmlPreProc;
 960
 961			i += 1;
 962			visibleChars += 1;
 963			state = SCE_HP_START;
 964			beforeLanguage = scriptLanguage;
 965			scriptLanguage = eScriptPython;
 966			styler.ColourTo(i, SCE_H_ASP);
 967
 968			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 969			continue;
 970		}
 971
 972		// handle the start of ASP pre-processor = Non-HTML
 973		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
 974			styler.ColourTo(i - 1, StateToPrint);
 975			beforePreProc = state;
 976			if (inScriptType == eNonHtmlScript)
 977				inScriptType = eNonHtmlScriptPreProc;
 978			else
 979				inScriptType = eNonHtmlPreProc;
 980
 981			if (chNext2 == '@') {
 982				i += 2; // place as if it was the second next char treated
 983				visibleChars += 2;
 984				state = SCE_H_ASPAT;
 985			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
 986				styler.ColourTo(i + 3, SCE_H_ASP);
 987				state = SCE_H_XCCOMMENT;
 988				scriptLanguage = eScriptVBS;
 989				continue;
 990			} else {
 991				if (chNext2 == '=') {
 992					i += 2; // place as if it was the second next char treated
 993					visibleChars += 2;
 994				} else {
 995					i++; // place as if it was the next char treated
 996					visibleChars++;
 997				}
 998
 999				state = StateForScript(aspScript);
1000			}
1001			scriptLanguage = eScriptVBS;
1002			styler.ColourTo(i, SCE_H_ASP);
1003			// fold whole script
1004			if (foldHTMLPreprocessor)
1005				levelCurrent++;
1006			// should be better
1007			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1008			continue;
1009		}
1010
1011		/////////////////////////////////////
1012		// handle the start of SGML language (DTD)
1013		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1014				 (chPrev == '<') &&
1015				 (ch == '!') &&
1016				 (StateToPrint != SCE_H_CDATA) &&
1017				 (!IsCommentState(StateToPrint)) &&
1018				 (!IsScriptCommentState(StateToPrint)) ) {
1019			beforePreProc = state;
1020			styler.ColourTo(i - 2, StateToPrint);
1021			if ((chNext == '-') && (chNext2 == '-')) {
1022				state = SCE_H_COMMENT; // wait for a pending command
1023				styler.ColourTo(i + 2, SCE_H_COMMENT);
1024				i += 2; // follow styling after the --
1025			} else if (isWordCdata(i + 1, i + 7, styler)) {
1026				state = SCE_H_CDATA;
1027			} else {
1028				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1029				scriptLanguage = eScriptSGML;
1030				state = SCE_H_SGML_COMMAND; // wait for a pending command
1031			}
1032			// fold whole tag (-- when closing the tag)
1033			if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1034				levelCurrent++;
1035			continue;
1036		}
1037
1038		// handle the end of Mako Python code
1039		else if (isMako &&
1040			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1041				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1042				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1043			if (state == SCE_H_ASPAT) {
1044				aspScript = segIsScriptingIndicator(styler,
1045				                                    styler.GetStartSegment(), i - 1, aspScript);
1046			}
1047			if (state == SCE_HP_WORD) {
1048				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1049			} else {
1050				styler.ColourTo(i - 1, StateToPrint);
1051			}
1052			if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1053				i++;
1054				visibleChars++;
1055		    }
1056			if (0 != strcmp(makoBlockType, "%")) {
1057				styler.ColourTo(i, SCE_H_ASP);
1058			}
1059			state = beforePreProc;
1060			if (inScriptType == eNonHtmlScriptPreProc)
1061				inScriptType = eNonHtmlScript;
1062			else
1063				inScriptType = eHtml;
1064			if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1065				levelCurrent--;
1066			}
1067			scriptLanguage = eScriptNone;
1068			continue;
1069		}
1070
1071		// handle the end of Django template code
1072		else if (isDjango &&
1073			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1074				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1075				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1076			if (state == SCE_H_ASPAT) {
1077				aspScript = segIsScriptingIndicator(styler,
1078				                                    styler.GetStartSegment(), i - 1, aspScript);
1079			}
1080			if (state == SCE_HP_WORD) {
1081				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1082			} else {
1083				styler.ColourTo(i - 1, StateToPrint);
1084			}
1085			i += 1;
1086			visibleChars += 1;
1087			styler.ColourTo(i, SCE_H_ASP);
1088			state = beforePreProc;
1089			if (inScriptType == eNonHtmlScriptPreProc)
1090				inScriptType = eNonHtmlScript;
1091			else
1092				inScriptType = eHtml;
1093			scriptLanguage = beforeLanguage;
1094			continue;
1095		}
1096
1097		// handle the end of a pre-processor = Non-HTML
1098		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1099				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1100				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1101		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1102			if (state == SCE_H_ASPAT) {
1103				aspScript = segIsScriptingIndicator(styler,
1104				                                    styler.GetStartSegment(), i - 1, aspScript);
1105			}
1106			// Bounce out of any ASP mode
1107			switch (state) {
1108			case SCE_HJ_WORD:
1109				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1110				break;
1111			case SCE_HB_WORD:
1112				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1113				break;
1114			case SCE_HP_WORD:
1115				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1116				break;
1117			case SCE_HPHP_WORD:
1118				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1119				break;
1120			case SCE_H_XCCOMMENT:
1121				styler.ColourTo(i - 1, state);
1122				break;
1123			default :
1124				styler.ColourTo(i - 1, StateToPrint);
1125				break;
1126			}
1127			if (scriptLanguage != eScriptSGML) {
1128				i++;
1129				visibleChars++;
1130			}
1131			if (ch == '%')
1132				styler.ColourTo(i, SCE_H_ASP);
1133			else if (scriptLanguage == eScriptXML)
1134				styler.ColourTo(i, SCE_H_XMLEND);
1135			else if (scriptLanguage == eScriptSGML)
1136				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1137			else
1138				styler.ColourTo(i, SCE_H_QUESTION);
1139			state = beforePreProc;
1140			if (inScriptType == eNonHtmlScriptPreProc)
1141				inScriptType = eNonHtmlScript;
1142			else
1143				inScriptType = eHtml;
1144			// Unfold all scripting languages, except for XML tag
1145			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1146				levelCurrent--;
1147			}
1148			scriptLanguage = eScriptNone;
1149			continue;
1150		}
1151		/////////////////////////////////////
1152
1153		switch (state) {
1154		case SCE_H_DEFAULT:
1155			if (ch == '<') {
1156				// in HTML, fold on tag open and unfold on tag close
1157				tagOpened = true;
1158				tagClosing = (chNext == '/');
1159				styler.ColourTo(i - 1, StateToPrint);
1160				if (chNext != '!')
1161					state = SCE_H_TAGUNKNOWN;
1162			} else if (ch == '&') {
1163				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1164				state = SCE_H_ENTITY;
1165			}
1166			break;
1167		case SCE_H_SGML_DEFAULT:
1168		case SCE_H_SGML_BLOCK_DEFAULT:
1169//			if (scriptLanguage == eScriptSGMLblock)
1170//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1171
1172			if (ch == '\"') {
1173				styler.ColourTo(i - 1, StateToPrint);
1174				state = SCE_H_SGML_DOUBLESTRING;
1175			} else if (ch == '\'') {
1176				styler.ColourTo(i - 1, StateToPrint);
1177				state = SCE_H_SGML_SIMPLESTRING;
1178			} else if ((ch == '-') && (chPrev == '-')) {
1179				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1180					styler.ColourTo(i - 2, StateToPrint);
1181				}
1182				state = SCE_H_SGML_COMMENT;
1183			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1184				styler.ColourTo(i - 2, StateToPrint);
1185				state = SCE_H_SGML_ENTITY;
1186			} else if (ch == '#') {
1187				styler.ColourTo(i - 1, StateToPrint);
1188				state = SCE_H_SGML_SPECIAL;
1189			} else if (ch == '[') {
1190				styler.ColourTo(i - 1, StateToPrint);
1191				scriptLanguage = eScriptSGMLblock;
1192				state = SCE_H_SGML_BLOCK_DEFAULT;
1193			} else if (ch == ']') {
1194				if (scriptLanguage == eScriptSGMLblock) {
1195					styler.ColourTo(i, StateToPrint);
1196					scriptLanguage = eScriptSGML;
1197				} else {
1198					styler.ColourTo(i - 1, StateToPrint);
1199					styler.ColourTo(i, SCE_H_SGML_ERROR);
1200				}
1201				state = SCE_H_SGML_DEFAULT;
1202			} else if (scriptLanguage == eScriptSGMLblock) {
1203				if ((ch == '!') && (chPrev == '<')) {
1204					styler.ColourTo(i - 2, StateToPrint);
1205					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1206					state = SCE_H_SGML_COMMAND;
1207				} else if (ch == '>') {
1208					styler.ColourTo(i - 1, StateToPrint);
1209					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1210				}
1211			}
1212			break;
1213		case SCE_H_SGML_COMMAND:
1214			if ((ch == '-') && (chPrev == '-')) {
1215				styler.ColourTo(i - 2, StateToPrint);
1216				state = SCE_H_SGML_COMMENT;
1217			} else if (!issgmlwordchar(ch)) {
1218				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1219					styler.ColourTo(i - 1, StateToPrint);
1220					state = SCE_H_SGML_1ST_PARAM;
1221				} else {
1222					state = SCE_H_SGML_ERROR;
1223				}
1224			}
1225			break;
1226		case SCE_H_SGML_1ST_PARAM:
1227			// wait for the beginning of the word
1228			if ((ch == '-') && (chPrev == '-')) {
1229				if (scriptLanguage == eScriptSGMLblock) {
1230					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1231				} else {
1232					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1233				}
1234				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1235			} else if (issgmlwordchar(ch)) {
1236				if (scriptLanguage == eScriptSGMLblock) {
1237					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1238				} else {
1239					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1240				}
1241				// find the length of the word
1242				int size = 1;
1243				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1244					size++;
1245				styler.ColourTo(i + size - 1, StateToPrint);
1246				i += size - 1;
1247				visibleChars += size - 1;
1248				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1249				if (scriptLanguage == eScriptSGMLblock) {
1250					state = SCE_H_SGML_BLOCK_DEFAULT;
1251				} else {
1252					state = SCE_H_SGML_DEFAULT;
1253				}
1254				continue;
1255			}
1256			break;
1257		case SCE_H_SGML_ERROR:
1258			if ((ch == '-') && (chPrev == '-')) {
1259				styler.ColourTo(i - 2, StateToPrint);
1260				state = SCE_H_SGML_COMMENT;
1261			}
1262		case SCE_H_SGML_DOUBLESTRING:
1263			if (ch == '\"') {
1264				styler.ColourTo(i, StateToPrint);
1265				state = SCE_H_SGML_DEFAULT;
1266			}
1267			break;
1268		case SCE_H_SGML_SIMPLESTRING:
1269			if (ch == '\'') {
1270				styler.ColourTo(i, StateToPrint);
1271				state = SCE_H_SGML_DEFAULT;
1272			}
1273			break;
1274		case SCE_H_SGML_COMMENT:
1275			if ((ch == '-') && (chPrev == '-')) {
1276				styler.ColourTo(i, StateToPrint);
1277				state = SCE_H_SGML_DEFAULT;
1278			}
1279			break;
1280		case SCE_H_CDATA:
1281			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1282				styler.ColourTo(i, StateToPrint);
1283				state = SCE_H_DEFAULT;
1284				levelCurrent--;
1285			}
1286			break;
1287		case SCE_H_COMMENT:
1288			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1289				styler.ColourTo(i, StateToPrint);
1290				state = SCE_H_DEFAULT;
1291				levelCurrent--;
1292			}
1293			break;
1294		case SCE_H_SGML_1ST_PARAM_COMMENT:
1295			if ((ch == '-') && (chPrev == '-')) {
1296				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1297				state = SCE_H_SGML_1ST_PARAM;
1298			}
1299			break;
1300		case SCE_H_SGML_SPECIAL:
1301			if (!(isascii(ch) && isupper(ch))) {
1302				styler.ColourTo(i - 1, StateToPrint);
1303				if (isalnum(ch)) {
1304					state = SCE_H_SGML_ERROR;
1305				} else {
1306					state = SCE_H_SGML_DEFAULT;
1307				}
1308			}
1309			break;
1310		case SCE_H_SGML_ENTITY:
1311			if (ch == ';') {
1312				styler.ColourTo(i, StateToPrint);
1313				state = SCE_H_SGML_DEFAULT;
1314			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1315				styler.ColourTo(i, SCE_H_SGML_ERROR);
1316				state = SCE_H_SGML_DEFAULT;
1317			}
1318			break;
1319		case SCE_H_ENTITY:
1320			if (ch == ';') {
1321				styler.ColourTo(i, StateToPrint);
1322				state = SCE_H_DEFAULT;
1323			}
1324			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1325				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1326				if (!isascii(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1327					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1328				else
1329					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1330				state = SCE_H_DEFAULT;
1331			}
1332			break;
1333		case SCE_H_TAGUNKNOWN:
1334			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1335				int eClass = classifyTagHTML(styler.GetStartSegment(),
1336					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1337				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1338					if (!tagClosing) {
1339						inScriptType = eNonHtmlScript;
1340						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1341					} else {
1342						scriptLanguage = eScriptNone;
1343					}
1344					eClass = SCE_H_TAG;
1345				}
1346				if (ch == '>') {
1347					styler.ColourTo(i, eClass);
1348					if (inScriptType == eNonHtmlScript) {
1349						state = StateForScript(scriptLanguage);
1350					} else {
1351						state = SCE_H_DEFAULT;
1352					}
1353					tagOpened = false;
1354					if (!tagDontFold) {
1355						if (tagClosing) {
1356							levelCurrent--;
1357						} else {
1358							levelCurrent++;
1359						}
1360					}
1361					tagClosing = false;
1362				} else if (ch == '/' && chNext == '>') {
1363					if (eClass == SCE_H_TAGUNKNOWN) {
1364						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1365					} else {
1366						styler.ColourTo(i - 1, StateToPrint);
1367						styler.ColourTo(i + 1, SCE_H_TAGEND);
1368					}
1369					i++;
1370					ch = chNext;
1371					state = SCE_H_DEFAULT;
1372					tagOpened = false;
1373				} else {
1374					if (eClass != SCE_H_TAGUNKNOWN) {
1375						if (eClass == SCE_H_SGML_DEFAULT) {
1376							state = SCE_H_SGML_DEFAULT;
1377						} else {
1378							state = SCE_H_OTHER;
1379						}
1380					}
1381				}
1382			}
1383			break;
1384		case SCE_H_ATTRIBUTE:
1385			if (!setAttributeContinue.Contains(ch)) {
1386				if (inScriptType == eNonHtmlScript) {
1387					int scriptLanguagePrev = scriptLanguage;
1388					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1389					scriptLanguage = clientScript;
1390					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1391						inScriptType = eHtml;
1392				}
1393				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1394				if (ch == '>') {
1395					styler.ColourTo(i, SCE_H_TAG);
1396					if (inScriptType == eNonHtmlScript) {
1397						state = StateForScript(scriptLanguage);
1398					} else {
1399						state = SCE_H_DEFAULT;
1400					}
1401					tagOpened = false;
1402					if (!tagDontFold) {
1403						if (tagClosing) {
1404							levelCurrent--;
1405						} else {
1406							levelCurrent++;
1407						}
1408					}
1409					tagClosing = false;
1410				} else if (ch == '=') {
1411					styler.ColourTo(i, SCE_H_OTHER);
1412					state = SCE_H_VALUE;
1413				} else {
1414					state = SCE_H_OTHER;
1415				}
1416			}
1417			break;
1418		case SCE_H_OTHER:
1419			if (ch == '>') {
1420				styler.ColourTo(i - 1, StateToPrint);
1421				styler.ColourTo(i, SCE_H_TAG);
1422				if (inScriptType == eNonHtmlScript) {
1423					state = StateForScript(scriptLanguage);
1424				} else {
1425					state = SCE_H_DEFAULT;
1426				}
1427				tagOpened = false;
1428				if (!tagDontFold) {
1429					if (tagClosing) {
1430						levelCurrent--;
1431					} else {
1432						levelCurrent++;
1433					}
1434				}
1435				tagClosing = false;
1436			} else if (ch == '\"') {
1437				styler.ColourTo(i - 1, StateToPrint);
1438				state = SCE_H_DOUBLESTRING;
1439			} else if (ch == '\'') {
1440				styler.ColourTo(i - 1, StateToPrint);
1441				state = SCE_H_SINGLESTRING;
1442			} else if (ch == '=') {
1443				styler.ColourTo(i, StateToPrint);
1444				state = SCE_H_VALUE;
1445			} else if (ch == '/' && chNext == '>') {
1446				styler.ColourTo(i - 1, StateToPrint);
1447				styler.ColourTo(i + 1, SCE_H_TAGEND);
1448				i++;
1449				ch = chNext;
1450				state = SCE_H_DEFAULT;
1451				tagOpened = false;
1452			} else if (ch == '?' && chNext == '>') {
1453				styler.ColourTo(i - 1, StateToPrint);
1454				styler.ColourTo(i + 1, SCE_H_XMLEND);
1455				i++;
1456				ch = chNext;
1457				state = SCE_H_DEFAULT;
1458			} else if (setHTMLWord.Contains(ch)) {
1459				styler.ColourTo(i - 1, StateToPrint);
1460				state = SCE_H_ATTRIBUTE;
1461			}
1462			break;
1463		case SCE_H_DOUBLESTRING:
1464			if (ch == '\"') {
1465				if (inScriptType == eNonHtmlScript) {
1466					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1467				}
1468				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1469				state = SCE_H_OTHER;
1470			}
1471			break;
1472		case SCE_H_SINGLESTRING:
1473			if (ch == '\'') {
1474				if (inScriptType == eNonHtmlScript) {
1475					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1476				}
1477				styler.ColourTo(i, SCE_H_SINGLESTRING);
1478				state = SCE_H_OTHER;
1479			}
1480			break;
1481		case SCE_H_VALUE:
1482			if (!setHTMLWord.Contains(ch)) {
1483				if (ch == '\"' && chPrev == '=') {
1484					// Should really test for being first character
1485					state = SCE_H_DOUBLESTRING;
1486				} else if (ch == '\'' && chPrev == '=') {
1487					state = SCE_H_SINGLESTRING;
1488				} else {
1489					if (IsNumber(styler.GetStartSegment(), styler)) {
1490						styler.ColourTo(i - 1, SCE_H_NUMBER);
1491					} else {
1492						styler.ColourTo(i - 1, StateToPrint);
1493					}
1494					if (ch == '>') {
1495						styler.ColourTo(i, SCE_H_TAG);
1496						if (inScriptType == eNonHtmlScript) {
1497							state = StateForScript(scriptLanguage);
1498						} else {
1499							state = SCE_H_DEFAULT;
1500						}
1501						tagOpened = false;
1502						if (!tagDontFold) {
1503							if (tagClosing) {
1504								levelCurrent--;
1505							} else {
1506								levelCurrent++;
1507							}
1508						}
1509						tagClosing = false;
1510					} else {
1511						state = SCE_H_OTHER;
1512					}
1513				}
1514			}
1515			break;
1516		case SCE_HJ_DEFAULT:
1517		case SCE_HJ_START:
1518		case SCE_HJ_SYMBOLS:
1519			if (IsAWordStart(ch)) {
1520				styler.ColourTo(i - 1, StateToPrint);
1521				state = SCE_HJ_WORD;
1522			} else if (ch == '/' && chNext == '*') {
1523				styler.ColourTo(i - 1, StateToPrint);
1524				if (chNext2 == '*')
1525					state = SCE_HJ_COMMENTDOC;
1526				else
1527					state = SCE_HJ_COMMENT;
1528			} else if (ch == '/' && chNext == '/') {
1529				styler.ColourTo(i - 1, StateToPrint);
1530				state = SCE_HJ_COMMENTLINE;
1531			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1532				styler.ColourTo(i - 1, StateToPrint);
1533				state = SCE_HJ_REGEX;
1534			} else if (ch == '\"') {
1535				styler.ColourTo(i - 1, StateToPrint);
1536				state = SCE_HJ_DOUBLESTRING;
1537			} else if (ch == '\'') {
1538				styler.ColourTo(i - 1, StateToPrint);
1539				state = SCE_HJ_SINGLESTRING;
1540			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1541			           styler.SafeGetCharAt(i + 3) == '-') {
1542				styler.ColourTo(i - 1, StateToPrint);
1543				state = SCE_HJ_COMMENTLINE;
1544			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1545				styler.ColourTo(i - 1, StateToPrint);
1546				state = SCE_HJ_COMMENTLINE;
1547				i += 2;
1548			} else if (IsOperator(ch)) {
1549				styler.ColourTo(i - 1, StateToPrint);
1550				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1551				state = SCE_HJ_DEFAULT;
1552			} else if ((ch == ' ') || (ch == '\t')) {
1553				if (state == SCE_HJ_START) {
1554					styler.ColourTo(i - 1, StateToPrint);
1555					state = SCE_HJ_DEFAULT;
1556				}
1557			}
1558			break;
1559		case SCE_HJ_WORD:
1560			if (!IsAWordChar(ch)) {
1561				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1562				//styler.ColourTo(i - 1, eHTJSKeyword);
1563				state = SCE_HJ_DEFAULT;
1564				if (ch == '/' && chNext == '*') {
1565					if (chNext2 == '*')
1566						state = SCE_HJ_COMMENTDOC;
1567					else
1568						state = SCE_HJ_COMMENT;
1569				} else if (ch == '/' && chNext == '/') {
1570					state = SCE_HJ_COMMENTLINE;
1571				} else if (ch == '\"') {
1572					state = SCE_HJ_DOUBLESTRING;
1573				} else if (ch == '\'') {
1574					state = SCE_HJ_SINGLESTRING;
1575				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1576					styler.ColourTo(i - 1, StateToPrint);
1577					state = SCE_HJ_COMMENTLINE;
1578					i += 2;
1579				} else if (IsOperator(ch)) {
1580					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1581					state = SCE_HJ_DEFAULT;
1582				}
1583			}
1584			break;
1585		case SCE_HJ_COMMENT:
1586		case SCE_HJ_COMMENTDOC:
1587			if (ch == '/' && chPrev == '*') {
1588				sty

Large files are truncated, but you can click here to view the full file