PageRenderTime 989ms CodeModel.GetById 121ms app.highlight 696ms RepoModel.GetById 99ms app.codeStats 1ms

/scintilla/lexers/LexHTML.cxx

https://github.com/npp-community/notepad-plus-svn-mirror
C++ | 2165 lines | 1934 code | 119 blank | 112 comment | 1426 complexity | 2eeb6d160c613dbfe23c37fd8f6be0d9 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Scintilla source code edit control
   2/** @file LexHTML.cxx
   3 ** Lexer for HTML.
   4 **/
   5// Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   6// The License.txt file describes the conditions under which this software may be distributed.
   7
   8#include <stdlib.h>

   9#include <string.h>

  10#include <stdio.h>

  11#include <stdarg.h>

  12#include <assert.h>

  13#include <ctype.h>

  14
  15#include "ILexer.h"

  16#include "Scintilla.h"

  17#include "SciLexer.h"

  18
  19#include "WordList.h"

  20#include "LexAccessor.h"

  21#include "Accessor.h"

  22#include "StyleContext.h"

  23#include "CharacterSet.h"

  24#include "LexerModule.h"

  25
  26#ifdef SCI_NAMESPACE

  27using namespace Scintilla;
  28#endif

  29
  // Distance between the base style range of each script language and its
  // SCE_H?A_* counterpart range; added to or removed from a state to switch ranges.
  #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
  #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
  #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

  // Kind of embedded content the lexer is currently inside.
  enum script_type {
  	eScriptNone = 0,
  	eScriptJS,
  	eScriptVBS,
  	eScriptPython,
  	eScriptPHP,
  	eScriptXML,
  	eScriptSGML,
  	eScriptSGMLblock,
  	eScriptComment
  };

  // Nesting mode of the current position; stored in the low 2 bits of the line state.
  enum script_mode {
  	eHtml = 0,
  	eNonHtmlScript,
  	eNonHtmlPreProc,
  	eNonHtmlScriptPreProc
  };
  36
  // Returns true when ch is an ASCII character that may continue a word:
  // a letter, a digit, '.' or '_'.
  // Values outside 0..0x7F are rejected up front. In particular a negative value
  // (for example a sign-extended char holding a byte >= 0x80) must never reach
  // isalnum(), whose behaviour is undefined for such arguments.
  static inline bool IsAWordChar(const int ch) {
  	if (ch < 0 || ch >= 0x80)
  		return false;
  	return isalnum(ch) || ch == '.' || ch == '_';
  }
  40
  // Returns true when ch is an ASCII character that may begin a word:
  // a letter, a digit or '_'.
  // Values outside 0..0x7F are rejected up front so that a negative value
  // (for example a sign-extended char holding a byte >= 0x80) never reaches
  // isalnum(), whose behaviour is undefined for such arguments.
  static inline bool IsAWordStart(const int ch) {
  	if (ch < 0 || ch >= 0x80)
  		return false;
  	return isalnum(ch) || ch == '_';
  }
  44
  // Reports whether ch is one of the punctuation characters treated as an operator.
  // ASCII letters and digits are never operators. Note that '.' is in the accepted set.
  inline bool IsOperator(int ch) {
  	if (isascii(ch) && isalnum(ch))
  		return false;
  	switch (ch) {
  	case '%': case '^': case '&': case '*':
  	case '(': case ')': case '-': case '+':
  	case '=': case '|': case '{': case '}':
  	case '[': case ']': case ':': case ';':
  	case '<': case '>': case ',': case '/':
  	case '?': case '!': case '.': case '~':
  		return true;
  	default:
  		return false;
  	}
  }
  58
  59static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
  60	size_t i = 0;
  61	for (; (i < end - start + 1) && (i < len-1); i++) {
  62		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
  63	}
  64	s[i] = '\0';
  65}
  66
  67static const char *GetNextWord(Accessor &styler, unsigned int start, char *s, size_t sLen) {
  68
  69	size_t i = 0;
  70	for (; i < sLen-1; i++) {
  71		char ch = static_cast<char>(styler.SafeGetCharAt(start + i));
  72		if ((i == 0) && !IsAWordStart(ch))
  73			break;
  74		if ((i > 0) && !IsAWordChar(ch))
  75			break;
  76		s[i] = ch;
  77	}
  78	s[i] = '\0';
  79
  80	return s;
  81}
  82
  83static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
  84	char s[100];
  85	GetTextSegment(styler, start, end, s, sizeof(s));
  86	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
  87	//if (strstr(s, "src"))	// External script
  88		//return eScriptNone;
  89	if (strstr(s, "vbs"))
  90		return eScriptVBS;
  91	if (strstr(s, "pyth"))
  92		return eScriptPython;
  93	if (strstr(s, "javas"))
  94		return eScriptJS;
  95	if (strstr(s, "jscr"))
  96		return eScriptJS;
  97	if (strstr(s, "php"))
  98		return eScriptPHP;
  99	if (strstr(s, "xml")) {
 100		const char *xml = strstr(s, "xml");
 101		for (const char *t=s; t<xml; t++) {
 102			if (!IsASpace(*t)) {
 103				return prevValue;
 104			}
 105		}
 106		return eScriptXML;
 107	}
 108
 109	return prevValue;
 110}
 111
 112static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
 113	int iResult = 0;
 114	char s[100];
 115	GetTextSegment(styler, start, end, s, sizeof(s));
 116	if (0 == strncmp(s, "php", 3)) {
 117		iResult = 3;
 118	}
 119
 120	return iResult;
 121}
 122
 123static script_type ScriptOfState(int state) {
 124	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 125		return eScriptPython;
 126	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 127		return eScriptVBS;
 128	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 129		return eScriptJS;
 130	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
 131		return eScriptPHP;
 132	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
 133		return eScriptSGML;
 134	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
 135		return eScriptSGMLblock;
 136	} else {
 137		return eScriptNone;
 138	}
 139}
 140
 141static int statePrintForState(int state, script_mode inScriptType) {
 142	int StateToPrint = state;
 143
 144	if (state >= SCE_HJ_START) {
 145		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
 146			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
 147		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
 148			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
 149		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
 150			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
 151		}
 152	}
 153
 154	return StateToPrint;
 155}
 156
 157static int stateForPrintState(int StateToPrint) {
 158	int state;
 159
 160	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
 161		state = StateToPrint - SCE_HA_PYTHON;
 162	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
 163		state = StateToPrint - SCE_HA_VBS;
 164	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
 165		state = StateToPrint - SCE_HA_JS;
 166	} else {
 167		state = StateToPrint;
 168	}
 169
 170	return state;
 171}
 172
 173static inline bool IsNumber(unsigned int start, Accessor &styler) {
 174	return IsADigit(styler[start]) || (styler[start] == '.') ||
 175	       (styler[start] == '-') || (styler[start] == '#');
 176}
 177
 178static inline bool isStringState(int state) {
 179	bool bResult;
 180
 181	switch (state) {
 182	case SCE_HJ_DOUBLESTRING:
 183	case SCE_HJ_SINGLESTRING:
 184	case SCE_HJA_DOUBLESTRING:
 185	case SCE_HJA_SINGLESTRING:
 186	case SCE_HB_STRING:
 187	case SCE_HBA_STRING:
 188	case SCE_HP_STRING:
 189	case SCE_HP_CHARACTER:
 190	case SCE_HP_TRIPLE:
 191	case SCE_HP_TRIPLEDOUBLE:
 192	case SCE_HPA_STRING:
 193	case SCE_HPA_CHARACTER:
 194	case SCE_HPA_TRIPLE:
 195	case SCE_HPA_TRIPLEDOUBLE:
 196	case SCE_HPHP_HSTRING:
 197	case SCE_HPHP_SIMPLESTRING:
 198	case SCE_HPHP_HSTRING_VARIABLE:
 199	case SCE_HPHP_COMPLEX_VARIABLE:
 200		bResult = true;
 201		break;
 202	default :
 203		bResult = false;
 204		break;
 205	}
 206	return bResult;
 207}
 208
 209static inline bool stateAllowsTermination(int state) {
 210	bool allowTermination = !isStringState(state);
 211	if (allowTermination) {
 212		switch (state) {
 213		case SCE_HB_COMMENTLINE:
 214		case SCE_HPHP_COMMENT:
 215		case SCE_HP_COMMENTLINE:
 216		case SCE_HPA_COMMENTLINE:
 217			allowTermination = false;
 218		}
 219	}
 220	return allowTermination;
 221}
 222
 223// not really well done, since it's only comments that should lex the %> and <%
 224static inline bool isCommentASPState(int state) {
 225	bool bResult;
 226
 227	switch (state) {
 228	case SCE_HJ_COMMENT:
 229	case SCE_HJ_COMMENTLINE:
 230	case SCE_HJ_COMMENTDOC:
 231	case SCE_HB_COMMENTLINE:
 232	case SCE_HP_COMMENTLINE:
 233	case SCE_HPHP_COMMENT:
 234	case SCE_HPHP_COMMENTLINE:
 235		bResult = true;
 236		break;
 237	default :
 238		bResult = false;
 239		break;
 240	}
 241	return bResult;
 242}
 243
 244static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 245	bool wordIsNumber = IsNumber(start, styler);
 246	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
 247	if (wordIsNumber) {
 248		chAttr = SCE_H_NUMBER;
 249	} else {
 250		char s[100];
 251		GetTextSegment(styler, start, end, s, sizeof(s));
 252		if (keywords.InList(s))
 253			chAttr = SCE_H_ATTRIBUTE;
 254	}
 255	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
 256		// No keywords -> all are known
 257		chAttr = SCE_H_ATTRIBUTE;
 258	styler.ColourTo(end, chAttr);
 259}
 260
 261static int classifyTagHTML(unsigned int start, unsigned int end,
 262                           WordList &keywords, Accessor &styler, bool &tagDontFold,
 263			   bool caseSensitive, bool isXml, bool allowScripts) {
 264	char s[30 + 2];
 265	// Copy after the '<'
 266	unsigned int i = 0;
 267	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
 268		char ch = styler[cPos];
 269		if ((ch != '<') && (ch != '/')) {
 270			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
 271		}
 272	}
 273
 274	//The following is only a quick hack, to see if this whole thing would work
 275	//we first need the tagname with a trailing space...
 276	s[i] = ' ';
 277	s[i+1] = '\0';
 278
 279	// if the current language is XML, I can fold any tag
 280	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
 281	//...to find it in the list of no-container-tags
 282	tagDontFold = (!isXml) && (NULL != strstr("meta link img area br hr input ", s));
 283
 284	//now we can remove the trailing space
 285	s[i] = '\0';
 286
 287	// No keywords -> all are known
 288	char chAttr = SCE_H_TAGUNKNOWN;
 289	if (s[0] == '!') {
 290		chAttr = SCE_H_SGML_DEFAULT;
 291	} else if (!keywords || keywords.InList(s)) {
 292		chAttr = SCE_H_TAG;
 293	}
 294	styler.ColourTo(end, chAttr);
 295	if (chAttr == SCE_H_TAG) {
 296		if (allowScripts && 0 == strcmp(s, "script")) {
 297			// check to see if this is a self-closing tag by sniffing ahead
 298			bool isSelfClose = false;
 299			for (unsigned int cPos = end; cPos <= end + 100; cPos++) {
 300				char ch = styler.SafeGetCharAt(cPos, '\0');
 301				if (ch == '\0' || ch == '>')
 302					break;
 303				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
 304					isSelfClose = true;
 305					break;
 306				}
 307			}
 308
 309			// do not enter a script state if the tag self-closed
 310			if (!isSelfClose)
 311				chAttr = SCE_H_SCRIPT;
 312		} else if (!isXml && 0 == strcmp(s, "comment")) {
 313			chAttr = SCE_H_COMMENT;
 314		}
 315	}
 316	return chAttr;
 317}
 318
 319static void classifyWordHTJS(unsigned int start, unsigned int end,
 320                             WordList &keywords, Accessor &styler, script_mode inScriptType) {
 321	char s[30 + 1];
 322	unsigned int i = 0;
 323	for (; i < end - start + 1 && i < 30; i++) {
 324		s[i] = styler[start + i];
 325	}
 326	s[i] = '\0';
 327
 328	char chAttr = SCE_HJ_WORD;
 329	bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
 330	if (wordIsNumber) {
 331		chAttr = SCE_HJ_NUMBER;
 332	} else if (keywords.InList(s)) {
 333		chAttr = SCE_HJ_KEYWORD;
 334	}
 335	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 336}
 337
 338static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
 339	char chAttr = SCE_HB_IDENTIFIER;
 340	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
 341	if (wordIsNumber)
 342		chAttr = SCE_HB_NUMBER;
 343	else {
 344		char s[100];
 345		GetTextSegment(styler, start, end, s, sizeof(s));
 346		if (keywords.InList(s)) {
 347			chAttr = SCE_HB_WORD;
 348			if (strcmp(s, "rem") == 0)
 349				chAttr = SCE_HB_COMMENTLINE;
 350		}
 351	}
 352	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 353	if (chAttr == SCE_HB_COMMENTLINE)
 354		return SCE_HB_COMMENTLINE;
 355	else
 356		return SCE_HB_DEFAULT;
 357}
 358
 359static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
 360	bool wordIsNumber = IsADigit(styler[start]);
 361	char s[30 + 1];
 362	unsigned int i = 0;
 363	for (; i < end - start + 1 && i < 30; i++) {
 364		s[i] = styler[start + i];
 365	}
 366	s[i] = '\0';
 367	char chAttr = SCE_HP_IDENTIFIER;
 368	if (0 == strcmp(prevWord, "class"))
 369		chAttr = SCE_HP_CLASSNAME;
 370	else if (0 == strcmp(prevWord, "def"))
 371		chAttr = SCE_HP_DEFNAME;
 372	else if (wordIsNumber)
 373		chAttr = SCE_HP_NUMBER;
 374	else if (keywords.InList(s))
 375		chAttr = SCE_HP_WORD;
 376	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
 377	strcpy(prevWord, s);
 378}
 379
 380// Update the word colour to default or keyword
 381// Called when in a PHP word
 382static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 383	char chAttr = SCE_HPHP_DEFAULT;
 384	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
 385	if (wordIsNumber)
 386		chAttr = SCE_HPHP_NUMBER;
 387	else {
 388		char s[100];
 389		GetTextSegment(styler, start, end, s, sizeof(s));
 390		if (keywords.InList(s))
 391			chAttr = SCE_HPHP_WORD;
 392	}
 393	styler.ColourTo(end, chAttr);
 394}
 395
 396static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
 397	char s[30 + 1];
 398	unsigned int i = 0;
 399	for (; i < end - start + 1 && i < 30; i++) {
 400		s[i] = styler[start + i];
 401	}
 402	s[i] = '\0';
 403	return keywords.InList(s);
 404}
 405
 406static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
 407	char s[30 + 1];
 408	unsigned int i = 0;
 409	for (; i < end - start + 1 && i < 30; i++) {
 410		s[i] = styler[start + i];
 411	}
 412	s[i] = '\0';
 413	return (0 == strcmp(s, "[CDATA["));
 414}
 415
 416// Return the first state to reach when entering a scripting language
 417static int StateForScript(script_type scriptLanguage) {
 418	int Result;
 419	switch (scriptLanguage) {
 420	case eScriptJS:
 421		Result = SCE_HJ_START;
 422		break;
 423	case eScriptPython:
 424		Result = SCE_HP_START;
 425		break;
 426	case eScriptPHP:
 427		Result = SCE_HPHP_DEFAULT;
 428		break;
 429	case eScriptXML:
 430		Result = SCE_H_TAGUNKNOWN;
 431		break;
 432	case eScriptSGML:
 433		Result = SCE_H_SGML_DEFAULT;
 434		break;
 435	case eScriptComment:
 436		Result = SCE_H_COMMENT;
 437		break;
 438	default :
 439		Result = SCE_HB_START;
 440		break;
 441	}
 442	return Result;
 443}
 444
 // True for every non-ASCII value, for ASCII letters and digits, and for the
 // punctuation characters '.', '-', '_', ':', '!' and '#'.
 static inline bool ishtmlwordchar(int ch) {
 	if (!isascii(ch))
 		return true;
 	if (isalnum(ch))
 		return true;
 	switch (ch) {
 	case '.': case '-': case '_': case ':': case '!': case '#':
 		return true;
 	default:
 		return false;
 	}
 }
 449
 // True for every non-ASCII value, for ASCII letters and digits, and for the
 // punctuation characters '.', '_', ':', '!', '#' and '['.
 static inline bool issgmlwordchar(int ch) {
 	if (!isascii(ch))
 		return true;
 	if (isalnum(ch))
 		return true;
 	switch (ch) {
 	case '.': case '_': case ':': case '!': case '#': case '[':
 		return true;
 	default:
 		return false;
 	}
 }
 454
 // True when ch may begin a PHP word: an ASCII letter, '_', or any value >= 0x7f.
 static inline bool IsPhpWordStart(int ch) {
 	if (ch >= 0x7f)
 		return true;
 	return isascii(ch) && (isalpha(ch) || ch == '_');
 }
 458
 459static inline bool IsPhpWordChar(int ch) {
 460	return IsADigit(ch) || IsPhpWordStart(ch);
 461}
 462
 463static bool InTagState(int state) {
 464	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
 465	       state == SCE_H_SCRIPT ||
 466	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
 467	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
 468	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
 469}
 470
 471static bool IsCommentState(const int state) {
 472	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
 473}
 474
 475static bool IsScriptCommentState(const int state) {
 476	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
 477		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
 478}
 479
 // True when ch is a carriage return or a line feed.
 static bool isLineEnd(int ch) {
 	switch (ch) {
 	case '\r':
 	case '\n':
 		return true;
 	default:
 		return false;
 	}
 }
 483
 // True when ch is '(', '=' or ','.
 static bool isOKBeforeRE(int ch) {
 	switch (ch) {
 	case '(':
 	case '=':
 	case ',':
 		return true;
 	default:
 		return false;
 	}
 }
 487
 // Decides whether ch/chNext terminate the current Mako block.
 //   ""                                   ends with "%>"
 //   inherit, namespace, include, page    end with "/>"
 //   "%"                                  ends at a line end (CR or LF)
 //   "{"                                  ends with '}'
 //   any other block type                 ends with '>'
 static bool isMakoBlockEnd(const int ch, const int chNext, const char *blockType) {
 	if (blockType[0] == '\0')
 		return (ch == '%') && (chNext == '>');

 	static const char *const closedBySlash[] = { "inherit", "namespace", "include", "page" };
 	for (size_t k = 0; k < sizeof(closedBySlash) / sizeof(closedBySlash[0]); k++) {
 		if (strcmp(blockType, closedBySlash[k]) == 0)
 			return (ch == '/') && (chNext == '>');
 	}

 	if (strcmp(blockType, "%") == 0)
 		return ch == '\r' || ch == '\n';
 	if (strcmp(blockType, "{") == 0)
 		return ch == '}';
 	return ch == '>';
 }
 504
 // Decides whether ch/chNext terminate the current Django block:
 // a "%" block ends with "%}" and a "{" block ends with "}}".
 // An empty or unrecognised block type never ends.
 static bool isDjangoBlockEnd(const int ch, const int chNext, const char *blockType) {
 	if (strcmp(blockType, "%") == 0)
 		return (ch == '%') && (chNext == '}');
 	if (strcmp(blockType, "{") == 0)
 		return (ch == '}') && (chNext == '}');
 	return false;
 }
 516
 517static bool isPHPStringState(int state) {
 518	return
 519	    (state == SCE_HPHP_HSTRING) ||
 520	    (state == SCE_HPHP_SIMPLESTRING) ||
 521	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
 522	    (state == SCE_HPHP_COMPLEX_VARIABLE);
 523}
 524
 525static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler, bool &isSimpleString) {
 526	int j;
 527	const int beginning = i - 1;
 528	bool isValidSimpleString = false;
 529
 530	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
 531		i++;
 532
 533	char ch = styler.SafeGetCharAt(i);
 534	const char chNext = styler.SafeGetCharAt(i + 1);
 535	if (!IsPhpWordStart(ch)) {
 536		if (ch == '\'' && IsPhpWordStart(chNext)) {
 537			i++;
 538			ch = chNext;
 539			isSimpleString = true;
 540		} else {
 541			phpStringDelimiter[0] = '\0';
 542			return beginning;
 543		}
 544	}
 545	phpStringDelimiter[0] = ch;
 546	i++;
 547
 548	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
 549		if (!IsPhpWordChar(styler[j])) {
 550			if (isSimpleString && (styler[j] == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
 551				isValidSimpleString = true;
 552				j++;
 553				break;
 554			} else {
 555				phpStringDelimiter[0] = '\0';
 556				return beginning;
 557			}
 558		}
 559		if (j - i < phpStringDelimiterSize - 2)
 560			phpStringDelimiter[j-i+1] = styler[j];
 561		else
 562			i++;
 563	}
 564	if (isSimpleString && !isValidSimpleString) {
 565		phpStringDelimiter[0] = '\0';
 566		return beginning;
 567	}
 568	phpStringDelimiter[j-i+1 - (isSimpleString ? 1 : 0)] = '\0';
 569	return j - 1;
 570}
 571
 572static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
 573                                  Accessor &styler, bool isXml) {
 574	WordList &keywords = *keywordlists[0];
 575	WordList &keywords2 = *keywordlists[1];
 576	WordList &keywords3 = *keywordlists[2];
 577	WordList &keywords4 = *keywordlists[3];
 578	WordList &keywords5 = *keywordlists[4];
 579	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
 580
 581	// Lexer for HTML requires more lexical states (8 bits worth) than most lexers
 582	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 583	char prevWord[200];
 584	prevWord[0] = '\0';
 585	char phpStringDelimiter[200]; // PHP is not limited in length, we are
 586	phpStringDelimiter[0] = '\0';
 587	int StateToPrint = initStyle;
 588	int state = stateForPrintState(StateToPrint);
 589	char makoBlockType[200];
 590	makoBlockType[0] = '\0';
 591	char djangoBlockType[2];
 592	djangoBlockType[0] = '\0';
 593
 594	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
 595	if (InTagState(state)) {
 596		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
 597			startPos--;
 598			length++;
 599		}
 600		state = SCE_H_DEFAULT;
 601	}
 602	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
 603	if (isPHPStringState(state)) {
 604		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
 605			startPos--;
 606			length++;
 607			state = styler.StyleAt(startPos);
 608		}
 609		if (startPos == 0)
 610			state = SCE_H_DEFAULT;
 611	}
 612	styler.StartAt(startPos, static_cast<char>(STYLE_MAX));
 613
 614	int lineCurrent = styler.GetLine(startPos);
 615	int lineState;
 616	if (lineCurrent > 0) {
 617		lineState = styler.GetLineState(lineCurrent-1);
 618	} else {
 619		// Default client and ASP scripting language is JavaScript
 620		lineState = eScriptJS << 8;
 621
 622		// property asp.default.language
 623		//	Script in ASP code is initially assumed to be in JavaScript.
 624		//	To change this to VBScript set asp.default.language to 2. Python is 3.
 625		lineState |= styler.GetPropertyInt("asp.default.language", eScriptVBS) << 4;
 626	}
 627	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
 628	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
 629	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
 630	bool tagDontFold = false; //some HTML tags should not be folded
 631	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
 632	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
 633	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
 634
 635	script_type scriptLanguage = ScriptOfState(state);
 636	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
 637	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
 638		scriptLanguage = eScriptComment;
 639	}
 640	script_type beforeLanguage = ScriptOfState(beforePreProc);
 641
 642	// property fold.html
 643	//	Folding is turned on or off for HTML and XML files with this option.
 644	//	The fold option must also be on for folding to occur.
 645	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
 646
 647	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
 648
 649	// property fold.html.preprocessor
 650	//	Folding is turned on or off for scripts embedded in HTML files with this option.
 651	//	The default is on.
 652	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
 653
 654	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
 655
 656	// property fold.hypertext.comment
 657	//	Allow folding for comments in scripts embedded in HTML.
 658	//	The default is off.
 659	const bool foldComment = fold && styler.GetPropertyInt("fold.hypertext.comment", 0) != 0;
 660
 661	// property fold.hypertext.heredoc
 662	//	Allow folding for heredocs in scripts embedded in HTML.
 663	//	The default is off.
 664	const bool foldHeredoc = fold && styler.GetPropertyInt("fold.hypertext.heredoc", 0) != 0;
 665
 666	// property html.tags.case.sensitive
 667	//	For XML and HTML, setting this property to 1 will make tags match in a case
 668	//	sensitive way which is the expected behaviour for XML and XHTML.
 669	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
 670
 671	// property lexer.xml.allow.scripts
 672	//	Set to 0 to disable scripts in XML.
 673	const bool allowScripts = styler.GetPropertyInt("lexer.xml.allow.scripts", 1) != 0;
 674
 675	// property lexer.html.mako
 676	//	Set to 1 to enable the mako template language.
 677	const bool isMako = styler.GetPropertyInt("lexer.html.mako", 0) != 0;
 678
 679	// property lexer.html.django
 680	//	Set to 1 to enable the django template language.
 681	const bool isDjango = styler.GetPropertyInt("lexer.html.django", 0) != 0;
 682
 683	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
 684	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
 685	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
 686
 687	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
 688	int levelCurrent = levelPrev;
 689	int visibleChars = 0;
 690	int lineStartVisibleChars = 0;
 691
 692	int chPrev = ' ';
 693	int ch = ' ';
 694	int chPrevNonWhite = ' ';
 695	// look back to set chPrevNonWhite properly for better regex colouring
 696	if (scriptLanguage == eScriptJS && startPos > 0) {
 697		int back = startPos;
 698		int style = 0;
 699		while (--back) {
 700			style = styler.StyleAt(back);
 701			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
 702				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
 703				break;
 704		}
 705		if (style == SCE_HJ_SYMBOLS) {
 706			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
 707		}
 708	}
 709
 710	styler.StartSegment(startPos);
 711	const int lengthDoc = startPos + length;
 712	for (int i = startPos; i < lengthDoc; i++) {
 713		const int chPrev2 = chPrev;
 714		chPrev = ch;
 715		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
 716			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
 717			chPrevNonWhite = ch;
 718		ch = static_cast<unsigned char>(styler[i]);
 719		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
 720		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
 721
 722		// Handle DBCS codepages
 723		if (styler.IsLeadByte(static_cast<char>(ch))) {
 724			chPrev = ' ';
 725			i += 1;
 726			continue;
 727		}
 728
 729		if ((!IsASpace(ch) || !foldCompact) && fold)
 730			visibleChars++;
 731		if (!IsASpace(ch))
 732			lineStartVisibleChars++;
 733
 734		// decide what is the current state to print (depending of the script tag)
 735		StateToPrint = statePrintForState(state, inScriptType);
 736
 737		// handle script folding
 738		if (fold) {
 739			switch (scriptLanguage) {
 740			case eScriptJS:
 741			case eScriptPHP:
 742				//not currently supported				case eScriptVBS:
 743
 744				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
 745				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
 746				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
 747					if (ch == '#') {
 748						int j = i + 1;
 749						while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
 750							j++;
 751						}
 752						if (styler.Match(j, "region") || styler.Match(j, "if")) {
 753							levelCurrent++;
 754						} else if (styler.Match(j, "end")) {
 755							levelCurrent--;
 756						}
 757					} else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
 758						levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
 759					}
 760				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
 761					levelCurrent--;
 762				}
 763				break;
 764			case eScriptPython:
 765				if (state != SCE_HP_COMMENTLINE) {
 766					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
 767						levelCurrent++;
 768					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
 769						// check if the number of tabs is lower than the level
 770						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
 771						for (int j = 0; Findlevel > 0; j++) {
 772							char chTmp = styler.SafeGetCharAt(i + j + 1);
 773							if (chTmp == '\t') {
 774								Findlevel -= 8;
 775							} else if (chTmp == ' ') {
 776								Findlevel--;
 777							} else {
 778								break;
 779							}
 780						}
 781
 782						if (Findlevel > 0) {
 783							levelCurrent -= Findlevel / 8;
 784							if (Findlevel % 8)
 785								levelCurrent--;
 786						}
 787					}
 788				}
 789				break;
 790			default:
 791				break;
 792			}
 793		}
 794
 795		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
 796			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
 797			// Avoid triggering two times on Dos/Win
 798			// New line -> record any line state onto /next/ line
 799			if (fold) {
 800				int lev = levelPrev;
 801				if (visibleChars == 0)
 802					lev |= SC_FOLDLEVELWHITEFLAG;
 803				if ((levelCurrent > levelPrev) && (visibleChars > 0))
 804					lev |= SC_FOLDLEVELHEADERFLAG;
 805
 806				styler.SetLevel(lineCurrent, lev);
 807				visibleChars = 0;
 808				levelPrev = levelCurrent;
 809			}
 810			styler.SetLineState(lineCurrent,
 811			                    ((inScriptType & 0x03) << 0) |
 812			                    ((tagOpened & 0x01) << 2) |
 813			                    ((tagClosing & 0x01) << 3) |
 814			                    ((aspScript & 0x0F) << 4) |
 815			                    ((clientScript & 0x0F) << 8) |
 816			                    ((beforePreProc & 0xFF) << 12));
 817			lineCurrent++;
 818			lineStartVisibleChars = 0;
 819		}
 820
 821		// Allow falling through to mako handling code if newline is going to end a block
 822		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
 823			(!isMako || (0 != strcmp(makoBlockType, "%")))) {
 824		}
 825
 826		// generic end of script processing
 827		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
 828			// Check if it's the end of the script tag (or any other HTML tag)
 829			switch (state) {
 830				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
 831			case SCE_H_DOUBLESTRING:
 832			case SCE_H_SINGLESTRING:
 833			case SCE_HJ_COMMENT:
 834			case SCE_HJ_COMMENTDOC:
 835			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
 836			// the end of script marker from some JS interpreters.
 837			case SCE_HB_COMMENTLINE:
 838			case SCE_HBA_COMMENTLINE:
 839			case SCE_HJ_DOUBLESTRING:
 840			case SCE_HJ_SINGLESTRING:
 841			case SCE_HJ_REGEX:
 842			case SCE_HB_STRING:
 843			case SCE_HBA_STRING:
 844			case SCE_HP_STRING:
 845			case SCE_HP_TRIPLE:
 846			case SCE_HP_TRIPLEDOUBLE:
 847			case SCE_HPHP_HSTRING:
 848			case SCE_HPHP_SIMPLESTRING:
 849			case SCE_HPHP_COMMENT:
 850			case SCE_HPHP_COMMENTLINE:
 851				break;
 852			default :
 853				// check if the closing tag is a script tag
 854				if (const char *tag =
 855						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
 856						state == SCE_H_COMMENT ? "comment" : 0) {
 857					int j = i + 2;
 858					int chr;
 859					do {
 860						chr = static_cast<int>(*tag++);
 861					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
 862					if (chr != 0) break;
 863				}
 864				// closing tag of the script (it's a closing HTML tag anyway)
 865				styler.ColourTo(i - 1, StateToPrint);
 866				state = SCE_H_TAGUNKNOWN;
 867				inScriptType = eHtml;
 868				scriptLanguage = eScriptNone;
 869				clientScript = eScriptJS;
 870				i += 2;
 871				visibleChars += 2;
 872				tagClosing = true;
 873				continue;
 874			}
 875		}
 876
 877		/////////////////////////////////////
 878		// handle the start of PHP pre-processor = Non-HTML
 879		else if ((state != SCE_H_ASPAT) &&
 880		         !isPHPStringState(state) &&
 881		         (state != SCE_HPHP_COMMENT) &&
 882		         (state != SCE_HPHP_COMMENTLINE) &&
 883		         (ch == '<') &&
 884		         (chNext == '?') &&
 885				 !IsScriptCommentState(state)) {
 886 			beforeLanguage = scriptLanguage;
 887			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
 888			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
 889			styler.ColourTo(i - 1, StateToPrint);
 890			beforePreProc = state;
 891			i++;
 892			visibleChars++;
 893			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
 894			if (scriptLanguage == eScriptXML)
 895				styler.ColourTo(i, SCE_H_XMLSTART);
 896			else
 897				styler.ColourTo(i, SCE_H_QUESTION);
 898			state = StateForScript(scriptLanguage);
 899			if (inScriptType == eNonHtmlScript)
 900				inScriptType = eNonHtmlScriptPreProc;
 901			else
 902				inScriptType = eNonHtmlPreProc;
 903			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
 904			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
 905				levelCurrent++;
 906			}
 907			// should be better
 908			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 909			continue;
 910		}
 911
 912		// handle the start Mako template Python code
 913		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
 914															 (lineStartVisibleChars == 1 && ch == '%') ||
 915															 (ch == '$' && chNext == '{') ||
 916															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
 917			if (ch == '%')
 918				strcpy(makoBlockType, "%");
 919			else if (ch == '$')
 920				strcpy(makoBlockType, "{");
 921			else if (chNext == '/')
 922				GetNextWord(styler, i+3, makoBlockType, sizeof(makoBlockType));
 923			else
 924				GetNextWord(styler, i+2, makoBlockType, sizeof(makoBlockType));
 925			styler.ColourTo(i - 1, StateToPrint);
 926			beforePreProc = state;
 927			if (inScriptType == eNonHtmlScript)
 928				inScriptType = eNonHtmlScriptPreProc;
 929			else
 930				inScriptType = eNonHtmlPreProc;
 931
 932			if (chNext == '/') {
 933				i += 2;
 934				visibleChars += 2;
 935			} else if (ch != '%') {
 936				i++;
 937				visibleChars++;
 938			}
 939			state = SCE_HP_START;
 940			scriptLanguage = eScriptPython;
 941			styler.ColourTo(i, SCE_H_ASP);
 942			if (foldHTMLPreprocessor && ch == '<')
 943				levelCurrent++;
 944
 945			if (ch != '%' && ch != '$') {
 946				i += strlen(makoBlockType);
 947				visibleChars += strlen(makoBlockType);
 948				if (keywords4.InList(makoBlockType))
 949					styler.ColourTo(i, SCE_HP_WORD);
 950				else
 951					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
 952			}
 953
 954			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 955			continue;
 956		}
 957
 958		// handle the start/end of Django comment
 959		else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
 960			styler.ColourTo(i - 1, StateToPrint);
 961			beforePreProc = state;
 962			beforeLanguage = scriptLanguage;
 963			if (inScriptType == eNonHtmlScript)
 964				inScriptType = eNonHtmlScriptPreProc;
 965			else
 966				inScriptType = eNonHtmlPreProc;
 967			i += 1;
 968			visibleChars += 1;
 969			scriptLanguage = eScriptComment;
 970			state = SCE_H_COMMENT;
 971			styler.ColourTo(i, SCE_H_ASP);
 972			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
 973			continue;
 974		} else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
 975			styler.ColourTo(i - 1, StateToPrint);
 976			i += 1;
 977			visibleChars += 1;
 978			styler.ColourTo(i, SCE_H_ASP);
 979			state = beforePreProc;
 980			if (inScriptType == eNonHtmlScriptPreProc)
 981				inScriptType = eNonHtmlScript;
 982			else
 983				inScriptType = eHtml;
 984			scriptLanguage = beforeLanguage;
 985			continue;
 986		}
 987
 988		// handle the start Django template code
 989		else if (isDjango && scriptLanguage != eScriptPython && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
 990			if (chNext == '%')
 991				strcpy(djangoBlockType, "%");
 992			else
 993				strcpy(djangoBlockType, "{");
 994			styler.ColourTo(i - 1, StateToPrint);
 995			beforePreProc = state;
 996			if (inScriptType == eNonHtmlScript)
 997				inScriptType = eNonHtmlScriptPreProc;
 998			else
 999				inScriptType = eNonHtmlPreProc;
1000
1001			i += 1;
1002			visibleChars += 1;
1003			state = SCE_HP_START;
1004			beforeLanguage = scriptLanguage;
1005			scriptLanguage = eScriptPython;
1006			styler.ColourTo(i, SCE_H_ASP);
1007
1008			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1009			continue;
1010		}
1011
1012		// handle the start of ASP pre-processor = Non-HTML
1013		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1014			styler.ColourTo(i - 1, StateToPrint);
1015			beforePreProc = state;
1016			if (inScriptType == eNonHtmlScript)
1017				inScriptType = eNonHtmlScriptPreProc;
1018			else
1019				inScriptType = eNonHtmlPreProc;
1020
1021			if (chNext2 == '@') {
1022				i += 2; // place as if it was the second next char treated
1023				visibleChars += 2;
1024				state = SCE_H_ASPAT;
1025			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1026				styler.ColourTo(i + 3, SCE_H_ASP);
1027				state = SCE_H_XCCOMMENT;
1028				scriptLanguage = eScriptVBS;
1029				continue;
1030			} else {
1031				if (chNext2 == '=') {
1032					i += 2; // place as if it was the second next char treated
1033					visibleChars += 2;
1034				} else {
1035					i++; // place as if it was the next char treated
1036					visibleChars++;
1037				}
1038
1039				state = StateForScript(aspScript);
1040			}
1041			scriptLanguage = eScriptVBS;
1042			styler.ColourTo(i, SCE_H_ASP);
1043			// fold whole script
1044			if (foldHTMLPreprocessor)
1045				levelCurrent++;
1046			// should be better
1047			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1048			continue;
1049		}
1050
1051		/////////////////////////////////////
1052		// handle the start of SGML language (DTD)
1053		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1054				 (chPrev == '<') &&
1055				 (ch == '!') &&
1056				 (StateToPrint != SCE_H_CDATA) &&
1057				 (!IsCommentState(StateToPrint)) &&
1058				 (!IsScriptCommentState(StateToPrint))) {
1059			beforePreProc = state;
1060			styler.ColourTo(i - 2, StateToPrint);
1061			if ((chNext == '-') && (chNext2 == '-')) {
1062				state = SCE_H_COMMENT; // wait for a pending command
1063				styler.ColourTo(i + 2, SCE_H_COMMENT);
1064				i += 2; // follow styling after the --
1065			} else if (isWordCdata(i + 1, i + 7, styler)) {
1066				state = SCE_H_CDATA;
1067			} else {
1068				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1069				scriptLanguage = eScriptSGML;
1070				state = SCE_H_SGML_COMMAND; // wait for a pending command
1071			}
1072			// fold whole tag (-- when closing the tag)
1073			if (foldHTMLPreprocessor || (state == SCE_H_COMMENT))
1074				levelCurrent++;
1075			continue;
1076		}
1077
1078		// handle the end of Mako Python code
1079		else if (isMako &&
1080			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1081				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1082				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1083			if (state == SCE_H_ASPAT) {
1084				aspScript = segIsScriptingIndicator(styler,
1085				                                    styler.GetStartSegment(), i - 1, aspScript);
1086			}
1087			if (state == SCE_HP_WORD) {
1088				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1089			} else {
1090				styler.ColourTo(i - 1, StateToPrint);
1091			}
1092			if (0 != strcmp(makoBlockType, "%") && (0 != strcmp(makoBlockType, "{")) && ch != '>') {
1093				i++;
1094				visibleChars++;
1095		    }
1096			if (0 != strcmp(makoBlockType, "%")) {
1097				styler.ColourTo(i, SCE_H_ASP);
1098			}
1099			state = beforePreProc;
1100			if (inScriptType == eNonHtmlScriptPreProc)
1101				inScriptType = eNonHtmlScript;
1102			else
1103				inScriptType = eHtml;
1104			if (foldHTMLPreprocessor && ch != '\n' && ch != '\r') {
1105				levelCurrent--;
1106			}
1107			scriptLanguage = eScriptNone;
1108			continue;
1109		}
1110
1111		// handle the end of Django template code
1112		else if (isDjango &&
1113			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1114				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1115				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1116			if (state == SCE_H_ASPAT) {
1117				aspScript = segIsScriptingIndicator(styler,
1118				                                    styler.GetStartSegment(), i - 1, aspScript);
1119			}
1120			if (state == SCE_HP_WORD) {
1121				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1122			} else {
1123				styler.ColourTo(i - 1, StateToPrint);
1124			}
1125			i += 1;
1126			visibleChars += 1;
1127			styler.ColourTo(i, SCE_H_ASP);
1128			state = beforePreProc;
1129			if (inScriptType == eNonHtmlScriptPreProc)
1130				inScriptType = eNonHtmlScript;
1131			else
1132				inScriptType = eHtml;
1133			scriptLanguage = beforeLanguage;
1134			continue;
1135		}
1136
1137		// handle the end of a pre-processor = Non-HTML
1138		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1139				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1140				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1141		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1142			if (state == SCE_H_ASPAT) {
1143				aspScript = segIsScriptingIndicator(styler,
1144				                                    styler.GetStartSegment(), i - 1, aspScript);
1145			}
1146			// Bounce out of any ASP mode
1147			switch (state) {
1148			case SCE_HJ_WORD:
1149				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1150				break;
1151			case SCE_HB_WORD:
1152				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1153				break;
1154			case SCE_HP_WORD:
1155				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1156				break;
1157			case SCE_HPHP_WORD:
1158				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1159				break;
1160			case SCE_H_XCCOMMENT:
1161				styler.ColourTo(i - 1, state);
1162				break;
1163			default :
1164				styler.ColourTo(i - 1, StateToPrint);
1165				break;
1166			}
1167			if (scriptLanguage != eScriptSGML) {
1168				i++;
1169				visibleChars++;
1170			}
1171			if (ch == '%')
1172				styler.ColourTo(i, SCE_H_ASP);
1173			else if (scriptLanguage == eScriptXML)
1174				styler.ColourTo(i, SCE_H_XMLEND);
1175			else if (scriptLanguage == eScriptSGML)
1176				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1177			else
1178				styler.ColourTo(i, SCE_H_QUESTION);
1179			state = beforePreProc;
1180			if (inScriptType == eNonHtmlScriptPreProc)
1181				inScriptType = eNonHtmlScript;
1182			else
1183				inScriptType = eHtml;
1184			// Unfold all scripting languages, except for XML tag
1185			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1186				levelCurrent--;
1187			}
1188			scriptLanguage = beforeLanguage;
1189			continue;
1190		}
1191		/////////////////////////////////////
1192
1193		switch (state) {
1194		case SCE_H_DEFAULT:
1195			if (ch == '<') {
1196				// in HTML, fold on tag open and unfold on tag close
1197				tagOpened = true;
1198				tagClosing = (chNext == '/');
1199				styler.ColourTo(i - 1, StateToPrint);
1200				if (chNext != '!')
1201					state = SCE_H_TAGUNKNOWN;
1202			} else if (ch == '&') {
1203				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1204				state = SCE_H_ENTITY;
1205			}
1206			break;
1207		case SCE_H_SGML_DEFAULT:
1208		case SCE_H_SGML_BLOCK_DEFAULT:
1209//			if (scriptLanguage == eScriptSGMLblock)
1210//				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1211
1212			if (ch == '\"') {
1213				styler.ColourTo(i - 1, StateToPrint);
1214				state = SCE_H_SGML_DOUBLESTRING;
1215			} else if (ch == '\'') {
1216				styler.ColourTo(i - 1, StateToPrint);
1217				state = SCE_H_SGML_SIMPLESTRING;
1218			} else if ((ch == '-') && (chPrev == '-')) {
1219				if (static_cast<int>(styler.GetStartSegment()) <= (i - 2)) {
1220					styler.ColourTo(i - 2, StateToPrint);
1221				}
1222				state = SCE_H_SGML_COMMENT;
1223			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
1224				styler.ColourTo(i - 2, StateToPrint);
1225				state = SCE_H_SGML_ENTITY;
1226			} else if (ch == '#') {
1227				styler.ColourTo(i - 1, StateToPrint);
1228				state = SCE_H_SGML_SPECIAL;
1229			} else if (ch == '[') {
1230				styler.ColourTo(i - 1, StateToPrint);
1231				scriptLanguage = eScriptSGMLblock;
1232				state = SCE_H_SGML_BLOCK_DEFAULT;
1233			} else if (ch == ']') {
1234				if (scriptLanguage == eScriptSGMLblock) {
1235					styler.ColourTo(i, StateToPrint);
1236					scriptLanguage = eScriptSGML;
1237				} else {
1238					styler.ColourTo(i - 1, StateToPrint);
1239					styler.ColourTo(i, SCE_H_SGML_ERROR);
1240				}
1241				state = SCE_H_SGML_DEFAULT;
1242			} else if (scriptLanguage == eScriptSGMLblock) {
1243				if ((ch == '!') && (chPrev == '<')) {
1244					styler.ColourTo(i - 2, StateToPrint);
1245					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1246					state = SCE_H_SGML_COMMAND;
1247				} else if (ch == '>') {
1248					styler.ColourTo(i - 1, StateToPrint);
1249					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1250				}
1251			}
1252			break;
1253		case SCE_H_SGML_COMMAND:
1254			if ((ch == '-') && (chPrev == '-')) {
1255				styler.ColourTo(i - 2, StateToPrint);
1256				state = SCE_H_SGML_COMMENT;
1257			} else if (!issgmlwordchar(ch)) {
1258				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1259					styler.ColourTo(i - 1, StateToPrint);
1260					state = SCE_H_SGML_1ST_PARAM;
1261				} else {
1262					state = SCE_H_SGML_ERROR;
1263				}
1264			}
1265			break;
1266		case SCE_H_SGML_1ST_PARAM:
1267			// wait for the beginning of the word
1268			if ((ch == '-') && (chPrev == '-')) {
1269				if (scriptLanguage == eScriptSGMLblock) {
1270					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1271				} else {
1272					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1273				}
1274				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1275			} else if (issgmlwordchar(ch)) {
1276				if (scriptLanguage == eScriptSGMLblock) {
1277					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1278				} else {
1279					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1280				}
1281				// find the length of the word
1282				int size = 1;
1283				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1284					size++;
1285				styler.ColourTo(i + size - 1, StateToPrint);
1286				i += size - 1;
1287				visibleChars += size - 1;
1288				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1289				if (scriptLanguage == eScriptSGMLblock) {
1290					state = SCE_H_SGML_BLOCK_DEFAULT;
1291				} else {
1292					state = SCE_H_SGML_DEFAULT;
1293				}
1294				continue;
1295			}
1296			break;
1297		case SCE_H_SGML_ERROR:
1298			if ((ch == '-') && (chPrev == '-')) {
1299				styler.ColourTo(i - 2, StateToPrint);
1300				state = SCE_H_SGML_COMMENT;
1301			}
1302		case SCE_H_SGML_DOUBLESTRING:
1303			if (ch == '\"') {
1304				styler.ColourTo(i, StateToPrint);
1305				state = SCE_H_SGML_DEFAULT;
1306			}
1307			break;
1308		case SCE_H_SGML_SIMPLESTRING:
1309			if (ch == '\'') {
1310				styler.ColourTo(i, StateToPrint);
1311				state = SCE_H_SGML_DEFAULT;
1312			}
1313			break;
1314		case SCE_H_SGML_COMMENT:
1315			if ((ch == '-') && (chPrev == '-')) {
1316				styler.ColourTo(i, StateToPrint);
1317				state = SCE_H_SGML_DEFAULT;
1318			}
1319			break;
1320		case SCE_H_CDATA:
1321			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1322				styler.ColourTo(i, StateToPrint);
1323				state = SCE_H_DEFAULT;
1324				levelCurrent--;
1325			}
1326			break;
1327		case SCE_H_COMMENT:
1328			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1329				styler.ColourTo(i, StateToPrint);
1330				state = SCE_H_DEFAULT;
1331				levelCurrent--;
1332			}
1333			break;
1334		case SCE_H_SGML_1ST_PARAM_COMMENT:
1335			if ((ch == '-') && (chPrev == '-')) {
1336				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1337				state = SCE_H_SGML_1ST_PARAM;
1338			}
1339			break;
1340		case SCE_H_SGML_SPECIAL:
1341			if (!(isascii(ch) && isupper(ch))) {
1342				styler.ColourTo(i - 1, StateToPrint);
1343				if (isalnum(ch)) {
1344					state = SCE_H_SGML_ERROR;
1345				} else {
1346					state = SCE_H_SGML_DEFAULT;
1347				}
1348			}
1349			break;
1350		case SCE_H_SGML_ENTITY:
1351			if (ch == ';') {
1352				styler.ColourTo(i, StateToPrint);
1353				state = SCE_H_SGML_DEFAULT;
1354			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1355				styler.ColourTo(i, SCE_H_SGML_ERROR);
1356				state = SCE_H_SGML_DEFAULT;
1357			}
1358			break;
1359		case SCE_H_ENTITY:
1360			if (ch == ';') {
1361				styler.ColourTo(i, StateToPrint);
1362				state = SCE_H_DEFAULT;
1363			}
1364			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1365				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1366				if (!isascii(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1367					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1368				else
1369					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1370				state = SCE_H_DEFAULT;
1371			}
1372			break;
1373		case SCE_H_TAGUNKNOWN:
1374			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1375				int eClass = classifyTagHTML(styler.GetStartSegment(),
1376					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts);
1377				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1378					if (!tagClosing) {
1379						inScriptType = eNonHtmlScript;
1380						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1381					} else {
1382						scriptLanguage = eScriptNone;
1383					}
1384					eClass = SCE_H_TAG;
1385				}
1386				if (ch == '>') {
1387					styler.ColourTo(i, eClass);
1388					if (inScriptType == eNonHtmlScript) {
1389						state = StateForScript(scriptLanguage);
1390					} else {
1391						state = SCE_H_DEFAULT;
1392					}
1393					tagOpened = false;
1394					if (!tagDontFold) {
1395						if (tagClosing) {
1396							levelCurrent--;
1397						} else {
1398							levelCurrent++;
1399						}
1400					}
1401					tagClosing = false;
1402				} else if (ch == '/' && chNext == '>') {
1403					if (eClass == SCE_H_TAGUNKNOWN) {
1404						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1405					} else {
1406						styler.ColourTo(i - 1, StateToPrint);
1407						styler.ColourTo(i + 1, SCE_H_TAGEND);
1408					}
1409					i++;
1410					ch = chNext;
1411					state = SCE_H_DEFAULT;
1412					tagOpened = false;
1413				} else {
1414					if (eClass != SCE_H_TAGUNKNOWN) {
1415						if (eClass == SCE_H_SGML_DEFAULT) {
1416							state = SCE_H_SGML_DEFAULT;
1417						} else {
1418							state = SCE_H_OTHER;
1419						}
1420					}
1421				}
1422			}
1423			break;
1424		case SCE_H_ATTRIBUTE:
1425			if (!setAttributeContinue.Contains(ch)) {
1426				if (inScriptType == eNonHtmlScript) {
1427					int scriptLanguagePrev = scriptLanguage;
1428					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1429					scriptLanguage = clientScript;
1430					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1431						inScriptType = eHtml;
1432				}
1433				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1434				if (ch == '>') {
1435					styler.ColourTo(i, SCE_H_TAG);
1436					if (inScriptType == eNonHtmlScript) {
1437						state = StateForScript(scriptLanguage);
1438					} else {
1439						state = SCE_H_DEFAULT;
1440					}
1441					tagOpened = false;
1442					if (!tagDontFold) {
1443						if (tagClosing) {
1444							levelCurrent--;
1445						} else {
1446							levelCurrent++;
1447						}
1448					}
1449					tagClosing = false;
1450				} else if (ch == '=') {
1451					styler.ColourTo(i, SCE_H_OTHER);
1452					state = SCE_H_VALUE;
1453				} else {
1454					state = SCE_H_OTHER;
1455				}
1456			}
1457			break;
1458		case SCE_H_OTHER:
1459			if (ch == '>') {
1460				styler.ColourTo(i - 1, StateToPrint);
1461				styler.ColourTo(i, SCE_H_TAG);
1462				if (inScriptType == eNonHtmlScript) {
1463					state = StateForScript(scriptLanguage);
1464				} else {
1465					state = SCE_H_DEFAULT;
1466				}
1467				tagOpened = false;
1468				if (!tagDontFold) {
1469					if (tagClosing) {
1470						levelCurrent--;
1471					} else {
1472						levelCurrent++;
1473					}
1474				}
1475				tagClosing = false;
1476			} else if (ch == '\"') {
1477				styler.ColourTo(i - 1, StateToPrint);
1478				state = SCE_H_DOUBLESTRING;
1479			} else if (ch == '\'') {
1480				styler.ColourTo(i - 1, StateToPrint);
1481				state = SCE_H_SINGLESTRING;
1482			} else if (ch == '=') {
1483				styler.ColourTo(i, StateToPrint);
1484				state = SCE_H_VALUE;
1485			} else if (ch == '/' && chNext == '>') {
1486				styler.ColourTo(i - 1, StateToPrint);
1487				styler.ColourTo(i + 1, SCE_H_TAGEND);
1488				i++;
1489				ch = chNext;
1490				state = SCE_H_DEFAULT;
1491				tagOpened = false;
1492			} else if (ch == '?' && chNext == '>') {
1493				styler.ColourTo(i - 1, StateToPrint);
1494				styler.ColourTo(i + 1, SCE_H_XMLEND);
1495				i++;
1496				ch = chNext;
1497				state = SCE_H_DEFAULT;
1498			} else if (setHTMLWord.Contains(ch)) {
1499				styler.ColourTo(i - 1, StateToPrint);
1500				state = SCE_H_ATTRIBUTE;
1501			}
1502			break;
1503		case SCE_H_DOUBLESTRING:
1504			if (ch == '\"') {
1505				if (inScriptType == eNonHtmlScript) {
1506					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1507				}
1508				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1509				state = SCE_H_OTHER;
1510			}
1511			break;
1512		case SCE_H_SINGLESTRING:
1513			if (ch == '\'') {
1514				if (inScriptType == eNonHtmlScript) {
1515					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1516				}
1517				styler.ColourTo(i, SCE_H_SINGLESTRING);
1518				state = SCE_H_OTHER;
1519			}
1520			break;
1521		case SCE_H_VALUE:
1522			if (!setHTMLWord.Contains(ch)) {
1523				if (ch == '\"' && chPrev == '=') {
1524					// Should really test for being first character
1525					state = SCE_H_DOUBLESTRING;
1526				} else if (ch == '\'' && chPrev == '=') {
1527					state = SCE_H_SINGLESTRING;
1528				} else {
1529					if (IsNumber(styler.GetStartSegment(), styler)) {
1530						styler.ColourTo(i - 1, SCE_H_NUMBER);
1531					} else {
1532						styler.ColourTo(i - 1, StateToPrint);
1533					}
1534					if (ch == '>') {
1535						styler.ColourTo(i, SCE_H_TAG);
1536						if (inScriptTyp

Large files are truncated, but you can click here to view the full file