PageRenderTime 157ms CodeModel.GetById 40ms app.highlight 96ms RepoModel.GetById 14ms app.codeStats 1ms

/lexers/LexLaTeX.cxx

https://bitbucket.org/nyamatongwe/unicodelineends
C++ | 539 lines | 496 code | 30 blank | 13 comment | 238 complexity | 20b31f00e0d35954c1e3fcdda050bd5f MD5 | raw file
  1// Scintilla source code edit control
  2/** @file LexLaTeX.cxx
  3 ** Lexer for LaTeX2e.
  4  **/
  5// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
  6// The License.txt file describes the conditions under which this software may be distributed.
  7
// Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
  9
 10#include <stdlib.h>
 11#include <string.h>
 12#include <stdio.h>
 13#include <stdarg.h>
 14#include <assert.h>
 15#include <ctype.h>
 16#include <vector>
 17
 18#include "ILexer.h"
 19#include "Scintilla.h"
 20#include "SciLexer.h"
 21
 22#include "PropSetSimple.h"
 23#include "WordList.h"
 24#include "LexAccessor.h"
 25#include "Accessor.h"
 26#include "StyleContext.h"
 27#include "CharacterSet.h"
 28#include "LexerModule.h"
 29#include "LexerBase.h"
 30
 31#ifdef SCI_NAMESPACE
 32using namespace Scintilla;
 33#endif
 34
 35using namespace std;
 36
 37struct latexFoldSave {
 38	latexFoldSave() : structLev(0) {
 39		for (int i = 0; i < 8; ++i) openBegins[i] = 0;
 40	}
 41	latexFoldSave(const latexFoldSave &save) : structLev(save.structLev) {
 42		for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
 43	}
 44	int openBegins[8];
 45	int structLev;
 46};
 47
 48class LexerLaTeX : public LexerBase {
 49private:
 50	vector<int> modes;
 51	void setMode(int line, int mode) {
 52		if (line >= static_cast<int>(modes.size())) modes.resize(line + 1, 0);
 53		modes[line] = mode;
 54	}
 55	int getMode(int line) {
 56		if (line >= 0 && line < static_cast<int>(modes.size())) return modes[line];
 57		return 0;
 58	}
 59	void truncModes(int numLines) {
 60		if (static_cast<int>(modes.size()) > numLines * 2 + 256)
 61			modes.resize(numLines + 128);
 62	}
 63	
 64	vector<latexFoldSave> saves;
 65	void setSave(int line, const latexFoldSave &save) {
 66		if (line >= static_cast<int>(saves.size())) saves.resize(line + 1);
 67		saves[line] = save;
 68	}
 69	void getSave(int line, latexFoldSave &save) {
 70		if (line >= 0 && line < static_cast<int>(saves.size())) save = saves[line];
 71		else {
 72			save.structLev = 0;
 73			for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
 74		}
 75	}
 76	void truncSaves(int numLines) {
 77		if (static_cast<int>(saves.size()) > numLines * 2 + 256)
 78			saves.resize(numLines + 128);
 79	}
 80public:
 81	static ILexer *LexerFactoryLaTeX() {
 82		return new LexerLaTeX();
 83	}
 84	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 85	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 86};
 87
 88static bool latexIsSpecial(int ch) {
 89	return (ch == '#') || (ch == '$') || (ch == '%') || (ch == '&') || (ch == '_') ||
 90		   (ch == '{') || (ch == '}') || (ch == ' ');
 91}
 92
 93static bool latexIsBlank(int ch) {
 94	return (ch == ' ') || (ch == '\t');
 95}
 96
 97static bool latexIsBlankAndNL(int ch) {
 98	return (ch == ' ') || (ch == '\t') || (ch == '\r') || (ch == '\n');
 99}
100
// True when ch is a 7-bit ASCII alphabetic character.
static bool latexIsLetter(int ch) {
	// Only values 0..0x7F are ASCII; reject everything else before
	// consulting the character classification.
	if (ch < 0 || ch > 0x7F)
		return false;
	return isalpha(ch) != 0;
}
104
105static bool latexIsTagValid(int &i, int l, Accessor &styler) {
106	while (i < l) {
107		if (styler.SafeGetCharAt(i) == '{') {
108			while (i < l) {
109				i++;
110				if (styler.SafeGetCharAt(i) == '}') {
111					return true;
112				}	else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
113                   styler.SafeGetCharAt(i)!='*') {
114					return false;
115				}
116			}
117		} else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
118			return false;
119		}
120		i++;
121	}
122	return false;
123}
124
125static bool latexNextNotBlankIs(int i, Accessor &styler, char needle) {
126  char ch;
127	while (i < styler.Length()) {
128    ch = styler.SafeGetCharAt(i);
129		if (!latexIsBlankAndNL(ch) && ch != '*') {
130      if (ch == needle)
131        return true;
132      else
133        return false;
134		}
135		i++;
136	}
137	return false;
138}
139
140static bool latexLastWordIs(int start, Accessor &styler, const char *needle) {
141	unsigned int i = 0;
142	unsigned int l = static_cast<unsigned int>(strlen(needle));
143	int ini = start-l+1;
144	char s[32];
145
146	while (i < l && i < 31) {
147		s[i] = styler.SafeGetCharAt(ini + i);
148		i++;
149	}
150	s[i] = '\0';
151
152	return (strcmp(s, needle) == 0);
153}
154
155static bool latexLastWordIsMathEnv(int pos, Accessor &styler) {
156	int i, j;
157	char s[32];
158	const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
159		"multiline", "displaymath", "eqnarray", "equation" };
160	if (styler.SafeGetCharAt(pos) != '}') return false;
161	for (i = pos - 1; i >= 0; --i) {
162		if (styler.SafeGetCharAt(i) == '{') break;
163		if (pos - i >= 20) return false;
164	}
165	if (i < 0 || i == pos - 1) return false;
166	++i;
167	for (j = 0; i + j < pos; ++j)
168		s[j] = styler.SafeGetCharAt(i + j);
169	s[j] = '\0';
170	if (j == 0) return false;
171	if (s[j - 1] == '*') s[--j] = '\0';
172	for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
173		if (strcmp(s, mathEnvs[i]) == 0) return true;
174	return false;
175}
176
177static inline void latexStateReset(int &mode, int &state) {
178	switch (mode) {
179	case 1:     state = SCE_L_MATH; break;
180	case 2:     state = SCE_L_MATH2; break;
181	default:    state = SCE_L_DEFAULT; break;
182	}
183}
184
185// There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
186// But I think it's already good enough.
187void SCI_METHOD LexerLaTeX::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
188	// startPos is assumed to be the first character of a line
189	Accessor styler(pAccess, &props);
190	styler.StartAt(startPos);
191	int mode = getMode(styler.GetLine(startPos) - 1);
192	int state = initStyle;
193	if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL)   // should not happen
194		latexStateReset(mode, state);
195	
196	char chNext = styler.SafeGetCharAt(startPos);
197	char chVerbatimDelim = '\0';
198	styler.StartSegment(startPos);
199	int lengthDoc = startPos + length;
200
201	for (int i = startPos; i < lengthDoc; i++) {
202		char ch = chNext;
203		chNext = styler.SafeGetCharAt(i + 1);
204
205		if (styler.IsLeadByte(ch)) {
206			i++;
207			chNext = styler.SafeGetCharAt(i + 1);
208			continue;
209		}
210		
211		if (ch == '\r' || ch == '\n')
212			setMode(styler.GetLine(i), mode);
213
214		switch (state) {
215		case SCE_L_DEFAULT :
216			switch (ch) {
217			case '\\' :
218				styler.ColourTo(i - 1, state);
219				if (latexIsLetter(chNext)) {
220					state = SCE_L_COMMAND;
221				} else if (latexIsSpecial(chNext)) {
222					styler.ColourTo(i + 1, SCE_L_SPECIAL);
223					i++;
224					chNext = styler.SafeGetCharAt(i + 1);
225				} else if (chNext == '\r' || chNext == '\n') {
226					styler.ColourTo(i, SCE_L_ERROR);
227				} else {
228					styler.ColourTo(i + 1, SCE_L_SHORTCMD);
229					if (chNext == '(') {
230						mode = 1;
231						state = SCE_L_MATH;
232					} else if (chNext == '[') {
233						mode = 2;
234						state = SCE_L_MATH2;
235					}
236					i++;
237					chNext = styler.SafeGetCharAt(i + 1);
238				}
239				break;
240			case '$' :
241				styler.ColourTo(i - 1, state);
242				if (chNext == '$') {
243					styler.ColourTo(i + 1, SCE_L_SHORTCMD);
244					mode = 2;
245					state = SCE_L_MATH2;
246					i++;
247					chNext = styler.SafeGetCharAt(i + 1);
248				} else {
249					styler.ColourTo(i, SCE_L_SHORTCMD);
250					mode = 1;
251					state = SCE_L_MATH;
252				}
253				break;
254			case '%' :
255				styler.ColourTo(i - 1, state);
256				state = SCE_L_COMMENT;
257				break;
258			}
259			break;	
260		// These 3 will never be reached.
261		case SCE_L_ERROR:
262		case SCE_L_SPECIAL:
263		case SCE_L_SHORTCMD:
264			break;
265		case SCE_L_COMMAND :
266			if (!latexIsLetter(chNext)) {
267				styler.ColourTo(i, state);
268				if (latexNextNotBlankIs(i + 1, styler, '[' )) {
269					state = SCE_L_CMDOPT;
270				} else if (latexLastWordIs(i, styler, "\\begin")) {
271					state = SCE_L_TAG;
272				} else if (latexLastWordIs(i, styler, "\\end")) {
273					state = SCE_L_TAG2;
274				} else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
275					chVerbatimDelim = chNext;
276					state = SCE_L_VERBATIM;
277				} else {
278					latexStateReset(mode, state);
279				}
280			}
281			break;
282		case SCE_L_CMDOPT :
283			if (ch == ']') {
284				styler.ColourTo(i, state);
285				latexStateReset(mode, state);
286			}
287			break;
288		case SCE_L_TAG :
289			if (latexIsTagValid(i, lengthDoc, styler)) {
290				styler.ColourTo(i, state);
291				latexStateReset(mode, state);
292				if (latexLastWordIs(i, styler, "{verbatim}")) {
293					state = SCE_L_VERBATIM;
294				} else if (latexLastWordIs(i, styler, "{comment}")) {
295					state = SCE_L_COMMENT2;
296				} else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
297					mode = 1;
298					state = SCE_L_MATH;
299				} else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
300					mode = 2;
301					state = SCE_L_MATH2;
302				}
303			} else {
304				styler.ColourTo(i, SCE_L_ERROR);
305				latexStateReset(mode, state);
306				ch = styler.SafeGetCharAt(i);
307				if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
308			}
309			chNext = styler.SafeGetCharAt(i+1);
310			break;
311		case SCE_L_TAG2 :
312			if (latexIsTagValid(i, lengthDoc, styler)) {
313				styler.ColourTo(i, state);
314				latexStateReset(mode, state);
315			} else {
316				styler.ColourTo(i, SCE_L_ERROR);
317				latexStateReset(mode, state);
318				ch = styler.SafeGetCharAt(i);
319				if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
320			}
321			chNext = styler.SafeGetCharAt(i+1);
322			break;
323		case SCE_L_MATH :
324			switch (ch) {
325			case '\\' :
326				styler.ColourTo(i - 1, state);
327				if (latexIsLetter(chNext)) {
328					int match = i + 3;
329					if (latexLastWordIs(match, styler, "\\end")) {
330						match++;
331						if (latexIsTagValid(match, lengthDoc, styler)) {
332							if (latexLastWordIs(match, styler, "{math}"))
333								mode = 0;
334						}
335					}
336					state = SCE_L_COMMAND;
337				} else if (latexIsSpecial(chNext)) {
338					styler.ColourTo(i + 1, SCE_L_SPECIAL);
339					i++;
340					chNext = styler.SafeGetCharAt(i + 1);
341				} else if (chNext == '\r' || chNext == '\n') {
342					styler.ColourTo(i, SCE_L_ERROR);
343				} else {
344					if (chNext == ')') {
345						mode = 0;
346						state = SCE_L_DEFAULT;
347					}
348					styler.ColourTo(i + 1, SCE_L_SHORTCMD);
349					i++;
350					chNext = styler.SafeGetCharAt(i + 1);
351				}
352				break;
353			case '$' :
354				styler.ColourTo(i - 1, state);
355				styler.ColourTo(i, SCE_L_SHORTCMD);
356				mode = 0;
357				state = SCE_L_DEFAULT;
358				break;
359			case '%' :
360				styler.ColourTo(i - 1, state);
361				state = SCE_L_COMMENT;
362				break;
363			}
364			break;
365		case SCE_L_MATH2 :
366			switch (ch) {
367			case '\\' :
368				styler.ColourTo(i - 1, state);
369				if (latexIsLetter(chNext)) {
370					int match = i + 3;
371					if (latexLastWordIs(match, styler, "\\end")) {
372						match++;
373						if (latexIsTagValid(match, lengthDoc, styler)) {
374							if (latexLastWordIsMathEnv(match, styler))
375								mode = 0;
376						}
377					}
378					state = SCE_L_COMMAND;
379				} else if (latexIsSpecial(chNext)) {
380					styler.ColourTo(i + 1, SCE_L_SPECIAL);
381					i++;
382					chNext = styler.SafeGetCharAt(i + 1);
383				} else if (chNext == '\r' || chNext == '\n') {
384					styler.ColourTo(i, SCE_L_ERROR);
385				} else {
386					if (chNext == ']') {
387						mode = 0;
388						state = SCE_L_DEFAULT;
389					}
390					styler.ColourTo(i + 1, SCE_L_SHORTCMD);
391					i++;
392					chNext = styler.SafeGetCharAt(i + 1);
393				}
394				break;
395			case '$' :
396				styler.ColourTo(i - 1, state);
397				if (chNext == '$') {
398					styler.ColourTo(i + 1, SCE_L_SHORTCMD);
399					i++;
400					chNext = styler.SafeGetCharAt(i + 1);
401					mode = 0;
402					state = SCE_L_DEFAULT;
403				} else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}	
404					styler.ColourTo(i, SCE_L_SHORTCMD);
405				}
406				break;
407			case '%' :
408				styler.ColourTo(i - 1, state);
409				state = SCE_L_COMMENT;
410				break;
411			}
412			break;
413		case SCE_L_COMMENT :
414			if (ch == '\r' || ch == '\n') {
415				styler.ColourTo(i - 1, state);
416				latexStateReset(mode, state);
417			}
418			break;
419		case SCE_L_COMMENT2 :
420			if (ch == '\\') {
421				int match = i + 3;
422				if (latexLastWordIs(match, styler, "\\end")) {
423					match++;
424					if (latexIsTagValid(match, lengthDoc, styler)) {
425						if (latexLastWordIs(match, styler, "{comment}")) {
426							styler.ColourTo(i - 1, state);
427							state = SCE_L_COMMAND;
428						}
429					}
430				}
431			}
432			break;
433		case SCE_L_VERBATIM :
434			if (ch == '\\') {
435				int match = i + 3;
436				if (latexLastWordIs(match, styler, "\\end")) {
437					match++;
438					if (latexIsTagValid(match, lengthDoc, styler)) {
439						if (latexLastWordIs(match, styler, "{verbatim}")) {
440							styler.ColourTo(i - 1, state);
441							state = SCE_L_COMMAND;
442						}
443					}
444				}
445			} else if (chNext == chVerbatimDelim) {
446				styler.ColourTo(i + 1, state);
447				latexStateReset(mode, state);
448				chVerbatimDelim = '\0';
449				i++;
450				chNext = styler.SafeGetCharAt(i + 1);
451			} else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
452				styler.ColourTo(i, SCE_L_ERROR);
453				latexStateReset(mode, state);
454				chVerbatimDelim = '\0';
455			}
456			break;
457		}
458	}
459	if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
460	styler.ColourTo(lengthDoc - 1, state);
461	styler.Flush();
462}
463
464static int latexFoldSaveToInt(const latexFoldSave &save) {
465	int sum = 0;
466	for (int i = 0; i <= save.structLev; ++i)
467		sum += save.openBegins[i];
468	return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
469}
470
// Computes fold levels for the lines covering [startPos, startPos + length).
//   startPos - first position to fold
//   length   - number of positions to fold
//   (unnamed int) - initial style, unused
//   pAccess  - document being folded
// The fold state at the end of the previous line is restored from the
// per-line cache, updated while each line is scanned, and stored back for
// every processed line. The level assigned to a line is the level in effect
// just before the first fold-relevant command on it.
void SCI_METHOD LexerLaTeX::Fold(unsigned int startPos, int length, int, IDocument *pAccess) {
	// Sectioning commands, shallowest first; the array index is used as the
	// sectioning depth (structLev).
	const char *structWords[7] = {"part", "chapter", "section", "subsection",
		"subsubsection", "paragraph", "subparagraph"};
	Accessor styler(pAccess, &props);
	unsigned int endPos = startPos + length;
	int curLine = styler.GetLine(startPos);
	latexFoldSave save;
	getSave(curLine - 1, save);
	do {
		char ch, buf[16];
		// lev < 0 means the level for this line has not been fixed yet.
		int i, j, lev = -1;
		bool needFold = false;
		// Scan one line, stopping at its end-of-line character.
		for (i = static_cast<int>(startPos); i < static_cast<int>(endPos); ++i) {
			ch = styler.SafeGetCharAt(i);
			if (ch == '\r' || ch == '\n') break;
			// Only backslashes that the lexer styled as a command are considered.
			if (ch != '\\' || styler.StyleAt(i) != SCE_L_COMMAND) continue;
			// Copy up to 15 letters of the command name into buf; i ends on
			// the last character consumed.
			for (j = 0; j < 15 && i + 1 < static_cast<int>(endPos); ++j, ++i) {
				buf[j] = styler.SafeGetCharAt(i + 1);
				if (!latexIsLetter(buf[j])) break;
			}
			buf[j] = '\0';
			if (strcmp(buf, "begin") == 0) {
				// Level is taken before the \begin is counted; the line
				// becomes a fold header.
				if (lev < 0) lev = latexFoldSaveToInt(save);
				++save.openBegins[save.structLev];
				needFold = true;
			}
			else if (strcmp(buf, "end") == 0) {
				// Drop back to the nearest sectioning depth that still has an
				// open \begin, then close one there (if any).
				while (save.structLev > 0 && save.openBegins[save.structLev] == 0)
					--save.structLev;
				if (lev < 0) lev = latexFoldSaveToInt(save);
				if (save.openBegins[save.structLev] > 0) --save.openBegins[save.structLev];
			}
			else {
				for (j = 0; j < 7; ++j)
					if (strcmp(buf, structWords[j]) == 0) break;
				// Not a sectioning command: nothing to do for folding.
				if (j >= 7) continue;
				save.structLev = j;   // level before the command
				// \begin counts left open at deeper depths are merged into
				// this depth so they are not lost.
				for (j = save.structLev + 1; j < 8; ++j) {
					save.openBegins[save.structLev] += save.openBegins[j];
					save.openBegins[j] = 0;
				}
				if (lev < 0) lev = latexFoldSaveToInt(save);
				++save.structLev;   // level after the command
				needFold = true;
			}
		}
		// No relevant command on this line: use the current level.
		if (lev < 0) lev = latexFoldSaveToInt(save);
		if (needFold) lev |= SC_FOLDLEVELHEADERFLAG;
		styler.SetLevel(curLine, lev);
		setSave(curLine, save);
		++curLine;
		startPos = styler.LineStart(curLine);
		// At the end of the document, also set the level of the final line
		// and trim the fold cache.
		if (static_cast<int>(startPos) == styler.Length()) {
			lev = latexFoldSaveToInt(save);
			styler.SetLevel(curLine, lev);
			setSave(curLine, save);
			truncSaves(curLine);
		}
	} while (startPos < endPos);
	styler.Flush();
}
534
// This lexer uses no keyword lists; the description array holds only its
// terminating null entry.
static const char *const emptyWordListDesc[] = {
	0
};

// Lexer module object for SCLEX_LATEX, named "latex", created through
// LexerLaTeX::LexerFactoryLaTeX.
LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);