PageRenderTime 26ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/lexers/LexLaTeX.cxx

https://bitbucket.org/nyamatongwe/unicodelineends
C++ | 539 lines | 496 code | 30 blank | 13 comment | 238 complexity | 20b31f00e0d35954c1e3fcdda050bd5f MD5 | raw file
  1. // Scintilla source code edit control
  2. /** @file LexLaTeX.cxx
  3. ** Lexer for LaTeX2e.
  4. **/
  5. // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
  6. // The License.txt file describes the conditions under which this software may be distributed.
  7. // Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include <assert.h>
  13. #include <ctype.h>
  14. #include <vector>
  15. #include "ILexer.h"
  16. #include "Scintilla.h"
  17. #include "SciLexer.h"
  18. #include "PropSetSimple.h"
  19. #include "WordList.h"
  20. #include "LexAccessor.h"
  21. #include "Accessor.h"
  22. #include "StyleContext.h"
  23. #include "CharacterSet.h"
  24. #include "LexerModule.h"
  25. #include "LexerBase.h"
  26. #ifdef SCI_NAMESPACE
  27. using namespace Scintilla;
  28. #endif
  29. using namespace std;
  /**
   * Folding state recorded at the end of a line.
   *
   * structLev is the current depth in the sectioning hierarchy and
   * openBegins[k] counts the \begin commands still open at depth k.
   * Both start at zero.
   *
   * Copy construction and copy assignment are the compiler-generated
   * member-wise ones, which copy the array and the level.
   */
  struct latexFoldSave {
      latexFoldSave() : structLev(0) {
          for (int i = 0; i < 8; ++i)
              openBegins[i] = 0;
      }
      int openBegins[8];
      int structLev;
  };
  40. class LexerLaTeX : public LexerBase {
  41. private:
  42. vector<int> modes;
  43. void setMode(int line, int mode) {
  44. if (line >= static_cast<int>(modes.size())) modes.resize(line + 1, 0);
  45. modes[line] = mode;
  46. }
  47. int getMode(int line) {
  48. if (line >= 0 && line < static_cast<int>(modes.size())) return modes[line];
  49. return 0;
  50. }
  51. void truncModes(int numLines) {
  52. if (static_cast<int>(modes.size()) > numLines * 2 + 256)
  53. modes.resize(numLines + 128);
  54. }
  55. vector<latexFoldSave> saves;
  56. void setSave(int line, const latexFoldSave &save) {
  57. if (line >= static_cast<int>(saves.size())) saves.resize(line + 1);
  58. saves[line] = save;
  59. }
  60. void getSave(int line, latexFoldSave &save) {
  61. if (line >= 0 && line < static_cast<int>(saves.size())) save = saves[line];
  62. else {
  63. save.structLev = 0;
  64. for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
  65. }
  66. }
  67. void truncSaves(int numLines) {
  68. if (static_cast<int>(saves.size()) > numLines * 2 + 256)
  69. saves.resize(numLines + 128);
  70. }
  71. public:
  72. static ILexer *LexerFactoryLaTeX() {
  73. return new LexerLaTeX();
  74. }
  75. void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
  76. void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
  77. };
  /**
   * Tell whether ch is one of the characters that, following a backslash,
   * is styled as a special: # $ % & _ { } or a space.
   */
  static bool latexIsSpecial(int ch) {
      switch (ch) {
      case '#':
      case '$':
      case '%':
      case '&':
      case '_':
      case '{':
      case '}':
      case ' ':
          return true;
      default:
          return false;
      }
  }
  /** Tell whether ch is horizontal white space: a space or a tab. */
  static bool latexIsBlank(int ch) {
      if (ch == '\t')
          return true;
      return ch == ' ';
  }
  /** Tell whether ch is a space, a tab, a carriage return or a line feed. */
  static bool latexIsBlankAndNL(int ch) {
      switch (ch) {
      case ' ':
      case '\t':
      case '\r':
      case '\n':
          return true;
      default:
          return false;
      }
  }
  /**
   * Tell whether ch is an ASCII letter (A-Z or a-z).
   *
   * Written as explicit range checks so that the result depends neither on
   * the current locale nor on the non-ISO isascii() extension; negative
   * values and values above 127 are never letters.
   */
  static bool latexIsLetter(int ch) {
      return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
  }
  91. static bool latexIsTagValid(int &i, int l, Accessor &styler) {
  92. while (i < l) {
  93. if (styler.SafeGetCharAt(i) == '{') {
  94. while (i < l) {
  95. i++;
  96. if (styler.SafeGetCharAt(i) == '}') {
  97. return true;
  98. } else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
  99. styler.SafeGetCharAt(i)!='*') {
  100. return false;
  101. }
  102. }
  103. } else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
  104. return false;
  105. }
  106. i++;
  107. }
  108. return false;
  109. }
  110. static bool latexNextNotBlankIs(int i, Accessor &styler, char needle) {
  111. char ch;
  112. while (i < styler.Length()) {
  113. ch = styler.SafeGetCharAt(i);
  114. if (!latexIsBlankAndNL(ch) && ch != '*') {
  115. if (ch == needle)
  116. return true;
  117. else
  118. return false;
  119. }
  120. i++;
  121. }
  122. return false;
  123. }
  124. static bool latexLastWordIs(int start, Accessor &styler, const char *needle) {
  125. unsigned int i = 0;
  126. unsigned int l = static_cast<unsigned int>(strlen(needle));
  127. int ini = start-l+1;
  128. char s[32];
  129. while (i < l && i < 31) {
  130. s[i] = styler.SafeGetCharAt(ini + i);
  131. i++;
  132. }
  133. s[i] = '\0';
  134. return (strcmp(s, needle) == 0);
  135. }
  136. static bool latexLastWordIsMathEnv(int pos, Accessor &styler) {
  137. int i, j;
  138. char s[32];
  139. const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
  140. "multiline", "displaymath", "eqnarray", "equation" };
  141. if (styler.SafeGetCharAt(pos) != '}') return false;
  142. for (i = pos - 1; i >= 0; --i) {
  143. if (styler.SafeGetCharAt(i) == '{') break;
  144. if (pos - i >= 20) return false;
  145. }
  146. if (i < 0 || i == pos - 1) return false;
  147. ++i;
  148. for (j = 0; i + j < pos; ++j)
  149. s[j] = styler.SafeGetCharAt(i + j);
  150. s[j] = '\0';
  151. if (j == 0) return false;
  152. if (s[j - 1] == '*') s[--j] = '\0';
  153. for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
  154. if (strcmp(s, mathEnvs[i]) == 0) return true;
  155. return false;
  156. }
  157. static inline void latexStateReset(int &mode, int &state) {
  158. switch (mode) {
  159. case 1: state = SCE_L_MATH; break;
  160. case 2: state = SCE_L_MATH2; break;
  161. default: state = SCE_L_DEFAULT; break;
  162. }
  163. }
  164. // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
  165. // But I think it's already good enough.
  166. void SCI_METHOD LexerLaTeX::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
  167. // startPos is assumed to be the first character of a line
  168. Accessor styler(pAccess, &props);
  169. styler.StartAt(startPos);
  170. int mode = getMode(styler.GetLine(startPos) - 1);
  171. int state = initStyle;
  172. if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL) // should not happen
  173. latexStateReset(mode, state);
  174. char chNext = styler.SafeGetCharAt(startPos);
  175. char chVerbatimDelim = '\0';
  176. styler.StartSegment(startPos);
  177. int lengthDoc = startPos + length;
  178. for (int i = startPos; i < lengthDoc; i++) {
  179. char ch = chNext;
  180. chNext = styler.SafeGetCharAt(i + 1);
  181. if (styler.IsLeadByte(ch)) {
  182. i++;
  183. chNext = styler.SafeGetCharAt(i + 1);
  184. continue;
  185. }
  186. if (ch == '\r' || ch == '\n')
  187. setMode(styler.GetLine(i), mode);
  188. switch (state) {
  189. case SCE_L_DEFAULT :
  190. switch (ch) {
  191. case '\\' :
  192. styler.ColourTo(i - 1, state);
  193. if (latexIsLetter(chNext)) {
  194. state = SCE_L_COMMAND;
  195. } else if (latexIsSpecial(chNext)) {
  196. styler.ColourTo(i + 1, SCE_L_SPECIAL);
  197. i++;
  198. chNext = styler.SafeGetCharAt(i + 1);
  199. } else if (chNext == '\r' || chNext == '\n') {
  200. styler.ColourTo(i, SCE_L_ERROR);
  201. } else {
  202. styler.ColourTo(i + 1, SCE_L_SHORTCMD);
  203. if (chNext == '(') {
  204. mode = 1;
  205. state = SCE_L_MATH;
  206. } else if (chNext == '[') {
  207. mode = 2;
  208. state = SCE_L_MATH2;
  209. }
  210. i++;
  211. chNext = styler.SafeGetCharAt(i + 1);
  212. }
  213. break;
  214. case '$' :
  215. styler.ColourTo(i - 1, state);
  216. if (chNext == '$') {
  217. styler.ColourTo(i + 1, SCE_L_SHORTCMD);
  218. mode = 2;
  219. state = SCE_L_MATH2;
  220. i++;
  221. chNext = styler.SafeGetCharAt(i + 1);
  222. } else {
  223. styler.ColourTo(i, SCE_L_SHORTCMD);
  224. mode = 1;
  225. state = SCE_L_MATH;
  226. }
  227. break;
  228. case '%' :
  229. styler.ColourTo(i - 1, state);
  230. state = SCE_L_COMMENT;
  231. break;
  232. }
  233. break;
  234. // These 3 will never be reached.
  235. case SCE_L_ERROR:
  236. case SCE_L_SPECIAL:
  237. case SCE_L_SHORTCMD:
  238. break;
  239. case SCE_L_COMMAND :
  240. if (!latexIsLetter(chNext)) {
  241. styler.ColourTo(i, state);
  242. if (latexNextNotBlankIs(i + 1, styler, '[' )) {
  243. state = SCE_L_CMDOPT;
  244. } else if (latexLastWordIs(i, styler, "\\begin")) {
  245. state = SCE_L_TAG;
  246. } else if (latexLastWordIs(i, styler, "\\end")) {
  247. state = SCE_L_TAG2;
  248. } else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
  249. chVerbatimDelim = chNext;
  250. state = SCE_L_VERBATIM;
  251. } else {
  252. latexStateReset(mode, state);
  253. }
  254. }
  255. break;
  256. case SCE_L_CMDOPT :
  257. if (ch == ']') {
  258. styler.ColourTo(i, state);
  259. latexStateReset(mode, state);
  260. }
  261. break;
  262. case SCE_L_TAG :
  263. if (latexIsTagValid(i, lengthDoc, styler)) {
  264. styler.ColourTo(i, state);
  265. latexStateReset(mode, state);
  266. if (latexLastWordIs(i, styler, "{verbatim}")) {
  267. state = SCE_L_VERBATIM;
  268. } else if (latexLastWordIs(i, styler, "{comment}")) {
  269. state = SCE_L_COMMENT2;
  270. } else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
  271. mode = 1;
  272. state = SCE_L_MATH;
  273. } else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
  274. mode = 2;
  275. state = SCE_L_MATH2;
  276. }
  277. } else {
  278. styler.ColourTo(i, SCE_L_ERROR);
  279. latexStateReset(mode, state);
  280. ch = styler.SafeGetCharAt(i);
  281. if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
  282. }
  283. chNext = styler.SafeGetCharAt(i+1);
  284. break;
  285. case SCE_L_TAG2 :
  286. if (latexIsTagValid(i, lengthDoc, styler)) {
  287. styler.ColourTo(i, state);
  288. latexStateReset(mode, state);
  289. } else {
  290. styler.ColourTo(i, SCE_L_ERROR);
  291. latexStateReset(mode, state);
  292. ch = styler.SafeGetCharAt(i);
  293. if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
  294. }
  295. chNext = styler.SafeGetCharAt(i+1);
  296. break;
  297. case SCE_L_MATH :
  298. switch (ch) {
  299. case '\\' :
  300. styler.ColourTo(i - 1, state);
  301. if (latexIsLetter(chNext)) {
  302. int match = i + 3;
  303. if (latexLastWordIs(match, styler, "\\end")) {
  304. match++;
  305. if (latexIsTagValid(match, lengthDoc, styler)) {
  306. if (latexLastWordIs(match, styler, "{math}"))
  307. mode = 0;
  308. }
  309. }
  310. state = SCE_L_COMMAND;
  311. } else if (latexIsSpecial(chNext)) {
  312. styler.ColourTo(i + 1, SCE_L_SPECIAL);
  313. i++;
  314. chNext = styler.SafeGetCharAt(i + 1);
  315. } else if (chNext == '\r' || chNext == '\n') {
  316. styler.ColourTo(i, SCE_L_ERROR);
  317. } else {
  318. if (chNext == ')') {
  319. mode = 0;
  320. state = SCE_L_DEFAULT;
  321. }
  322. styler.ColourTo(i + 1, SCE_L_SHORTCMD);
  323. i++;
  324. chNext = styler.SafeGetCharAt(i + 1);
  325. }
  326. break;
  327. case '$' :
  328. styler.ColourTo(i - 1, state);
  329. styler.ColourTo(i, SCE_L_SHORTCMD);
  330. mode = 0;
  331. state = SCE_L_DEFAULT;
  332. break;
  333. case '%' :
  334. styler.ColourTo(i - 1, state);
  335. state = SCE_L_COMMENT;
  336. break;
  337. }
  338. break;
  339. case SCE_L_MATH2 :
  340. switch (ch) {
  341. case '\\' :
  342. styler.ColourTo(i - 1, state);
  343. if (latexIsLetter(chNext)) {
  344. int match = i + 3;
  345. if (latexLastWordIs(match, styler, "\\end")) {
  346. match++;
  347. if (latexIsTagValid(match, lengthDoc, styler)) {
  348. if (latexLastWordIsMathEnv(match, styler))
  349. mode = 0;
  350. }
  351. }
  352. state = SCE_L_COMMAND;
  353. } else if (latexIsSpecial(chNext)) {
  354. styler.ColourTo(i + 1, SCE_L_SPECIAL);
  355. i++;
  356. chNext = styler.SafeGetCharAt(i + 1);
  357. } else if (chNext == '\r' || chNext == '\n') {
  358. styler.ColourTo(i, SCE_L_ERROR);
  359. } else {
  360. if (chNext == ']') {
  361. mode = 0;
  362. state = SCE_L_DEFAULT;
  363. }
  364. styler.ColourTo(i + 1, SCE_L_SHORTCMD);
  365. i++;
  366. chNext = styler.SafeGetCharAt(i + 1);
  367. }
  368. break;
  369. case '$' :
  370. styler.ColourTo(i - 1, state);
  371. if (chNext == '$') {
  372. styler.ColourTo(i + 1, SCE_L_SHORTCMD);
  373. i++;
  374. chNext = styler.SafeGetCharAt(i + 1);
  375. mode = 0;
  376. state = SCE_L_DEFAULT;
  377. } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
  378. styler.ColourTo(i, SCE_L_SHORTCMD);
  379. }
  380. break;
  381. case '%' :
  382. styler.ColourTo(i - 1, state);
  383. state = SCE_L_COMMENT;
  384. break;
  385. }
  386. break;
  387. case SCE_L_COMMENT :
  388. if (ch == '\r' || ch == '\n') {
  389. styler.ColourTo(i - 1, state);
  390. latexStateReset(mode, state);
  391. }
  392. break;
  393. case SCE_L_COMMENT2 :
  394. if (ch == '\\') {
  395. int match = i + 3;
  396. if (latexLastWordIs(match, styler, "\\end")) {
  397. match++;
  398. if (latexIsTagValid(match, lengthDoc, styler)) {
  399. if (latexLastWordIs(match, styler, "{comment}")) {
  400. styler.ColourTo(i - 1, state);
  401. state = SCE_L_COMMAND;
  402. }
  403. }
  404. }
  405. }
  406. break;
  407. case SCE_L_VERBATIM :
  408. if (ch == '\\') {
  409. int match = i + 3;
  410. if (latexLastWordIs(match, styler, "\\end")) {
  411. match++;
  412. if (latexIsTagValid(match, lengthDoc, styler)) {
  413. if (latexLastWordIs(match, styler, "{verbatim}")) {
  414. styler.ColourTo(i - 1, state);
  415. state = SCE_L_COMMAND;
  416. }
  417. }
  418. }
  419. } else if (chNext == chVerbatimDelim) {
  420. styler.ColourTo(i + 1, state);
  421. latexStateReset(mode, state);
  422. chVerbatimDelim = '\0';
  423. i++;
  424. chNext = styler.SafeGetCharAt(i + 1);
  425. } else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
  426. styler.ColourTo(i, SCE_L_ERROR);
  427. latexStateReset(mode, state);
  428. chVerbatimDelim = '\0';
  429. }
  430. break;
  431. }
  432. }
  433. if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
  434. styler.ColourTo(lengthDoc - 1, state);
  435. styler.Flush();
  436. }
  437. static int latexFoldSaveToInt(const latexFoldSave &save) {
  438. int sum = 0;
  439. for (int i = 0; i <= save.structLev; ++i)
  440. sum += save.openBegins[i];
  441. return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
  442. }
  443. // Change folding state while processing a line
  444. // Return the level before the first relevant command
  445. void SCI_METHOD LexerLaTeX::Fold(unsigned int startPos, int length, int, IDocument *pAccess) {
  446. const char *structWords[7] = {"part", "chapter", "section", "subsection",
  447. "subsubsection", "paragraph", "subparagraph"};
  448. Accessor styler(pAccess, &props);
  449. unsigned int endPos = startPos + length;
  450. int curLine = styler.GetLine(startPos);
  451. latexFoldSave save;
  452. getSave(curLine - 1, save);
  453. do {
  454. char ch, buf[16];
  455. int i, j, lev = -1;
  456. bool needFold = false;
  457. for (i = static_cast<int>(startPos); i < static_cast<int>(endPos); ++i) {
  458. ch = styler.SafeGetCharAt(i);
  459. if (ch == '\r' || ch == '\n') break;
  460. if (ch != '\\' || styler.StyleAt(i) != SCE_L_COMMAND) continue;
  461. for (j = 0; j < 15 && i + 1 < static_cast<int>(endPos); ++j, ++i) {
  462. buf[j] = styler.SafeGetCharAt(i + 1);
  463. if (!latexIsLetter(buf[j])) break;
  464. }
  465. buf[j] = '\0';
  466. if (strcmp(buf, "begin") == 0) {
  467. if (lev < 0) lev = latexFoldSaveToInt(save);
  468. ++save.openBegins[save.structLev];
  469. needFold = true;
  470. }
  471. else if (strcmp(buf, "end") == 0) {
  472. while (save.structLev > 0 && save.openBegins[save.structLev] == 0)
  473. --save.structLev;
  474. if (lev < 0) lev = latexFoldSaveToInt(save);
  475. if (save.openBegins[save.structLev] > 0) --save.openBegins[save.structLev];
  476. }
  477. else {
  478. for (j = 0; j < 7; ++j)
  479. if (strcmp(buf, structWords[j]) == 0) break;
  480. if (j >= 7) continue;
  481. save.structLev = j; // level before the command
  482. for (j = save.structLev + 1; j < 8; ++j) {
  483. save.openBegins[save.structLev] += save.openBegins[j];
  484. save.openBegins[j] = 0;
  485. }
  486. if (lev < 0) lev = latexFoldSaveToInt(save);
  487. ++save.structLev; // level after the command
  488. needFold = true;
  489. }
  490. }
  491. if (lev < 0) lev = latexFoldSaveToInt(save);
  492. if (needFold) lev |= SC_FOLDLEVELHEADERFLAG;
  493. styler.SetLevel(curLine, lev);
  494. setSave(curLine, save);
  495. ++curLine;
  496. startPos = styler.LineStart(curLine);
  497. if (static_cast<int>(startPos) == styler.Length()) {
  498. lev = latexFoldSaveToInt(save);
  499. styler.SetLevel(curLine, lev);
  500. setSave(curLine, save);
  501. truncSaves(curLine);
  502. }
  503. } while (startPos < endPos);
  504. styler.Flush();
  505. }
  506. static const char *const emptyWordListDesc[] = {
  507. 0
  508. };
  509. LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);