PageRenderTime 135ms CodeModel.GetById 72ms RepoModel.GetById 1ms app.codeStats 1ms

/ext/scintilla/lexers/LexCPP.cxx

https://gitlab.com/JeevRobinson/tortoisegit
C++ | 1626 lines | 1436 code | 108 blank | 82 comment | 626 complexity | 63c6e9a93dc89df322b5898300ec0b24 MD5 | raw file
Possible License(s): GPL-3.0, LGPL-3.0, MPL-2.0-no-copyleft-exception, GPL-2.0, LGPL-2.0
  1. // Scintilla source code edit control
  2. /** @file LexCPP.cxx
  3. ** Lexer for C++, C, Java, and JavaScript.
  4. ** Further folding features and configuration properties added by "Udo Lechner" <dlchnr(at)gmx(dot)net>
  5. **/
  6. // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
  7. // The License.txt file describes the conditions under which this software may be distributed.
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <stdio.h>
  11. #include <stdarg.h>
  12. #include <assert.h>
  13. #include <ctype.h>
  14. #include <string>
  15. #include <vector>
  16. #include <map>
  17. #include <algorithm>
  18. #include "ILexer.h"
  19. #include "Scintilla.h"
  20. #include "SciLexer.h"
  21. #include "WordList.h"
  22. #include "LexAccessor.h"
  23. #include "Accessor.h"
  24. #include "StyleContext.h"
  25. #include "CharacterSet.h"
  26. #include "LexerModule.h"
  27. #include "OptionSet.h"
  28. #include "SparseState.h"
  29. #include "SubStyles.h"
  30. #ifdef SCI_NAMESPACE
  31. using namespace Scintilla;
  32. #endif
  33. namespace {
  34. // Use an unnamed namespace to protect the functions and classes from name conflicts
  35. bool IsSpaceEquiv(int state) {
  36. return (state <= SCE_C_COMMENTDOC) ||
  37. // including SCE_C_DEFAULT, SCE_C_COMMENT, SCE_C_COMMENTLINE
  38. (state == SCE_C_COMMENTLINEDOC) || (state == SCE_C_COMMENTDOCKEYWORD) ||
  39. (state == SCE_C_COMMENTDOCKEYWORDERROR);
  40. }
  41. // Preconditions: sc.currentPos points to a character after '+' or '-'.
  42. // The test for pos reaching 0 should be redundant,
  43. // and is in only for safety measures.
  44. // Limitation: this code will give the incorrect answer for code like
  45. // a = b+++/ptn/...
  46. // Putting a space between the '++' post-inc operator and the '+' binary op
  47. // fixes this, and is highly recommended for readability anyway.
  48. bool FollowsPostfixOperator(StyleContext &sc, LexAccessor &styler) {
  49. int pos = (int) sc.currentPos;
  50. while (--pos > 0) {
  51. char ch = styler[pos];
  52. if (ch == '+' || ch == '-') {
  53. return styler[pos - 1] == ch;
  54. }
  55. }
  56. return false;
  57. }
  58. bool followsReturnKeyword(StyleContext &sc, LexAccessor &styler) {
  59. // Don't look at styles, so no need to flush.
  60. int pos = (int) sc.currentPos;
  61. int currentLine = styler.GetLine(pos);
  62. int lineStartPos = styler.LineStart(currentLine);
  63. while (--pos > lineStartPos) {
  64. char ch = styler.SafeGetCharAt(pos);
  65. if (ch != ' ' && ch != '\t') {
  66. break;
  67. }
  68. }
  69. const char *retBack = "nruter";
  70. const char *s = retBack;
  71. while (*s
  72. && pos >= lineStartPos
  73. && styler.SafeGetCharAt(pos) == *s) {
  74. s++;
  75. pos--;
  76. }
  77. return !*s;
  78. }
  // True when ch is a space or a horizontal tab; false for everything else.
  bool IsSpaceOrTab(int ch) {
      switch (ch) {
      case ' ':
      case '\t':
          return true;
      default:
          return false;
      }
  }
  82. bool OnlySpaceOrTab(const std::string &s) {
  83. for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
  84. if (!IsSpaceOrTab(*it))
  85. return false;
  86. }
  87. return true;
  88. }
  // Splits text at every occurrence of separator.
  // An empty text yields an empty vector; otherwise the result always holds
  // one more element than there are separators, so adjacent, leading or
  // trailing separators produce empty strings.
  std::vector<std::string> StringSplit(const std::string &text, int separator) {
      std::vector<std::string> pieces;
      if (text.empty())
          return pieces;

      pieces.push_back(std::string());
      for (std::string::size_type i = 0; i < text.size(); i++) {
          const char current = text[i];
          if (current == separator) {
              // Separator: open a new, initially empty, piece.
              pieces.push_back(std::string());
          } else {
              pieces.back().push_back(current);
          }
      }
      return pieces;
  }
  // Positions of an opening "(" token and of the ")" token that closes it.
  // Both iterators equal the end of the token list when no pair was found.
  struct BracketPair {
      std::vector<std::string>::iterator itBracket;
      std::vector<std::string>::iterator itEndBracket;
  };

  // Locates the first "(" token in tokens together with its matching ")",
  // taking nested brackets into account.
  // When there is no "(" or it is never closed, both iterators of the result
  // are tokens.end().
  BracketPair FindBracketPair(std::vector<std::string> &tokens) {
      typedef std::vector<std::string>::iterator TokenIterator;

      BracketPair found;
      found.itBracket = tokens.end();
      found.itEndBracket = tokens.end();

      const TokenIterator opening = std::find(tokens.begin(), tokens.end(), "(");
      size_t depth = 0;
      for (TokenIterator cursor = opening; cursor != tokens.end(); ++cursor) {
          if (*cursor == "(") {
              depth++;
          } else if (*cursor == ")" && --depth == 0) {
              // Nesting is back at zero: this closes the first "(".
              found.itBracket = opening;
              found.itEndBracket = cursor;
              break;
          }
      }
      return found;
  }
  128. void highlightTaskMarker(StyleContext &sc, LexAccessor &styler,
  129. int activity, WordList &markerList, bool caseSensitive){
  130. if ((isoperator(sc.chPrev) || IsASpace(sc.chPrev)) && markerList.Length()) {
  131. const int lengthMarker = 50;
  132. char marker[lengthMarker+1];
  133. int currPos = (int) sc.currentPos;
  134. int i = 0;
  135. while (i < lengthMarker) {
  136. char ch = styler.SafeGetCharAt(currPos + i);
  137. if (IsASpace(ch) || isoperator(ch)) {
  138. break;
  139. }
  140. if (caseSensitive)
  141. marker[i] = ch;
  142. else
  143. marker[i] = static_cast<char>(tolower(ch));
  144. i++;
  145. }
  146. marker[i] = '\0';
  147. if (markerList.InList(marker)) {
  148. sc.SetState(SCE_C_TASKMARKER|activity);
  149. }
  150. }
  151. }
  152. struct EscapeSequence {
  153. int digitsLeft;
  154. CharacterSet setHexDigits;
  155. CharacterSet setOctDigits;
  156. CharacterSet setNoneNumeric;
  157. CharacterSet *escapeSetValid;
  158. EscapeSequence() {
  159. digitsLeft = 0;
  160. escapeSetValid = 0;
  161. setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
  162. setOctDigits = CharacterSet(CharacterSet::setNone, "01234567");
  163. }
  164. void resetEscapeState(int nextChar) {
  165. digitsLeft = 0;
  166. escapeSetValid = &setNoneNumeric;
  167. if (nextChar == 'U') {
  168. digitsLeft = 9;
  169. escapeSetValid = &setHexDigits;
  170. } else if (nextChar == 'u') {
  171. digitsLeft = 5;
  172. escapeSetValid = &setHexDigits;
  173. } else if (nextChar == 'x') {
  174. digitsLeft = 5;
  175. escapeSetValid = &setHexDigits;
  176. } else if (setOctDigits.Contains(nextChar)) {
  177. digitsLeft = 3;
  178. escapeSetValid = &setOctDigits;
  179. }
  180. }
  181. bool atEscapeEnd(int currChar) const {
  182. return (digitsLeft <= 0) || !escapeSetValid->Contains(currChar);
  183. }
  184. };
  185. std::string GetRestOfLine(LexAccessor &styler, int start, bool allowSpace) {
  186. std::string restOfLine;
  187. int i =0;
  188. char ch = styler.SafeGetCharAt(start, '\n');
  189. int endLine = styler.LineEnd(styler.GetLine(start));
  190. while (((start+i) < endLine) && (ch != '\r')) {
  191. char chNext = styler.SafeGetCharAt(start + i + 1, '\n');
  192. if (ch == '/' && (chNext == '/' || chNext == '*'))
  193. break;
  194. if (allowSpace || (ch != ' '))
  195. restOfLine += ch;
  196. i++;
  197. ch = chNext;
  198. }
  199. return restOfLine;
  200. }
  201. bool IsStreamCommentStyle(int style) {
  202. return style == SCE_C_COMMENT ||
  203. style == SCE_C_COMMENTDOC ||
  204. style == SCE_C_COMMENTDOCKEYWORD ||
  205. style == SCE_C_COMMENTDOCKEYWORDERROR;
  206. }
  // One preprocessor definition change recorded while lexing.
  //   line      - line number associated with the entry; the history is
  //               truncated by comparing against it.
  //   key       - name of the symbol.
  //   value     - replacement text of the symbol.
  //   isUndef   - true when the entry removes the symbol rather than
  //               defining it.
  //   arguments - argument list of a function-like macro, empty otherwise.
  struct PPDefinition {
      int line;
      std::string key;
      std::string value;
      bool isUndef;
      std::string arguments;
      // arguments_ is taken by const reference like the other strings, so the
      // caller's string is copied once into the member rather than twice.
      PPDefinition(int line_, const std::string &key_, const std::string &value_,
              bool isUndef_ = false, const std::string &arguments_ = std::string()) :
          line(line_), key(key_), value(value_), isUndef(isUndef_), arguments(arguments_) {
      }
  };
  // Preprocessor conditional state at one line, kept as one bit per nesting
  // level. Only nesting levels 0..31 are tracked; deeper levels and unmatched
  // closing directives (negative level) are counted but otherwise ignored.
  class LinePPState {
      // Bit n is set while the section at nesting level n is inactive, so
      // any set bit means the line is inactive.
      unsigned int state;
      // Bit n is set once a branch at nesting level n has been taken.
      unsigned int ifTaken;
      // Current nesting level; -1 outside every conditional.
      int level;
      bool ValidLevel() const {
          return level >= 0 && level < 32;
      }
      // Bit for the current level, or 0 when the level is not tracked.
      // Shifting by a negative count or by the width of the type or more is
      // undefined, and 1 << 31 overflows a signed int. The mask is therefore
      // built from an unsigned value and only for valid levels.
      unsigned int maskLevel() const {
          return ValidLevel() ? (1u << level) : 0u;
      }
  public:
      LinePPState() : state(0), ifTaken(0), level(-1) {
      }
      // True when any enclosing section is inactive.
      bool IsInactive() const {
          return state != 0;
      }
      // True when a branch of the current level has already been taken.
      // Always false when the current level is not tracked.
      bool CurrentIfTaken() const {
          return (ifTaken & maskLevel()) != 0;
      }
      // Enters a new nesting level; on tells whether its first branch is active.
      void StartSection(bool on) {
          level++;
          if (ValidLevel()) {
              if (on) {
                  state &= ~maskLevel();
                  ifTaken |= maskLevel();
              } else {
                  state |= maskLevel();
                  ifTaken &= ~maskLevel();
              }
          }
      }
      // Leaves the current nesting level and clears its bits.
      void EndSection() {
          if (ValidLevel()) {
              state &= ~maskLevel();
              ifTaken &= ~maskLevel();
          }
          level--;
      }
      // Flips the active / inactive bit of the current level and records
      // that a branch of this level has been taken.
      void InvertCurrentLevel() {
          if (ValidLevel()) {
              state ^= maskLevel();
              ifTaken |= maskLevel();
          }
      }
  };
  262. // Hold the preprocessor state for each line seen.
  263. // Currently one entry per line but could become sparse with just one entry per preprocessor line.
  264. class PPStates {
  265. std::vector<LinePPState> vlls;
  266. public:
  267. LinePPState ForLine(int line) const {
  268. if ((line > 0) && (vlls.size() > static_cast<size_t>(line))) {
  269. return vlls[line];
  270. } else {
  271. return LinePPState();
  272. }
  273. }
  274. void Add(int line, LinePPState lls) {
  275. vlls.resize(line+1);
  276. vlls[line] = lls;
  277. }
  278. };
  // Options used for LexerCPP. Each member backs one named property that is
  // registered in OptionSetCPP; the constructor supplies the defaults.
  struct OptionsCPP {
      bool stylingWithinPreprocessor;
      bool identifiersAllowDollars;
      bool trackPreprocessor;
      bool updatePreprocessor;
      bool verbatimStringsAllowEscapes;
      bool triplequotedStrings;
      bool hashquotedStrings;
      bool backQuotedStrings;
      bool escapeSequence;
      bool fold;
      bool foldSyntaxBased;
      bool foldComment;
      bool foldCommentMultiline;
      bool foldCommentExplicit;
      std::string foldExplicitStart;
      std::string foldExplicitEnd;
      bool foldExplicitAnywhere;
      bool foldPreprocessor;
      bool foldCompact;
      bool foldAtElse;

      OptionsCPP() :
          stylingWithinPreprocessor(false),
          identifiersAllowDollars(true),
          trackPreprocessor(true),
          updatePreprocessor(true),
          verbatimStringsAllowEscapes(false),
          triplequotedStrings(false),
          hashquotedStrings(false),
          backQuotedStrings(false),
          escapeSequence(false),
          fold(false),
          foldSyntaxBased(true),
          foldComment(false),
          foldCommentMultiline(true),
          foldCommentExplicit(true),
          foldExplicitStart(""),
          foldExplicitEnd(""),
          foldExplicitAnywhere(false),
          foldPreprocessor(false),
          foldCompact(false),
          foldAtElse(false) {
      }
  };
  // Descriptions of the word lists understood by the lexer, terminated by a
  // null pointer. The order appears to mirror the index accepted by
  // LexerCPP::WordListSet (0 = primary keywords ... 5 = task markers).
  const char *const cppWordLists[] = {
      "Primary keywords and identifiers",       // 0
      "Secondary keywords and identifiers",     // 1
      "Documentation comment keywords",         // 2
      "Global classes and typedefs",            // 3
      "Preprocessor definitions",               // 4
      "Task marker and error marker keywords",  // 5
      0,                                        // end of list
  };
  334. struct OptionSetCPP : public OptionSet<OptionsCPP> {
  335. OptionSetCPP() {
  336. DefineProperty("styling.within.preprocessor", &OptionsCPP::stylingWithinPreprocessor,
  337. "For C++ code, determines whether all preprocessor code is styled in the "
  338. "preprocessor style (0, the default) or only from the initial # to the end "
  339. "of the command word(1).");
  340. DefineProperty("lexer.cpp.allow.dollars", &OptionsCPP::identifiersAllowDollars,
  341. "Set to 0 to disallow the '$' character in identifiers with the cpp lexer.");
  342. DefineProperty("lexer.cpp.track.preprocessor", &OptionsCPP::trackPreprocessor,
  343. "Set to 1 to interpret #if/#else/#endif to grey out code that is not active.");
  344. DefineProperty("lexer.cpp.update.preprocessor", &OptionsCPP::updatePreprocessor,
  345. "Set to 1 to update preprocessor definitions when #define found.");
  346. DefineProperty("lexer.cpp.verbatim.strings.allow.escapes", &OptionsCPP::verbatimStringsAllowEscapes,
  347. "Set to 1 to allow verbatim strings to contain escape sequences.");
  348. DefineProperty("lexer.cpp.triplequoted.strings", &OptionsCPP::triplequotedStrings,
  349. "Set to 1 to enable highlighting of triple-quoted strings.");
  350. DefineProperty("lexer.cpp.hashquoted.strings", &OptionsCPP::hashquotedStrings,
  351. "Set to 1 to enable highlighting of hash-quoted strings.");
  352. DefineProperty("lexer.cpp.backquoted.strings", &OptionsCPP::backQuotedStrings,
  353. "Set to 1 to enable highlighting of back-quoted raw strings .");
  354. DefineProperty("lexer.cpp.escape.sequence", &OptionsCPP::escapeSequence,
  355. "Set to 1 to enable highlighting of escape sequences in strings");
  356. DefineProperty("fold", &OptionsCPP::fold);
  357. DefineProperty("fold.cpp.syntax.based", &OptionsCPP::foldSyntaxBased,
  358. "Set this property to 0 to disable syntax based folding.");
  359. DefineProperty("fold.comment", &OptionsCPP::foldComment,
  360. "This option enables folding multi-line comments and explicit fold points when using the C++ lexer. "
  361. "Explicit fold points allows adding extra folding by placing a //{ comment at the start and a //} "
  362. "at the end of a section that should fold.");
  363. DefineProperty("fold.cpp.comment.multiline", &OptionsCPP::foldCommentMultiline,
  364. "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
  365. DefineProperty("fold.cpp.comment.explicit", &OptionsCPP::foldCommentExplicit,
  366. "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
  367. DefineProperty("fold.cpp.explicit.start", &OptionsCPP::foldExplicitStart,
  368. "The string to use for explicit fold start points, replacing the standard //{.");
  369. DefineProperty("fold.cpp.explicit.end", &OptionsCPP::foldExplicitEnd,
  370. "The string to use for explicit fold end points, replacing the standard //}.");
  371. DefineProperty("fold.cpp.explicit.anywhere", &OptionsCPP::foldExplicitAnywhere,
  372. "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
  373. DefineProperty("fold.preprocessor", &OptionsCPP::foldPreprocessor,
  374. "This option enables folding preprocessor directives when using the C++ lexer. "
  375. "Includes C#'s explicit #region and #endregion folding directives.");
  376. DefineProperty("fold.compact", &OptionsCPP::foldCompact);
  377. DefineProperty("fold.at.else", &OptionsCPP::foldAtElse,
  378. "This option enables C++ folding on a \"} else {\" line of an if statement.");
  379. DefineWordListSets(cppWordLists);
  380. }
  381. };
  382. const char styleSubable[] = {SCE_C_IDENTIFIER, SCE_C_COMMENTDOCKEYWORD, 0};
  383. }
  384. class LexerCPP : public ILexerWithSubStyles {
  385. bool caseSensitive;
  386. CharacterSet setWord;
  387. CharacterSet setNegationOp;
  388. CharacterSet setArithmethicOp;
  389. CharacterSet setRelOp;
  390. CharacterSet setLogicalOp;
  391. CharacterSet setWordStart;
  392. PPStates vlls;
  393. std::vector<PPDefinition> ppDefineHistory;
  394. WordList keywords;
  395. WordList keywords2;
  396. WordList keywords3;
  397. WordList keywords4;
  398. WordList ppDefinitions;
  399. WordList markerList;
  400. struct SymbolValue {
  401. std::string value;
  402. std::string arguments;
  403. SymbolValue(const std::string &value_="", const std::string &arguments_="") : value(value_), arguments(arguments_) {
  404. }
  405. SymbolValue &operator = (const std::string &value_) {
  406. value = value_;
  407. arguments.clear();
  408. return *this;
  409. }
  410. bool IsMacro() const {
  411. return !arguments.empty();
  412. }
  413. };
  414. typedef std::map<std::string, SymbolValue> SymbolTable;
  415. SymbolTable preprocessorDefinitionsStart;
  416. OptionsCPP options;
  417. OptionSetCPP osCPP;
  418. EscapeSequence escapeSeq;
  419. SparseState<std::string> rawStringTerminators;
  420. enum { activeFlag = 0x40 };
  421. enum { ssIdentifier, ssDocKeyword };
  422. SubStyles subStyles;
  423. public:
  424. explicit LexerCPP(bool caseSensitive_) :
  425. caseSensitive(caseSensitive_),
  426. setWord(CharacterSet::setAlphaNum, "._", 0x80, true),
  427. setNegationOp(CharacterSet::setNone, "!"),
  428. setArithmethicOp(CharacterSet::setNone, "+-/*%"),
  429. setRelOp(CharacterSet::setNone, "=!<>"),
  430. setLogicalOp(CharacterSet::setNone, "|&"),
  431. subStyles(styleSubable, 0x80, 0x40, activeFlag) {
  432. }
  433. virtual ~LexerCPP() {
  434. }
  435. void SCI_METHOD Release() {
  436. delete this;
  437. }
  438. int SCI_METHOD Version() const {
  439. return lvSubStyles;
  440. }
  441. const char * SCI_METHOD PropertyNames() {
  442. return osCPP.PropertyNames();
  443. }
  444. int SCI_METHOD PropertyType(const char *name) {
  445. return osCPP.PropertyType(name);
  446. }
  447. const char * SCI_METHOD DescribeProperty(const char *name) {
  448. return osCPP.DescribeProperty(name);
  449. }
  450. int SCI_METHOD PropertySet(const char *key, const char *val);
  451. const char * SCI_METHOD DescribeWordListSets() {
  452. return osCPP.DescribeWordListSets();
  453. }
  454. int SCI_METHOD WordListSet(int n, const char *wl);
  455. void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
  456. void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
  457. void * SCI_METHOD PrivateCall(int, void *) {
  458. return 0;
  459. }
  460. int SCI_METHOD LineEndTypesSupported() {
  461. return SC_LINE_END_TYPE_UNICODE;
  462. }
  463. int SCI_METHOD AllocateSubStyles(int styleBase, int numberStyles) {
  464. return subStyles.Allocate(styleBase, numberStyles);
  465. }
  466. int SCI_METHOD SubStylesStart(int styleBase) {
  467. return subStyles.Start(styleBase);
  468. }
  469. int SCI_METHOD SubStylesLength(int styleBase) {
  470. return subStyles.Length(styleBase);
  471. }
  472. int SCI_METHOD StyleFromSubStyle(int subStyle) {
  473. int styleBase = subStyles.BaseStyle(MaskActive(subStyle));
  474. int active = subStyle & activeFlag;
  475. return styleBase | active;
  476. }
  477. int SCI_METHOD PrimaryStyleFromStyle(int style) {
  478. return MaskActive(style);
  479. }
  480. void SCI_METHOD FreeSubStyles() {
  481. subStyles.Free();
  482. }
  483. void SCI_METHOD SetIdentifiers(int style, const char *identifiers) {
  484. subStyles.SetIdentifiers(style, identifiers);
  485. }
  486. int SCI_METHOD DistanceToSecondaryStyles() {
  487. return activeFlag;
  488. }
  489. const char * SCI_METHOD GetSubStyleBases() {
  490. return styleSubable;
  491. }
  492. static ILexer *LexerFactoryCPP() {
  493. return new LexerCPP(true);
  494. }
  495. static ILexer *LexerFactoryCPPInsensitive() {
  496. return new LexerCPP(false);
  497. }
  498. static int MaskActive(int style) {
  499. return style & ~activeFlag;
  500. }
  501. void EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions);
  502. std::vector<std::string> Tokenize(const std::string &expr) const;
  503. bool EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions);
  504. };
  505. int SCI_METHOD LexerCPP::PropertySet(const char *key, const char *val) {
  506. if (osCPP.PropertySet(&options, key, val)) {
  507. if (strcmp(key, "lexer.cpp.allow.dollars") == 0) {
  508. setWord = CharacterSet(CharacterSet::setAlphaNum, "._", 0x80, true);
  509. if (options.identifiersAllowDollars) {
  510. setWord.Add('$');
  511. }
  512. }
  513. return 0;
  514. }
  515. return -1;
  516. }
  517. int SCI_METHOD LexerCPP::WordListSet(int n, const char *wl) {
  518. WordList *wordListN = 0;
  519. switch (n) {
  520. case 0:
  521. wordListN = &keywords;
  522. break;
  523. case 1:
  524. wordListN = &keywords2;
  525. break;
  526. case 2:
  527. wordListN = &keywords3;
  528. break;
  529. case 3:
  530. wordListN = &keywords4;
  531. break;
  532. case 4:
  533. wordListN = &ppDefinitions;
  534. break;
  535. case 5:
  536. wordListN = &markerList;
  537. break;
  538. }
  539. int firstModification = -1;
  540. if (wordListN) {
  541. WordList wlNew;
  542. wlNew.Set(wl);
  543. if (*wordListN != wlNew) {
  544. wordListN->Set(wl);
  545. firstModification = 0;
  546. if (n == 4) {
  547. // Rebuild preprocessorDefinitions
  548. preprocessorDefinitionsStart.clear();
  549. for (int nDefinition = 0; nDefinition < ppDefinitions.Length(); nDefinition++) {
  550. const char *cpDefinition = ppDefinitions.WordAt(nDefinition);
  551. const char *cpEquals = strchr(cpDefinition, '=');
  552. if (cpEquals) {
  553. std::string name(cpDefinition, cpEquals - cpDefinition);
  554. std::string val(cpEquals+1);
  555. size_t bracket = name.find('(');
  556. size_t bracketEnd = name.find(')');
  557. if ((bracket != std::string::npos) && (bracketEnd != std::string::npos)) {
  558. // Macro
  559. std::string args = name.substr(bracket + 1, bracketEnd - bracket - 1);
  560. name = name.substr(0, bracket);
  561. preprocessorDefinitionsStart[name] = SymbolValue(val, args);
  562. } else {
  563. preprocessorDefinitionsStart[name] = val;
  564. }
  565. } else {
  566. std::string name(cpDefinition);
  567. std::string val("1");
  568. preprocessorDefinitionsStart[name] = val;
  569. }
  570. }
  571. }
  572. }
  573. }
  574. return firstModification;
  575. }
  576. // Functor used to truncate history
  577. struct After {
  578. int line;
  579. explicit After(int line_) : line(line_) {}
  580. bool operator()(PPDefinition &p) const {
  581. return p.line > line;
  582. }
  583. };
  584. void SCI_METHOD LexerCPP::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
  585. LexAccessor styler(pAccess);
  586. CharacterSet setOKBeforeRE(CharacterSet::setNone, "([{=,:;!%^&*|?~+-");
  587. CharacterSet setCouldBePostOp(CharacterSet::setNone, "+-");
  588. CharacterSet setDoxygen(CharacterSet::setAlpha, "$@\\&<>#{}[]");
  589. setWordStart = CharacterSet(CharacterSet::setAlpha, "_", 0x80, true);
  590. CharacterSet setInvalidRawFirst(CharacterSet::setNone, " )\\\t\v\f\n");
  591. if (options.identifiersAllowDollars) {
  592. setWordStart.Add('$');
  593. }
  594. int chPrevNonWhite = ' ';
  595. int visibleChars = 0;
  596. bool lastWordWasUUID = false;
  597. int styleBeforeDCKeyword = SCE_C_DEFAULT;
  598. int styleBeforeTaskMarker = SCE_C_DEFAULT;
  599. bool continuationLine = false;
  600. bool isIncludePreprocessor = false;
  601. bool isStringInPreprocessor = false;
  602. bool inRERange = false;
  603. bool seenDocKeyBrace = false;
  604. int lineCurrent = styler.GetLine(startPos);
  605. if ((MaskActive(initStyle) == SCE_C_PREPROCESSOR) ||
  606. (MaskActive(initStyle) == SCE_C_COMMENTLINE) ||
  607. (MaskActive(initStyle) == SCE_C_COMMENTLINEDOC)) {
  608. // Set continuationLine if last character of previous line is '\'
  609. if (lineCurrent > 0) {
  610. int endLinePrevious = styler.LineEnd(lineCurrent - 1);
  611. if (endLinePrevious > 0) {
  612. continuationLine = styler.SafeGetCharAt(endLinePrevious-1) == '\\';
  613. }
  614. }
  615. }
  616. // look back to set chPrevNonWhite properly for better regex colouring
  617. if (startPos > 0) {
  618. int back = startPos;
  619. while (--back && IsSpaceEquiv(MaskActive(styler.StyleAt(back))))
  620. ;
  621. if (MaskActive(styler.StyleAt(back)) == SCE_C_OPERATOR) {
  622. chPrevNonWhite = styler.SafeGetCharAt(back);
  623. }
  624. }
  625. StyleContext sc(startPos, length, initStyle, styler, static_cast<unsigned char>(0xff));
  626. LinePPState preproc = vlls.ForLine(lineCurrent);
  627. bool definitionsChanged = false;
  628. // Truncate ppDefineHistory before current line
  629. if (!options.updatePreprocessor)
  630. ppDefineHistory.clear();
  631. std::vector<PPDefinition>::iterator itInvalid = std::find_if(ppDefineHistory.begin(), ppDefineHistory.end(), After(lineCurrent-1));
  632. if (itInvalid != ppDefineHistory.end()) {
  633. ppDefineHistory.erase(itInvalid, ppDefineHistory.end());
  634. definitionsChanged = true;
  635. }
  636. SymbolTable preprocessorDefinitions = preprocessorDefinitionsStart;
  637. for (std::vector<PPDefinition>::iterator itDef = ppDefineHistory.begin(); itDef != ppDefineHistory.end(); ++itDef) {
  638. if (itDef->isUndef)
  639. preprocessorDefinitions.erase(itDef->key);
  640. else
  641. preprocessorDefinitions[itDef->key] = SymbolValue(itDef->value, itDef->arguments);
  642. }
  643. std::string rawStringTerminator = rawStringTerminators.ValueAt(lineCurrent-1);
  644. SparseState<std::string> rawSTNew(lineCurrent);
  645. int activitySet = preproc.IsInactive() ? activeFlag : 0;
  646. const WordClassifier &classifierIdentifiers = subStyles.Classifier(SCE_C_IDENTIFIER);
  647. const WordClassifier &classifierDocKeyWords = subStyles.Classifier(SCE_C_COMMENTDOCKEYWORD);
  648. int lineEndNext = styler.LineEnd(lineCurrent);
  649. for (; sc.More();) {
  650. if (sc.atLineStart) {
  651. // Using MaskActive() is not needed in the following statement.
  652. // Inside inactive preprocessor declaration, state will be reset anyway at the end of this block.
  653. if ((sc.state == SCE_C_STRING) || (sc.state == SCE_C_CHARACTER)) {
  654. // Prevent SCE_C_STRINGEOL from leaking back to previous line which
  655. // ends with a line continuation by locking in the state up to this position.
  656. sc.SetState(sc.state);
  657. }
  658. if ((MaskActive(sc.state) == SCE_C_PREPROCESSOR) && (!continuationLine)) {
  659. sc.SetState(SCE_C_DEFAULT|activitySet);
  660. }
  661. // Reset states to beginning of colourise so no surprises
  662. // if different sets of lines lexed.
  663. visibleChars = 0;
  664. lastWordWasUUID = false;
  665. isIncludePreprocessor = false;
  666. inRERange = false;
  667. if (preproc.IsInactive()) {
  668. activitySet = activeFlag;
  669. sc.SetState(sc.state | activitySet);
  670. }
  671. }
  672. if (sc.atLineEnd) {
  673. lineCurrent++;
  674. lineEndNext = styler.LineEnd(lineCurrent);
  675. vlls.Add(lineCurrent, preproc);
  676. if (rawStringTerminator != "") {
  677. rawSTNew.Set(lineCurrent-1, rawStringTerminator);
  678. }
  679. }
  680. // Handle line continuation generically.
  681. if (sc.ch == '\\') {
  682. if (static_cast<int>((sc.currentPos+1)) >= lineEndNext) {
  683. lineCurrent++;
  684. lineEndNext = styler.LineEnd(lineCurrent);
  685. vlls.Add(lineCurrent, preproc);
  686. sc.Forward();
  687. if (sc.ch == '\r' && sc.chNext == '\n') {
  688. // Even in UTF-8, \r and \n are separate
  689. sc.Forward();
  690. }
  691. continuationLine = true;
  692. sc.Forward();
  693. continue;
  694. }
  695. }
  696. const bool atLineEndBeforeSwitch = sc.atLineEnd;
  697. // Determine if the current state should terminate.
  698. switch (MaskActive(sc.state)) {
  699. case SCE_C_OPERATOR:
  700. sc.SetState(SCE_C_DEFAULT|activitySet);
  701. break;
  702. case SCE_C_NUMBER:
  703. // We accept almost anything because of hex. and number suffixes
  704. if (sc.ch == '_') {
  705. sc.ChangeState(SCE_C_USERLITERAL|activitySet);
  706. } else if (!(setWord.Contains(sc.ch)
  707. || (sc.ch == '\'')
  708. || ((sc.ch == '+' || sc.ch == '-') && (sc.chPrev == 'e' || sc.chPrev == 'E' ||
  709. sc.chPrev == 'p' || sc.chPrev == 'P')))) {
  710. sc.SetState(SCE_C_DEFAULT|activitySet);
  711. }
  712. break;
  713. case SCE_C_USERLITERAL:
  714. if (!(setWord.Contains(sc.ch)))
  715. sc.SetState(SCE_C_DEFAULT|activitySet);
  716. break;
  717. case SCE_C_IDENTIFIER:
  718. if (sc.atLineStart || sc.atLineEnd || !setWord.Contains(sc.ch) || (sc.ch == '.')) {
  719. char s[1000];
  720. if (caseSensitive) {
  721. sc.GetCurrent(s, sizeof(s));
  722. } else {
  723. sc.GetCurrentLowered(s, sizeof(s));
  724. }
  725. if (keywords.InList(s)) {
  726. lastWordWasUUID = strcmp(s, "uuid") == 0;
  727. sc.ChangeState(SCE_C_WORD|activitySet);
  728. } else if (keywords2.InList(s)) {
  729. sc.ChangeState(SCE_C_WORD2|activitySet);
  730. } else if (keywords4.InList(s)) {
  731. sc.ChangeState(SCE_C_GLOBALCLASS|activitySet);
  732. } else {
  733. int subStyle = classifierIdentifiers.ValueFor(s);
  734. if (subStyle >= 0) {
  735. sc.ChangeState(subStyle|activitySet);
  736. }
  737. }
  738. const bool literalString = sc.ch == '\"';
  739. if (literalString || sc.ch == '\'') {
  740. size_t lenS = strlen(s);
  741. const bool raw = literalString && sc.chPrev == 'R' && !setInvalidRawFirst.Contains(sc.chNext);
  742. if (raw)
  743. s[lenS--] = '\0';
  744. bool valid =
  745. (lenS == 0) ||
  746. ((lenS == 1) && ((s[0] == 'L') || (s[0] == 'u') || (s[0] == 'U'))) ||
  747. ((lenS == 2) && literalString && (s[0] == 'u') && (s[1] == '8'));
  748. if (valid) {
  749. if (literalString) {
  750. if (raw) {
  751. // Set the style of the string prefix to SCE_C_STRINGRAW but then change to
  752. // SCE_C_DEFAULT as that allows the raw string start code to run.
  753. sc.ChangeState(SCE_C_STRINGRAW|activitySet);
  754. sc.SetState(SCE_C_DEFAULT|activitySet);
  755. } else {
  756. sc.ChangeState(SCE_C_STRING|activitySet);
  757. }
  758. } else {
  759. sc.ChangeState(SCE_C_CHARACTER|activitySet);
  760. }
  761. } else {
  762. sc.SetState(SCE_C_DEFAULT | activitySet);
  763. }
  764. } else {
  765. sc.SetState(SCE_C_DEFAULT|activitySet);
  766. }
  767. }
  768. break;
  769. case SCE_C_PREPROCESSOR:
  770. if (options.stylingWithinPreprocessor) {
  771. if (IsASpace(sc.ch)) {
  772. sc.SetState(SCE_C_DEFAULT|activitySet);
  773. }
  774. } else if (isStringInPreprocessor && (sc.Match('>') || sc.Match('\"') || sc.atLineEnd)) {
  775. isStringInPreprocessor = false;
  776. } else if (!isStringInPreprocessor) {
  777. if ((isIncludePreprocessor && sc.Match('<')) || sc.Match('\"')) {
  778. isStringInPreprocessor = true;
  779. } else if (sc.Match('/', '*')) {
  780. if (sc.Match("/**") || sc.Match("/*!")) {
  781. sc.SetState(SCE_C_PREPROCESSORCOMMENTDOC|activitySet);
  782. } else {
  783. sc.SetState(SCE_C_PREPROCESSORCOMMENT|activitySet);
  784. }
  785. sc.Forward(); // Eat the *
  786. } else if (sc.Match('/', '/')) {
  787. sc.SetState(SCE_C_DEFAULT|activitySet);
  788. }
  789. }
  790. break;
  791. case SCE_C_PREPROCESSORCOMMENT:
  792. case SCE_C_PREPROCESSORCOMMENTDOC:
  793. if (sc.Match('*', '/')) {
  794. sc.Forward();
  795. sc.ForwardSetState(SCE_C_PREPROCESSOR|activitySet);
  796. continue; // Without advancing in case of '\'.
  797. }
  798. break;
  799. case SCE_C_COMMENT:
  800. if (sc.Match('*', '/')) {
  801. sc.Forward();
  802. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  803. } else {
  804. styleBeforeTaskMarker = SCE_C_COMMENT;
  805. highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
  806. }
  807. break;
  808. case SCE_C_COMMENTDOC:
  809. if (sc.Match('*', '/')) {
  810. sc.Forward();
  811. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  812. } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
  813. // Verify that we have the conditions to mark a comment-doc-keyword
  814. if ((IsASpace(sc.chPrev) || sc.chPrev == '*') && (!IsASpace(sc.chNext))) {
  815. styleBeforeDCKeyword = SCE_C_COMMENTDOC;
  816. sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
  817. }
  818. }
  819. break;
  820. case SCE_C_COMMENTLINE:
  821. if (sc.atLineStart && !continuationLine) {
  822. sc.SetState(SCE_C_DEFAULT|activitySet);
  823. } else {
  824. styleBeforeTaskMarker = SCE_C_COMMENTLINE;
  825. highlightTaskMarker(sc, styler, activitySet, markerList, caseSensitive);
  826. }
  827. break;
  828. case SCE_C_COMMENTLINEDOC:
  829. if (sc.atLineStart && !continuationLine) {
  830. sc.SetState(SCE_C_DEFAULT|activitySet);
  831. } else if (sc.ch == '@' || sc.ch == '\\') { // JavaDoc and Doxygen support
  832. // Verify that we have the conditions to mark a comment-doc-keyword
  833. if ((IsASpace(sc.chPrev) || sc.chPrev == '/' || sc.chPrev == '!') && (!IsASpace(sc.chNext))) {
  834. styleBeforeDCKeyword = SCE_C_COMMENTLINEDOC;
  835. sc.SetState(SCE_C_COMMENTDOCKEYWORD|activitySet);
  836. }
  837. }
  838. break;
  839. case SCE_C_COMMENTDOCKEYWORD:
  840. if ((styleBeforeDCKeyword == SCE_C_COMMENTDOC) && sc.Match('*', '/')) {
  841. sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR);
  842. sc.Forward();
  843. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  844. seenDocKeyBrace = false;
  845. } else if (sc.ch == '[' || sc.ch == '{') {
  846. seenDocKeyBrace = true;
  847. } else if (!setDoxygen.Contains(sc.ch)
  848. && !(seenDocKeyBrace && (sc.ch == ',' || sc.ch == '.'))) {
  849. char s[100];
  850. if (caseSensitive) {
  851. sc.GetCurrent(s, sizeof(s));
  852. } else {
  853. sc.GetCurrentLowered(s, sizeof(s));
  854. }
  855. if (!(IsASpace(sc.ch) || (sc.ch == 0))) {
  856. sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
  857. } else if (!keywords3.InList(s + 1)) {
  858. int subStyleCDKW = classifierDocKeyWords.ValueFor(s+1);
  859. if (subStyleCDKW >= 0) {
  860. sc.ChangeState(subStyleCDKW|activitySet);
  861. } else {
  862. sc.ChangeState(SCE_C_COMMENTDOCKEYWORDERROR|activitySet);
  863. }
  864. }
  865. sc.SetState(styleBeforeDCKeyword|activitySet);
  866. seenDocKeyBrace = false;
  867. }
  868. break;
  869. case SCE_C_STRING:
  870. if (sc.atLineEnd) {
  871. sc.ChangeState(SCE_C_STRINGEOL|activitySet);
  872. } else if (isIncludePreprocessor) {
  873. if (sc.ch == '>') {
  874. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  875. isIncludePreprocessor = false;
  876. }
  877. } else if (sc.ch == '\\') {
  878. if (options.escapeSequence) {
  879. sc.SetState(SCE_C_ESCAPESEQUENCE|activitySet);
  880. escapeSeq.resetEscapeState(sc.chNext);
  881. }
  882. sc.Forward(); // Skip all characters after the backslash
  883. } else if (sc.ch == '\"') {
  884. if (sc.chNext == '_') {
  885. sc.ChangeState(SCE_C_USERLITERAL|activitySet);
  886. } else {
  887. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  888. }
  889. }
  890. break;
  891. case SCE_C_ESCAPESEQUENCE:
  892. escapeSeq.digitsLeft--;
  893. if (!escapeSeq.atEscapeEnd(sc.ch)) {
  894. break;
  895. }
  896. if (sc.ch == '"') {
  897. sc.SetState(SCE_C_STRING|activitySet);
  898. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  899. } else if (sc.ch == '\\') {
  900. escapeSeq.resetEscapeState(sc.chNext);
  901. sc.Forward();
  902. } else {
  903. sc.SetState(SCE_C_STRING|activitySet);
  904. if (sc.atLineEnd) {
  905. sc.ChangeState(SCE_C_STRINGEOL|activitySet);
  906. }
  907. }
  908. break;
  909. case SCE_C_HASHQUOTEDSTRING:
  910. if (sc.ch == '\\') {
  911. if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
  912. sc.Forward();
  913. }
  914. } else if (sc.ch == '\"') {
  915. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  916. }
  917. break;
  918. case SCE_C_STRINGRAW:
  919. if (sc.Match(rawStringTerminator.c_str())) {
  920. for (size_t termPos=rawStringTerminator.size(); termPos; termPos--)
  921. sc.Forward();
  922. sc.SetState(SCE_C_DEFAULT|activitySet);
  923. rawStringTerminator = "";
  924. }
  925. break;
  926. case SCE_C_CHARACTER:
  927. if (sc.atLineEnd) {
  928. sc.ChangeState(SCE_C_STRINGEOL|activitySet);
  929. } else if (sc.ch == '\\') {
  930. if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
  931. sc.Forward();
  932. }
  933. } else if (sc.ch == '\'') {
  934. if (sc.chNext == '_') {
  935. sc.ChangeState(SCE_C_USERLITERAL|activitySet);
  936. } else {
  937. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  938. }
  939. }
  940. break;
  941. case SCE_C_REGEX:
  942. if (sc.atLineStart) {
  943. sc.SetState(SCE_C_DEFAULT|activitySet);
  944. } else if (! inRERange && sc.ch == '/') {
  945. sc.Forward();
  946. while ((sc.ch < 0x80) && islower(sc.ch))
  947. sc.Forward(); // gobble regex flags
  948. sc.SetState(SCE_C_DEFAULT|activitySet);
  949. } else if (sc.ch == '\\' && (static_cast<int>(sc.currentPos+1) < lineEndNext)) {
  950. // Gobble up the escaped character
  951. sc.Forward();
  952. } else if (sc.ch == '[') {
  953. inRERange = true;
  954. } else if (sc.ch == ']') {
  955. inRERange = false;
  956. }
  957. break;
  958. case SCE_C_STRINGEOL:
  959. if (sc.atLineStart) {
  960. sc.SetState(SCE_C_DEFAULT|activitySet);
  961. }
  962. break;
  963. case SCE_C_VERBATIM:
  964. if (options.verbatimStringsAllowEscapes && (sc.ch == '\\')) {
  965. sc.Forward(); // Skip all characters after the backslash
  966. } else if (sc.ch == '\"') {
  967. if (sc.chNext == '\"') {
  968. sc.Forward();
  969. } else {
  970. sc.ForwardSetState(SCE_C_DEFAULT|activitySet);
  971. }
  972. }
  973. break;
  974. case SCE_C_TRIPLEVERBATIM:
  975. if (sc.Match("\"\"\"")) {
  976. while (sc.Match('"')) {
  977. sc.Forward();
  978. }
  979. sc.SetState(SCE_C_DEFAULT|activitySet);
  980. }
  981. break;
  982. case SCE_C_UUID:
  983. if (sc.atLineEnd || sc.ch == ')') {
  984. sc.SetState(SCE_C_DEFAULT|activitySet);
  985. }
  986. break;
  987. case SCE_C_TASKMARKER:
  988. if (isoperator(sc.ch) || IsASpace(sc.ch)) {
  989. sc.SetState(styleBeforeTaskMarker|activitySet);
  990. styleBeforeTaskMarker = SCE_C_DEFAULT;
  991. }
  992. }
  993. if (sc.atLineEnd && !atLineEndBeforeSwitch) {
  994. // State exit processing consumed characters up to end of line.
  995. lineCurrent++;
  996. lineEndNext = styler.LineEnd(lineCurrent);
  997. vlls.Add(lineCurrent, preproc);
  998. }
  999. // Determine if a new state should be entered.
  1000. if (MaskActive(sc.state) == SCE_C_DEFAULT) {
  1001. if (sc.Match('@', '\"')) {
  1002. sc.SetState(SCE_C_VERBATIM|activitySet);
  1003. sc.Forward();
  1004. } else if (options.triplequotedStrings && sc.Match("\"\"\"")) {
  1005. sc.SetState(SCE_C_TRIPLEVERBATIM|activitySet);
  1006. sc.Forward(2);
  1007. } else if (options.hashquotedStrings && sc.Match('#', '\"')) {
  1008. sc.SetState(SCE_C_HASHQUOTEDSTRING|activitySet);
  1009. sc.Forward();
  1010. } else if (options.backQuotedStrings && sc.Match('`')) {
  1011. sc.SetState(SCE_C_STRINGRAW|activitySet);
  1012. rawStringTerminator = "`";
  1013. } else if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
  1014. if (lastWordWasUUID) {
  1015. sc.SetState(SCE_C_UUID|activitySet);
  1016. lastWordWasUUID = false;
  1017. } else {
  1018. sc.SetState(SCE_C_NUMBER|activitySet);
  1019. }
  1020. } else if (!sc.atLineEnd && (setWordStart.Contains(sc.ch) || (sc.ch == '@'))) {
  1021. if (lastWordWasUUID) {
  1022. sc.SetState(SCE_C_UUID|activitySet);
  1023. lastWordWasUUID = false;
  1024. } else {
  1025. sc.SetState(SCE_C_IDENTIFIER|activitySet);
  1026. }
  1027. } else if (sc.Match('/', '*')) {
  1028. if (sc.Match("/**") || sc.Match("/*!")) { // Support of Qt/Doxygen doc. style
  1029. sc.SetState(SCE_C_COMMENTDOC|activitySet);
  1030. } else {
  1031. sc.SetState(SCE_C_COMMENT|activitySet);
  1032. }
  1033. sc.Forward(); // Eat the * so it isn't used for the end of the comment
  1034. } else if (sc.Match('/', '/')) {
  1035. if ((sc.Match("///") && !sc.Match("////")) || sc.Match("//!"))
  1036. // Support of Qt/Doxygen doc. style
  1037. sc.SetState(SCE_C_COMMENTLINEDOC|activitySet);
  1038. else
  1039. sc.SetState(SCE_C_COMMENTLINE|activitySet);
  1040. } else if (sc.ch == '/'
  1041. && (setOKBeforeRE.Contains(chPrevNonWhite)
  1042. || followsReturnKeyword(sc, styler))
  1043. && (!setCouldBePostOp.Contains(chPrevNonWhite)
  1044. || !FollowsPostfixOperator(sc, styler))) {
  1045. sc.SetState(SCE_C_REGEX|activitySet); // JavaScript's RegEx
  1046. inRERange = false;
  1047. } else if (sc.ch == '\"') {
  1048. if (sc.chPrev == 'R') {
  1049. styler.Flush();
  1050. if (MaskActive(styler.StyleAt(sc.currentPos - 1)) == SCE_C_STRINGRAW) {
  1051. sc.SetState(SCE_C_STRINGRAW|activitySet);
  1052. rawStringTerminator = ")";
  1053. for (int termPos = sc.currentPos + 1;; termPos++) {
  1054. char chTerminator = styler.SafeGetCharAt(termPos, '(');
  1055. if (chTerminator == '(')
  1056. break;
  1057. rawStringTerminator += chTerminator;
  1058. }
  1059. rawStringTerminator += '\"';
  1060. } else {
  1061. sc.SetState(SCE_C_STRING|activitySet);
  1062. }
  1063. } else {
  1064. sc.SetState(SCE_C_STRING|activitySet);
  1065. }
  1066. isIncludePreprocessor = false; // ensure that '>' won't end the string
  1067. } else if (isIncludePreprocessor && sc.ch == '<') {
  1068. sc.SetState(SCE_C_STRING|activitySet);
  1069. } else if (sc.ch == '\'') {
  1070. sc.SetState(SCE_C_CHARACTER|activitySet);
  1071. } else if (sc.ch == '#' && visibleChars == 0) {
  1072. // Preprocessor commands are alone on their line
  1073. sc.SetState(SCE_C_PREPROCESSOR|activitySet);
  1074. // Skip whitespace between # and preprocessor word
  1075. do {
  1076. sc.Forward();
  1077. } while ((sc.ch == ' ' || sc.ch == '\t') && sc.More());
  1078. if (sc.atLineEnd) {
  1079. sc.SetState(SCE_C_DEFAULT|activitySet);
  1080. } else if (sc.Match("include")) {
  1081. isIncludePreprocessor = true;
  1082. } else {
  1083. if (options.trackPreprocessor) {
  1084. if (sc.Match("ifdef") || sc.Match("ifndef")) {
  1085. bool isIfDef = sc.Match("ifdef");
  1086. int i = isIfDef ? 5 : 6;
  1087. std::string restOfLine = GetRestOfLine(styler, sc.currentPos + i + 1, false);
  1088. bool foundDef = preprocessorDefinitions.find(restOfLine) != preprocessorDefinitions.end();
  1089. preproc.StartSection(isIfDef == foundDef);
  1090. } else if (sc.Match("if")) {
  1091. std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
  1092. bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
  1093. preproc.StartSection(ifGood);
  1094. } else if (sc.Match("else")) {
  1095. if (!preproc.CurrentIfTaken()) {
  1096. preproc.InvertCurrentLevel();
  1097. activitySet = preproc.IsInactive() ? activeFlag : 0;
  1098. if (!activitySet)
  1099. sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
  1100. } else if (!preproc.IsInactive()) {
  1101. preproc.InvertCurrentLevel();
  1102. activitySet = preproc.IsInactive() ? activeFlag : 0;
  1103. if (!activitySet)
  1104. sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
  1105. }
  1106. } else if (sc.Match("elif")) {
  1107. // Ensure only one chosen out of #if .. #elif .. #elif .. #else .. #endif
  1108. if (!preproc.CurrentIfTaken()) {
  1109. // Similar to #if
  1110. std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 2, true);
  1111. bool ifGood = EvaluateExpression(restOfLine, preprocessorDefinitions);
  1112. if (ifGood) {
  1113. preproc.InvertCurrentLevel();
  1114. activitySet = preproc.IsInactive() ? activeFlag : 0;
  1115. if (!activitySet)
  1116. sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
  1117. }
  1118. } else if (!preproc.IsInactive()) {
  1119. preproc.InvertCurrentLevel();
  1120. activitySet = preproc.IsInactive() ? activeFlag : 0;
  1121. if (!activitySet)
  1122. sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
  1123. }
  1124. } else if (sc.Match("endif")) {
  1125. preproc.EndSection();
  1126. activitySet = preproc.IsInactive() ? activeFlag : 0;
  1127. sc.ChangeState(SCE_C_PREPROCESSOR|activitySet);
  1128. } else if (sc.Match("define")) {
  1129. if (options.updatePreprocessor && !preproc.IsInactive()) {
  1130. std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 6, true);
  1131. size_t startName = 0;
  1132. while ((startName < restOfLine.length()) && IsSpaceOrTab(restOfLine[startName]))
  1133. startName++;
  1134. size_t endName = startName;
  1135. while ((endName < restOfLine.length()) && setWord.Contains(static_cast<unsigned char>(restOfLine[endName])))
  1136. endName++;
  1137. std::string key = restOfLine.substr(startName, endName-startName);
  1138. if ((endName < restOfLine.length()) && (restOfLine.at(endName) == '(')) {
  1139. // Macro
  1140. size_t endArgs = endName;
  1141. while ((endArgs < restOfLine.length()) && (restOfLine[endArgs] != ')'))
  1142. endArgs++;
  1143. std::string args = restOfLine.substr(endName + 1, endArgs - endName - 1);
  1144. size_t startValue = endArgs+1;
  1145. while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
  1146. startValue++;
  1147. std::string value;
  1148. if (startValue < restOfLine.length())
  1149. value = restOfLine.substr(startValue);
  1150. preprocessorDefinitions[key] = SymbolValue(value, args);
  1151. ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value, false, args));
  1152. definitionsChanged = true;
  1153. } else {
  1154. // Value
  1155. size_t startValue = endName;
  1156. while ((startValue < restOfLine.length()) && IsSpaceOrTab(restOfLine[startValue]))
  1157. startValue++;
  1158. std::string value = restOfLine.substr(startValue);
  1159. preprocessorDefinitions[key] = value;
  1160. ppDefineHistory.push_back(PPDefinition(lineCurrent, key, value));
  1161. definitionsChanged = true;
  1162. }
  1163. }
  1164. } else if (sc.Match("undef")) {
  1165. if (options.updatePreprocessor && !preproc.IsInactive()) {
  1166. const std::string restOfLine = GetRestOfLine(styler, sc.currentPos + 5, false);
  1167. std::vector<std::string> tokens = Tokenize(restOfLine);
  1168. if (tokens.size() >= 1) {
  1169. const std::string key = tokens[0];
  1170. preprocessorDefinitions.erase(key);
  1171. ppDefineHistory.push_back(PPDefinition(lineCurrent, key, "", true));
  1172. definitionsChanged = true;
  1173. }
  1174. }
  1175. }
  1176. }
  1177. }
  1178. } else if (isoperator(sc.ch)) {
  1179. sc.SetState(SCE_C_OPERATOR|activitySet);
  1180. }
  1181. }
  1182. if (!IsASpace(sc.ch) && !IsSpaceEquiv(MaskActive(sc.state))) {
  1183. chPrevNonWhite = sc.ch;
  1184. visibleChars++;
  1185. }
  1186. continuationLine = false;
  1187. sc.Forward();
  1188. }
  1189. const bool rawStringsChanged = rawStringTerminators.Merge(rawSTNew, lineCurrent);
  1190. if (definitionsChanged || rawStringsChanged)
  1191. styler.ChangeLexerState(startPos, startPos + length);
  1192. sc.Complete();
  1193. }
  1194. // Store both the current line's fold level and the next lines in the
  1195. // level store to make it easy to pick up with each increment
  1196. // and to make it possible to fiddle the current level for "} else {".
  1197. void SCI_METHOD LexerCPP::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
  1198. if (!options.fold)
  1199. return;
  1200. LexAccessor styler(pAccess);
  1201. unsigned int endPos = startPos + length;
  1202. int visibleChars = 0;
  1203. bool inLineComment = false;
  1204. int lineCurrent = styler.GetLine(startPos);
  1205. int levelCurrent = SC_FOLDLEVELBASE;
  1206. if (lineCurrent > 0)
  1207. levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
  1208. unsigned int lineStartNext = styler.LineStart(lineCurrent+1);
  1209. int levelMinCurrent = levelCurrent;
  1210. int levelNext = levelCurrent;
  1211. char chNext = styler[startPos];
  1212. int styleNext = MaskActive(styler.StyleAt(startPos));
  1213. int style = MaskActive(initStyle);
  1214. const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
  1215. for (unsigned int i = startPos; i < endPos; i++) {
  1216. char ch = chNext;
  1217. chNext = styler.SafeGetCharAt(i + 1);
  1218. int stylePrev = style;
  1219. style = styleNext;
  1220. styleNext = MaskActive(styler.StyleAt(i + 1));
  1221. bool atEOL = i == (lineStartNext-1);
  1222. if ((style == SCE_C_COMMENTLINE) || (style == SCE_C_COMMENTLINEDOC))
  1223. inLineComment = true;
  1224. if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
  1225. if (!IsStreamCommentStyle(stylePrev)) {
  1226. levelNext++;
  1227. } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
  1228. // Comments don't end at end of line and the next character may be unstyled.
  1229. levelNext--;
  1230. }
  1231. }
  1232. if (options.foldComment && options.foldCommentExplicit && ((style == SCE_C_COMMENTLINE) || options.foldExplicitAnywhere)) {
  1233. if (userDefinedFoldMarkers) {
  1234. if (styler.Match(i, options.foldExplicitStart.c_str())) {
  1235. levelNext++;
  1236. } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
  1237. levelNext--;
  1238. }
  1239. } else {
  1240. if ((ch == '/') && (chNext == '/')) {
  1241. char chNext2 = styler.SafeGetCharAt(i + 2);
  1242. if (chNext2 == '{') {
  1243. levelNext++;
  1244. } else if (chNext2 == '}') {
  1245. levelNext--;
  1246. }
  1247. }
  1248. }
  1249. }
  1250. if (options.foldPreprocessor && (style == SCE_C_PREPROCESSOR)) {
  1251. if (ch == '#') {
  1252. unsigned int j = i + 1;
  1253. while ((j < endPos) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
  1254. j++;
  1255. }
  1256. if (styler.Match(j, "region") || styler.Match(j, "if")) {
  1257. levelNext++;
  1258. } else if (styler.Match(j, "end")) {
  1259. levelNext--;
  1260. }
  1261. }
  1262. }
  1263. if (options.foldSyntaxBased && (style == SCE_C_OPERATOR)) {
  1264. if (ch == '{' || ch == '[') {
  1265. // Measure the minimum before a '{' to allow
  1266. // folding on "} else {"
  1267. if (levelMinCurrent > levelNext) {
  1268. levelMinCurrent = levelNext;
  1269. }
  1270. levelNext++;
  1271. } else if (ch == '}' || ch == ']') {
  1272. levelNext--;
  1273. }
  1274. }
  1275. if (!IsASpace(ch))
  1276. visibleChars++;
  1277. if (atEOL || (i == endPos-1)) {
  1278. int levelUse = levelCurrent;
  1279. if (options.foldSyntaxBased && options.foldAtElse) {
  1280. levelUse = levelMinCurrent;
  1281. }
  1282. int lev = levelUse | levelNext << 16;
  1283. if (visibleChars == 0 && options.foldCompact)
  1284. lev |= SC_FOLDLEVELWHITEFLAG;
  1285. if (levelUse < levelNext)
  1286. lev |= SC_FOLDLEVELHEADERFLAG;
  1287. if (lev != styler.LevelAt(lineCurrent)) {
  1288. styler.SetLevel(lineCurrent, lev);
  1289. }
  1290. lineCurrent++;
  1291. lineStartNext = styler.LineStart(lineCurrent+1);
  1292. levelCurrent = levelNext;
  1293. levelMinCurrent = levelCurrent;
  1294. if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
  1295. // There is an empty line at end of file so give it same level and empty
  1296. styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
  1297. }
  1298. visibleChars = 0;
  1299. inLineComment = false;
  1300. }
  1301. }
  1302. }
  1303. void LexerCPP::EvaluateTokens(std::vector<std::string> &tokens, const SymbolTable &preprocessorDefinitions) {
  1304. // Remove whitespace tokens
  1305. tokens.erase(std::remove_if(tokens.begin(), tokens.end(), OnlySpaceOrTab), tokens.end());
  1306. // Evaluate defined statements to either 0 or 1
  1307. for (size_t i=0; (i+1)<tokens.size();) {
  1308. if (tokens[i] == "defined") {
  1309. const char *val = "0";
  1310. if (tokens[i+1] == "(") {
  1311. if (((i + 2)<tokens.size()) && (tokens[i + 2] == ")")) {
  1312. // defined()
  1313. tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 3);
  1314. } else if (((i+3)<tokens.size()) && (tokens[i+3] == ")")) {
  1315. // defined(<identifier>)
  1316. SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+2]);
  1317. if (it != preprocessorDefinitions.end()) {
  1318. val = "1";
  1319. }
  1320. tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 4);
  1321. } else {
  1322. // Spurious '(' so erase as more likely to result in false
  1323. tokens.erase(tokens.begin() + i + 1, tokens.begin() + i + 2);
  1324. }
  1325. } else {
  1326. // defined <identifier>
  1327. SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i+1]);
  1328. if (it != preprocessorDefinitions.end()) {
  1329. val = "1";
  1330. }
  1331. }
  1332. tokens[i] = val;
  1333. } else {
  1334. i++;
  1335. }
  1336. }
  1337. // Evaluate identifiers
  1338. const size_t maxIterations = 100;
  1339. size_t iterations = 0; // Limit number of iterations in case there is a recursive macro.
  1340. for (size_t i = 0; (i<tokens.size()) && (iterations < maxIterations);) {
  1341. iterations++;
  1342. if (setWordStart.Contains(static_cast<unsigned char>(tokens[i][0]))) {
  1343. SymbolTable::const_iterator it = preprocessorDefinitions.find(tokens[i]);
  1344. if (it != preprocessorDefinitions.end()) {
  1345. // Tokenize value
  1346. std::vector<std::string> macroTokens = Tokenize(it->second.value);
  1347. if (it->second.IsMacro()) {
  1348. if ((i + 1 < tokens.size()) && (tokens.at(i + 1) == "(")) {
  1349. // Create map of argument name to value
  1350. std::vector<std::string> argumentNames = StringSplit(it->second.arguments, ',');
  1351. std::map<std::string, std::string> arguments;
  1352. size_t arg = 0;
  1353. size_t tok = i+2;
  1354. while ((tok < tokens.size()) && (arg < argumentNames.size()) && (tokens.at(tok) != ")")) {
  1355. if (tokens.at(tok) != ",") {
  1356. arguments[argumentNames.at(arg)] = tokens.at(tok);
  1357. arg++;
  1358. }
  1359. tok++;
  1360. }
  1361. // Remove invocation
  1362. tokens.erase(tokens.begin() + i, tokens.begin() + tok + 1);
  1363. // Substitute values into macro
  1364. macroTokens.erase(std::remove_if(macroTokens.begin(), macroTokens.end(), OnlySpaceOrTab), macroTokens.end());
  1365. for (size_t iMacro = 0; iMacro < macroTokens.size();) {
  1366. if (setWordStart.Contains(static_cast<unsigned char>(macroTokens[iMacro][0]))) {
  1367. std::map<std::string, std::string>::const_iterator itFind = arguments.find(macroTokens[iMacro]);
  1368. if (itFind != arguments.end()) {
  1369. // TODO: Possible that value will be expression so should insert tokenized form
  1370. macroTokens[iMacro] = itFind->second;
  1371. }
  1372. }
  1373. iMacro++;
  1374. }
  1375. // Insert results back into tokens
  1376. tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
  1377. } else {
  1378. i++;
  1379. }
  1380. } else {
  1381. // Remove invocation
  1382. tokens.erase(tokens.begin() + i);
  1383. // Insert results back into tokens
  1384. tokens.insert(tokens.begin() + i, macroTokens.begin(), macroTokens.end());
  1385. }
  1386. } else {
  1387. // Identifier not found
  1388. tokens.erase(tokens.begin() + i);
  1389. }
  1390. } else {
  1391. i++;
  1392. }
  1393. }
  1394. // Find bracketed subexpressions and recurse on them
  1395. BracketPair bracketPair = FindBracketPair(tokens);
  1396. while (bracketPair.itBracket != tokens.end()) {
  1397. std::vector<std::string> inBracket(bracketPair.itBracket + 1, bracketPair.itEndBracket);
  1398. EvaluateTokens(inBracket, preprocessorDefinitions);
  1399. // The insertion is done before the removal because there were failures with the opposite approach
  1400. tokens.insert(bracketPair.itBracket, inBracket.begin(), inBracket.end());
  1401. bracketPair = FindBracketPair(tokens);
  1402. tokens.erase(bracketPair.itBracket, bracketPair.itEndBracket + 1);
  1403. bracketPair = FindBracketPair(tokens);
  1404. }
  1405. // Evaluate logical negations
  1406. for (size_t j=0; (j+1)<tokens.size();) {
  1407. if (setNegationOp.Contains(tokens[j][0])) {
  1408. int isTrue = atoi(tokens[j+1].c_str());
  1409. if (tokens[j] == "!")
  1410. isTrue = !isTrue;
  1411. std::vector<std::string>::iterator itInsert =
  1412. tokens.erase(tokens.begin() + j, tokens.begin() + j + 2);
  1413. tokens.insert(itInsert, isTrue ? "1" : "0");
  1414. } else {
  1415. j++;
  1416. }
  1417. }
  1418. // Evaluate expressions in precedence order
  1419. enum precedence { precArithmetic, precRelative, precLogical };
  1420. for (int prec=precArithmetic; prec <= precLogical; prec++) {
  1421. // Looking at 3 tokens at a time so end at 2 before end
  1422. for (size_t k=0; (k+2)<tokens.size();) {
  1423. char chOp = tokens[k+1][0];
  1424. if (
  1425. ((prec==precArithmetic) && setArithmethicOp.Contains(chOp)) ||
  1426. ((prec==precRelative) && setRelOp.Contains(chOp)) ||
  1427. ((prec==precLogical) && setLogicalOp.Contains(chOp))
  1428. ) {
  1429. int valA = atoi(tokens[k].c_str());
  1430. int valB = atoi(tokens[k+2].c_str());
  1431. int result = 0;
  1432. if (tokens[k+1] == "+")
  1433. result = valA + valB;
  1434. else if (tokens[k+1] == "-")
  1435. result = valA - valB;
  1436. else if (tokens[k+1] == "*")
  1437. result = valA * valB;
  1438. else if (tokens[k+1] == "/")
  1439. result = valA / (valB ? valB : 1);
  1440. else if (tokens[k+1] == "%")
  1441. result = valA % (valB ? valB : 1);
  1442. else if (tokens[k+1] == "<")
  1443. result = valA < valB;
  1444. else if (tokens[k+1] == "<=")
  1445. result = valA <= valB;
  1446. else if (tokens[k+1] == ">")
  1447. result = valA > valB;
  1448. else if (tokens[k+1] == ">=")
  1449. result = valA >= valB;
  1450. else if (tokens[k+1] == "==")
  1451. result = valA == valB;
  1452. else if (tokens[k+1] == "!=")
  1453. result = valA != valB;
  1454. else if (tokens[k+1] == "||")
  1455. result = valA || valB;
  1456. else if (tokens[k+1] == "&&")
  1457. result = valA && valB;
  1458. char sResult[30];
  1459. sprintf(sResult, "%d", result);
  1460. std::vector<std::string>::iterator itInsert =
  1461. tokens.erase(tokens.begin() + k, tokens.begin() + k + 3);
  1462. tokens.insert(itInsert, sResult);
  1463. } else {
  1464. k++;
  1465. }
  1466. }
  1467. }
  1468. }
  1469. std::vector<std::string> LexerCPP::Tokenize(const std::string &expr) const {
  1470. // Break into tokens
  1471. std::vector<std::string> tokens;
  1472. const char *cp = expr.c_str();
  1473. while (*cp) {
  1474. std::string word;
  1475. if (setWord.Contains(static_cast<unsigned char>(*cp))) {
  1476. // Identifiers and numbers
  1477. while (setWord.Contains(static_cast<unsigned char>(*cp))) {
  1478. word += *cp;
  1479. cp++;
  1480. }
  1481. } else if (IsSpaceOrTab(*cp)) {
  1482. while (IsSpaceOrTab(*cp)) {
  1483. word += *cp;
  1484. cp++;
  1485. }
  1486. } else if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
  1487. word += *cp;
  1488. cp++;
  1489. if (setRelOp.Contains(static_cast<unsigned char>(*cp))) {
  1490. word += *cp;
  1491. cp++;
  1492. }
  1493. } else if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
  1494. word += *cp;
  1495. cp++;
  1496. if (setLogicalOp.Contains(static_cast<unsigned char>(*cp))) {
  1497. word += *cp;
  1498. cp++;
  1499. }
  1500. } else {
  1501. // Should handle strings, characters, and comments here
  1502. word += *cp;
  1503. cp++;
  1504. }
  1505. tokens.push_back(word);
  1506. }
  1507. return tokens;
  1508. }
  1509. bool LexerCPP::EvaluateExpression(const std::string &expr, const SymbolTable &preprocessorDefinitions) {
  1510. std::vector<std::string> tokens = Tokenize(expr);
  1511. EvaluateTokens(tokens, preprocessorDefinitions);
  1512. // "0" or "" -> false else true
  1513. bool isFalse = tokens.empty() ||
  1514. ((tokens.size() == 1) && ((tokens[0] == "") || tokens[0] == "0"));
  1515. return !isFalse;
  1516. }
  1517. LexerModule lmCPP(SCLEX_CPP, LexerCPP::LexerFactoryCPP, "cpp", cppWordLists);
  1518. LexerModule lmCPPNoCase(SCLEX_CPPNOCASE, LexerCPP::LexerFactoryCPPInsensitive, "cppnocase", cppWordLists);