PageRenderTime 60ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/src/main/java/com/laytonsmith/core/MethodScriptCompiler.java

https://github.com/sk89q/commandhelper
Java | 2934 lines | 2163 code | 120 blank | 651 comment | 569 complexity | 50b12a10ffc4b42fd9564cdad46af309 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. package com.laytonsmith.core;
  2. import com.laytonsmith.PureUtilities.Common.FileUtil;
  3. import com.laytonsmith.PureUtilities.Common.StringUtils;
  4. import com.laytonsmith.PureUtilities.SmartComment;
  5. import com.laytonsmith.annotations.OperatorPreferred;
  6. import com.laytonsmith.annotations.breakable;
  7. import com.laytonsmith.annotations.nolinking;
  8. import com.laytonsmith.annotations.unbreakable;
  9. import com.laytonsmith.core.Optimizable.OptimizationOption;
  10. import com.laytonsmith.core.compiler.BranchStatement;
  11. import com.laytonsmith.core.compiler.CompilerEnvironment;
  12. import com.laytonsmith.core.compiler.CompilerWarning;
  13. import com.laytonsmith.core.compiler.FileOptions;
  14. import com.laytonsmith.core.compiler.KeywordList;
  15. import com.laytonsmith.core.compiler.TokenStream;
  16. import com.laytonsmith.core.compiler.analysis.StaticAnalysis;
  17. import com.laytonsmith.core.compiler.keywords.ObjectDefinitionKeyword;
  18. import com.laytonsmith.core.constructs.CBareString;
  19. import com.laytonsmith.core.constructs.CDecimal;
  20. import com.laytonsmith.core.constructs.CDouble;
  21. import com.laytonsmith.core.constructs.CFunction;
  22. import com.laytonsmith.core.constructs.CInt;
  23. import com.laytonsmith.core.constructs.CKeyword;
  24. import com.laytonsmith.core.constructs.CLabel;
  25. import com.laytonsmith.core.constructs.CNull;
  26. import com.laytonsmith.core.constructs.CPreIdentifier;
  27. import com.laytonsmith.core.constructs.CSlice;
  28. import com.laytonsmith.core.constructs.CString;
  29. import com.laytonsmith.core.constructs.CSymbol;
  30. import com.laytonsmith.core.constructs.CVoid;
  31. import com.laytonsmith.core.constructs.Construct;
  32. import com.laytonsmith.core.constructs.IVariable;
  33. import com.laytonsmith.core.constructs.Target;
  34. import com.laytonsmith.core.constructs.Token;
  35. import com.laytonsmith.core.constructs.Token.TType;
  36. import com.laytonsmith.core.constructs.Variable;
  37. import com.laytonsmith.core.environments.Environment;
  38. import com.laytonsmith.core.environments.GlobalEnv;
  39. import com.laytonsmith.core.exceptions.CRE.CRECastException;
  40. import com.laytonsmith.core.exceptions.CRE.CRERangeException;
  41. import com.laytonsmith.core.exceptions.ConfigCompileException;
  42. import com.laytonsmith.core.exceptions.ConfigCompileGroupException;
  43. import com.laytonsmith.core.exceptions.ConfigRuntimeException;
  44. import com.laytonsmith.core.exceptions.ProgramFlowManipulationException;
  45. import com.laytonsmith.core.extensions.ExtensionManager;
  46. import com.laytonsmith.core.extensions.ExtensionTracker;
  47. import com.laytonsmith.core.functions.Compiler;
  48. import com.laytonsmith.core.functions.Compiler.__autoconcat__;
  49. import com.laytonsmith.core.functions.Compiler.__cbrace__;
  50. import com.laytonsmith.core.functions.Compiler.p;
  51. import com.laytonsmith.core.functions.Compiler.__smart_string__;
  52. import com.laytonsmith.core.functions.Math.neg;
  53. import com.laytonsmith.core.functions.ControlFlow;
  54. import com.laytonsmith.core.functions.DataHandling;
  55. import com.laytonsmith.core.functions.Function;
  56. import com.laytonsmith.core.functions.FunctionBase;
  57. import com.laytonsmith.core.functions.FunctionList;
  58. import com.laytonsmith.core.functions.IncludeCache;
  59. import com.laytonsmith.core.functions.ArrayHandling.array_get;
  60. import com.laytonsmith.core.natives.interfaces.Mixed;
  61. import com.laytonsmith.persistence.DataSourceException;
  62. import java.io.File;
  63. import java.io.IOException;
  64. import java.net.URISyntaxException;
  65. import java.util.ArrayList;
  66. import java.util.Arrays;
  67. import java.util.Collection;
  68. import java.util.Collections;
  69. import java.util.EmptyStackException;
  70. import java.util.EnumSet;
  71. import java.util.HashMap;
  72. import java.util.HashSet;
  73. import java.util.Iterator;
  74. import java.util.LinkedList;
  75. import java.util.List;
  76. import java.util.ListIterator;
  77. import java.util.Map;
  78. import java.util.NoSuchElementException;
  79. import java.util.Objects;
  80. import java.util.Set;
  81. import java.util.Stack;
  82. import java.util.concurrent.atomic.AtomicInteger;
  83. import java.util.regex.Pattern;
  84. /**
  85. * The MethodScriptCompiler class handles the various stages of compilation and provides helper methods for execution of
  86. * the compiled trees.
  87. */
  88. public final class MethodScriptCompiler {
  /**
   * Pattern for a {@code $variable} token: a dollar sign followed by one or more letters, digits (0-9) or
   * underscores.
   */
  private static final Pattern VAR_PATTERN = Pattern.compile("\\$[\\p{L}0-9_]+");

  /**
   * Pattern for an ivariable token, compiled from {@link IVariable#VARIABLE_NAME_REGEX}.
   */
  private static final Pattern IVAR_PATTERN = Pattern.compile(IVariable.VARIABLE_NAME_REGEX);

  /**
   * An empty set of optimization options.
   */
  private static final EnumSet<OptimizationOption> NO_OPTIMIZATIONS = EnumSet.noneOf(OptimizationOption.class);

  /**
   * Private so that this class cannot be instantiated from outside.
   */
  private MethodScriptCompiler() {
  }
  94. /**
  95. * Lexes the script, and turns it into a token stream. This looks through the script character by character.
  96. *
  97. * @param script The script to lex
  98. * @param file The file this script came from, or potentially null if the code is from a dynamic source
  99. * @param inPureMScript If the script is in pure MethodScript, this should be true. Pure MethodScript is defined as
  100. * code that doesn't have command alias wrappers.
  101. * @return A stream of tokens
  102. * @throws ConfigCompileException If compilation fails due to bad syntax
  103. */
  104. public static TokenStream lex(String script, Environment env, File file, boolean inPureMScript)
  105. throws ConfigCompileException {
  106. return lex(script, env, file, inPureMScript, false);
  107. }
  108. /**
  109. * Lexes the script, and turns it into a token stream. This looks through the script character by character.
  110. *
  111. * @param script The script to lex
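  112. * @param env The environment. If null, a new environment holding a CompilerEnvironment is created; if it has no CompilerEnvironment, it is cloned with one added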
  113. * @param file The file this script came from, or potentially null if the code is from a dynamic source
  114. * @param inPureMScript If the script is in pure MethodScript, this should be true. Pure MethodScript is defined as
  115. * code that doesn't have command alias wrappers.
  116. * @param saveAllTokens If this script is planning to be compiled, then this value should always be false, however,
  117. * if the calling code needs all tokens for informational purposes (and doesn't plan on actually compiling the code)
  118. * then this can be true. If true, all tokens are saved, including comments and (some) whitespace. Given this lexing
  119. * stream, the exact source code could be re-constructed.
  120. *
  121. * A note on whitespace: The whitespace tokens are not guaranteed to be accurate, however, the column information
  122. * is. If you have two tokens t1 and t2, each with a value of length 1, where the columns are 1 and 5, then that
  123. * means there are 4 spaces between the two.
  124. * @return A stream of tokens
  125. * @throws ConfigCompileException If compilation fails due to bad syntax
  126. */
  127. public static TokenStream lex(String script, Environment env, File file,
  128. boolean inPureMScript, boolean saveAllTokens) throws ConfigCompileException {
  129. if(env == null) {
  130. // We MUST have a CompilerEnvironment, but it doesn't need to be used, but we have to create it at this
  131. // stage.
  132. env = Environment.createEnvironment(new CompilerEnvironment());
  133. }
  134. if(!env.hasEnv(CompilerEnvironment.class)) {
  135. env = env.cloneAndAdd(new CompilerEnvironment());
  136. }
  137. if(script.isEmpty()) {
  138. return new TokenStream(new LinkedList<>(), "", new HashMap<>());
  139. }
  140. if((int) script.charAt(0) == 65279) {
  141. // Remove the UTF-8 Byte Order Mark, if present.
  142. script = script.substring(1);
  143. }
  144. final StringBuilder fileOptions = new StringBuilder();
  145. script = script.replaceAll("\r\n", "\n");
  146. script = script + "\n";
  147. final Set<String> keywords = KeywordList.getKeywordNames();
  148. final TokenStream tokenList = new TokenStream();
  149. // Set our state variables.
  150. boolean stateInQuote = false;
  151. int quoteLineNumberStart = 1;
  152. boolean inSmartQuote = false;
  153. int smartQuoteLineNumberStart = 1;
  154. boolean inComment = false;
  155. int commentLineNumberStart = 1;
  156. boolean commentIsBlock = false;
  157. boolean inOptVar = false;
  158. boolean inCommand = (!inPureMScript);
  159. boolean inMultiline = false;
  160. boolean inSmartComment = false;
  161. boolean inFileOptions = false;
  162. boolean inAnnotation = false;
  163. int fileOptionsLineNumberStart = 1;
  164. StringBuilder buf = new StringBuilder();
  165. int lineNum = 1;
  166. int column = 1;
  167. int lastColumn = 0;
  168. Target target = Target.UNKNOWN;
  169. // Lex the script character by character.
  170. for(int i = 0; i < script.length(); i++) {
  171. Character c = script.charAt(i);
  172. Character c2 = null;
  173. if(i < script.length() - 1) {
  174. c2 = script.charAt(i + 1);
  175. }
  176. column += i - lastColumn;
  177. lastColumn = i;
  178. if(c == '\n') {
  179. lineNum++;
  180. column = 1;
  181. if(!inMultiline && !inPureMScript) {
  182. inCommand = true;
  183. }
  184. }
  185. if(buf.length() == 0) {
  186. target = new Target(lineNum, file, column);
  187. }
  188. // If we are in file options, add the character to the buffer if it's not a file options end character.
  189. if(inFileOptions) {
  190. // For a '>' character outside of a comment, '\>' would have to be used in file options.
  191. // Characters other than '>' cannot be escaped.
  192. // If support for more escaped characters would be desired in the future, it could be added here.
  193. switch(c) {
  194. case '\\': {
  195. if(c2 == '>') { // "\>".
  196. fileOptions.append('>');
  197. i++;
  198. continue;
  199. }
  200. break;
  201. }
  202. case '>': {
  203. if(saveAllTokens) {
  204. tokenList.add(new Token(TType.FILE_OPTIONS_STRING,
  205. fileOptions.toString(), target));
  206. tokenList.add(new Token(TType.FILE_OPTIONS_END, ">", target));
  207. }
  208. inFileOptions = false;
  209. continue;
  210. }
  211. }
  212. fileOptions.append(c);
  213. continue;
  214. }
  215. // Comment handling. This is bypassed if we are in a string.
  216. if(!stateInQuote && !inSmartQuote) {
  217. switch(c) {
  218. // Block comments start (/* and /**) and Double slash line comment start (//).
  219. case '/': {
  220. if(!inComment) {
  221. if(c2 == '*') { // "/*" or "/**".
  222. buf.append("/*");
  223. inComment = true;
  224. commentIsBlock = true;
  225. if(i < script.length() - 2 && script.charAt(i + 2) == '*') { // "/**".
  226. inSmartComment = true;
  227. buf.append("*");
  228. i++;
  229. }
  230. commentLineNumberStart = lineNum;
  231. i++;
  232. continue;
  233. } else if(c2 == '/') { // "//".
  234. buf.append("//");
  235. inComment = true;
  236. i++;
  237. continue;
  238. }
  239. }
  240. break;
  241. }
  242. // Line comment start (#).
  243. case '#': {
  244. if(!inComment) { // "#".
  245. buf.append("#");
  246. inComment = true;
  247. continue;
  248. }
  249. break;
  250. }
  251. // Block comment end (*/).
  252. case '*': {
  253. if(inComment && commentIsBlock && c2 == '/') { // "*/".
  254. if(saveAllTokens || inSmartComment) {
  255. buf.append("*/");
  256. validateTerminatedBidiSequence(buf.toString(), target);
  257. tokenList.add(new Token(inSmartComment ? TType.SMART_COMMENT : TType.COMMENT,
  258. buf.toString(), target));
  259. }
  260. buf = new StringBuilder();
  261. target = new Target(lineNum, file, column);
  262. inComment = false;
  263. commentIsBlock = false;
  264. inSmartComment = false;
  265. i++;
  266. continue;
  267. }
  268. break;
  269. }
  270. // Line comment end (\n).
  271. case '\n': {
  272. if(inComment && !commentIsBlock) { // "\n".
  273. inComment = false;
  274. if(saveAllTokens) {
  275. validateTerminatedBidiSequence(buf.toString(), target);
  276. tokenList.add(new Token(TType.COMMENT, buf.toString(), target));
  277. tokenList.add(new Token(TType.NEWLINE, "\n", new Target(lineNum + 1, file, 0)));
  278. }
  279. buf = new StringBuilder();
  280. target = new Target(lineNum, file, column);
  281. continue;
  282. }
  283. break;
  284. }
  285. }
  286. }
  287. // If we are in a comment, add the character to the buffer.
  288. if(inComment || (inAnnotation && c != '}')) {
  289. buf.append(c);
  290. continue;
  291. }
  292. // Handle non-comment non-quoted characters.
  293. if(!stateInQuote) {
  294. // We're not in a comment or quoted string, handle: +=, -=, *=, /=, .=, ->, ++, --, %, **, *, +, -, /,
  295. // >=, <=, <<<, >>>, <, >, ===, !==, ==, !=, &&&, |||, &&, ||, !, {, }, .., ., ::, [, =, ], :, comma,
  296. // (, ), ;, and whitespace.
  297. matched:
  298. {
  299. Token token;
  300. switch(c) {
  301. case '+': {
  302. if(c2 == '=') { // "+=".
  303. token = new Token(TType.PLUS_ASSIGNMENT, "+=", target);
  304. i++;
  305. } else if(c2 == '+') { // "++".
  306. token = new Token(TType.INCREMENT, "++", target);
  307. i++;
  308. } else { // "+".
  309. token = new Token(TType.PLUS, "+", target);
  310. }
  311. break;
  312. }
  313. case '-': {
  314. if(c2 == '=') { // "-=".
  315. token = new Token(TType.MINUS_ASSIGNMENT, "-=", target);
  316. i++;
  317. } else if(c2 == '-') { // "--".
  318. token = new Token(TType.DECREMENT, "--", target);
  319. i++;
  320. } else if(c2 == '>') { // "->".
  321. token = new Token(TType.DEREFERENCE, "->", target);
  322. i++;
  323. } else { // "-".
  324. token = new Token(TType.MINUS, "-", target);
  325. }
  326. break;
  327. }
  328. case '*': {
  329. if(c2 == '=') { // "*=".
  330. token = new Token(TType.MULTIPLICATION_ASSIGNMENT, "*=", target);
  331. i++;
  332. } else if(c2 == '*') { // "**".
  333. token = new Token(TType.EXPONENTIAL, "**", target);
  334. i++;
  335. } else { // "*".
  336. token = new Token(TType.MULTIPLICATION, "*", target);
  337. }
  338. break;
  339. }
  340. case '/': {
  341. if(c2 == '=') { // "/=".
  342. token = new Token(TType.DIVISION_ASSIGNMENT, "/=", target);
  343. i++;
  344. } else { // "/".
  345. // Protect against matching commands.
  346. if(Character.isLetter(c2)) {
  347. break matched; // Pretend that division didn't match.
  348. }
  349. token = new Token(TType.DIVISION, "/", target);
  350. }
  351. break;
  352. }
  353. case '.': {
  354. if(c2 == '=') { // ".=".
  355. token = new Token(TType.CONCAT_ASSIGNMENT, ".=", target);
  356. i++;
  357. } else if(c2 == '.') { // "..".
  358. token = new Token(TType.SLICE, "..", target);
  359. i++;
  360. } else { // ".".
  361. token = new Token(TType.DOT, ".", target);
  362. }
  363. break;
  364. }
  365. case '%': {
  366. token = new Token(TType.MODULO, "%", target);
  367. break;
  368. }
  369. case '>': {
  370. if(c2 == '=') { // ">=".
  371. token = new Token(TType.GTE, ">=", target);
  372. i++;
  373. } else if(c2 == '>' && i < script.length() - 2 && script.charAt(i + 2) == '>') { // ">>>".
  374. token = new Token(TType.MULTILINE_START, ">>>", target);
  375. inMultiline = true;
  376. i += 2;
  377. } else { // ">".
  378. token = new Token(TType.GT, ">", target);
  379. }
  380. break;
  381. }
  382. case '<': {
  383. if(c2 == '!') { // "<!".
  384. if(buf.length() > 0) {
  385. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  386. buf = new StringBuilder();
  387. target = new Target(lineNum, file, column);
  388. }
  389. if(saveAllTokens) {
  390. tokenList.add(new Token(TType.FILE_OPTIONS_START, "<!", target));
  391. }
  392. inFileOptions = true;
  393. fileOptionsLineNumberStart = lineNum;
  394. i++;
  395. continue;
  396. } else if(c2 == '=') { // "<=".
  397. token = new Token(TType.LTE, "<=", target);
  398. i++;
  399. } else if(c2 == '<' && i < script.length() - 2 && script.charAt(i + 2) == '<') { // "<<<".
  400. token = new Token(TType.MULTILINE_END, "<<<", target);
  401. inMultiline = false;
  402. i += 2;
  403. } else { // "<".
  404. token = new Token(TType.LT, "<", target);
  405. }
  406. break;
  407. }
  408. case '=': {
  409. if(c2 == '=') {
  410. if(i < script.length() - 2 && script.charAt(i + 2) == '=') { // "===".
  411. token = new Token(TType.STRICT_EQUALS, "===", target);
  412. i += 2;
  413. } else { // "==".
  414. token = new Token(TType.EQUALS, "==", target);
  415. i++;
  416. }
  417. } else { // "=".
  418. if(inCommand) {
  419. if(inOptVar) {
  420. token = new Token(TType.OPT_VAR_ASSIGN, "=", target);
  421. } else {
  422. token = new Token(TType.ALIAS_END, "=", target);
  423. inCommand = false;
  424. }
  425. } else {
  426. token = new Token(TType.ASSIGNMENT, "=", target);
  427. }
  428. }
  429. break;
  430. }
  431. case '!': {
  432. if(c2 == '=') {
  433. if(i < script.length() - 2 && script.charAt(i + 2) == '=') { // "!==".
  434. token = new Token(TType.STRICT_NOT_EQUALS, "!==", target);
  435. i += 2;
  436. } else { // "!=".
  437. token = new Token(TType.NOT_EQUALS, "!=", target);
  438. i++;
  439. }
  440. } else { // "!".
  441. token = new Token(TType.LOGICAL_NOT, "!", target);
  442. }
  443. break;
  444. }
  445. case '&': {
  446. if(c2 == '&') {
  447. if(i < script.length() - 2 && script.charAt(i + 2) == '&') { // "&&&".
  448. token = new Token(TType.DEFAULT_AND, "&&&", target);
  449. i += 2;
  450. } else { // "&&".
  451. token = new Token(TType.LOGICAL_AND, "&&", target);
  452. i++;
  453. }
  454. } else { // "&".
  455. // Bitwise symbols are not used yet.
  456. break matched; // Pretend that bitwise AND didn't match.
  457. // token = new Token(TType.BIT_AND, "&", target);
  458. }
  459. break;
  460. }
  461. case '|': {
  462. if(c2 == '|') {
  463. if(i < script.length() - 2 && script.charAt(i + 2) == '|') { // "|||".
  464. token = new Token(TType.DEFAULT_OR, "|||", target);
  465. i += 2;
  466. } else { // "||".
  467. token = new Token(TType.LOGICAL_OR, "||", target);
  468. i++;
  469. }
  470. } else { // "|".
  471. // Bitwise symbols are not used yet.
  472. break matched; // Pretend that bitwise OR didn't match.
  473. // token = new Token(TType.BIT_OR, "|", target);
  474. }
  475. break;
  476. }
  477. // Bitwise symbols are not used yet.
  478. // case '^': {
  479. // token = new Token(TType.BIT_XOR, "^", target);
  480. // break;
  481. // }
  482. case ':': {
  483. if(c2 == ':') { // "::".
  484. token = new Token(TType.DEREFERENCE, "::", target);
  485. i++;
  486. } else { // ":".
  487. token = new Token(TType.LABEL, ":", target);
  488. }
  489. break;
  490. }
  491. case '{': {
  492. token = new Token(TType.LCURLY_BRACKET, "{", target);
  493. break;
  494. }
  495. case '}': {
  496. if(inAnnotation) {
  497. // Eventually, this will no longer be a comment type, but for now, we just want
  498. // to totally ignore annotations, as if they were comments.
  499. inAnnotation = false;
  500. token = new Token(/*TType.ANNOTATION*/TType.COMMENT, "@{" + buf.toString() + "}", target);
  501. buf = new StringBuilder();
  502. break;
  503. }
  504. token = new Token(TType.RCURLY_BRACKET, "}", target);
  505. break;
  506. }
  507. case '[': {
  508. token = new Token(TType.LSQUARE_BRACKET, "[", target);
  509. inOptVar = true;
  510. break;
  511. }
  512. case ']': {
  513. token = new Token(TType.RSQUARE_BRACKET, "]", target);
  514. inOptVar = false;
  515. break;
  516. }
  517. case ',': {
  518. token = new Token(TType.COMMA, ",", target);
  519. break;
  520. }
  521. case ';': {
  522. token = new Token(TType.SEMICOLON, ";", target);
  523. break;
  524. }
  525. case '(': {
  526. token = new Token(TType.FUNC_START, "(", target);
  527. // Handle the buffer or previous token, with the knowledge that a FUNC_START follows.
  528. if(buf.length() > 0) {
  529. if(saveAllTokens) {
  530. // In this case, we need to check for keywords first, because we want to go ahead
  531. // and convert into that stage. In the future, we might want to do this
  532. // unconditionally, but for now, just go ahead and only do it if saveAllTokens is
  533. // true, because we know that won't be used by the compiler.
  534. if(KeywordList.getKeywordByName(buf.toString()) != null) {
  535. // It's a keyword.
  536. tokenList.add(new Token(TType.KEYWORD, buf.toString(), target));
  537. } else {
  538. // It's not a keyword, but a normal function.
  539. tokenList.add(new Token(TType.FUNC_NAME, buf.toString(), target));
  540. }
  541. } else {
  542. tokenList.add(new Token(TType.FUNC_NAME, buf.toString(), target));
  543. }
  544. buf = new StringBuilder();
  545. target = new Target(lineNum, file, column);
  546. } else {
  547. // The previous token, if unknown, should be changed to a FUNC_NAME. If it's not
  548. // unknown, we may be doing standalone parenthesis, so auto tack on the __autoconcat__
  549. // function.
  550. try {
  551. int count = 0;
  552. Iterator<Token> it = tokenList.descendingIterator();
  553. Token t;
  554. while((t = it.next()).type == TType.WHITESPACE) {
  555. count++;
  556. }
  557. if(t.type == TType.UNKNOWN) {
  558. t.type = TType.FUNC_NAME;
  559. // Go ahead and remove the whitespace here too, they break things.
  560. count--;
  561. for(int a = 0; a < count; a++) {
  562. tokenList.removeLast();
  563. }
  564. } else {
  565. tokenList.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
  566. }
  567. } catch (NoSuchElementException e) {
  568. // This is the first element on the list, so, it's another autoconcat.
  569. tokenList.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
  570. }
  571. }
  572. break;
  573. }
  574. case ')': {
  575. token = new Token(TType.FUNC_END, ")", target);
  576. break;
  577. }
  578. case ' ': { // Whitespace case #1.
  579. token = new Token(TType.WHITESPACE, " ", target);
  580. break;
  581. }
  582. case '\t': { // Whitespace case #2 (TAB).
  583. token = new Token(TType.WHITESPACE, "\t", target);
  584. break;
  585. }
  586. case '@': {
  587. if(c2 == '{') {
  588. inAnnotation = true;
  589. i++;
  590. continue;
  591. }
  592. break matched;
  593. }
  594. default: {
  595. // No match was found at this point, so continue matching below.
  596. break matched;
  597. }
  598. }
  599. // Add previous characters as UNKNOWN token.
  600. if(buf.length() > 0) {
  601. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  602. buf = new StringBuilder();
  603. target = new Target(lineNum, file, column);
  604. }
  605. // Add the new token to the token list.
  606. tokenList.add(token);
  607. // Continue lexing.
  608. continue;
  609. }
  610. }
  611. // Handle non-comment characters that might start or stop a quoted string.
  612. switch(c) {
  613. case '\'': {
  614. if(stateInQuote && !inSmartQuote) {
  615. validateTerminatedBidiSequence(buf.toString(), target);
  616. tokenList.add(new Token(TType.STRING, buf.toString(), target));
  617. buf = new StringBuilder();
  618. target = new Target(lineNum, file, column);
  619. stateInQuote = false;
  620. continue;
  621. } else if(!stateInQuote) {
  622. stateInQuote = true;
  623. quoteLineNumberStart = lineNum;
  624. inSmartQuote = false;
  625. if(buf.length() > 0) {
  626. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  627. buf = new StringBuilder();
  628. target = new Target(lineNum, file, column);
  629. }
  630. continue;
  631. } else {
  632. // We're in a smart quote.
  633. buf.append("'");
  634. }
  635. break;
  636. }
  637. case '"': {
  638. if(stateInQuote && inSmartQuote) {
  639. validateTerminatedBidiSequence(buf.toString(), target);
  640. tokenList.add(new Token(TType.SMART_STRING, buf.toString(), target));
  641. buf = new StringBuilder();
  642. target = new Target(lineNum, file, column);
  643. stateInQuote = false;
  644. inSmartQuote = false;
  645. continue;
  646. } else if(!stateInQuote) {
  647. stateInQuote = true;
  648. inSmartQuote = true;
  649. smartQuoteLineNumberStart = lineNum;
  650. if(buf.length() > 0) {
  651. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  652. buf = new StringBuilder();
  653. target = new Target(lineNum, file, column);
  654. }
  655. continue;
  656. } else {
  657. // We're in normal quotes.
  658. buf.append('"');
  659. }
  660. break;
  661. }
  662. case '\n': {
  663. // Append a newline to the buffer if it's quoted.
  664. if(stateInQuote) {
  665. buf.append(c);
  666. } else {
  667. // Newline is not quoted. Move the buffer to an UNKNOWN token and add a NEWLINE token.
  668. if(buf.length() > 0) {
  669. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  670. buf = new StringBuilder();
  671. target = new Target(lineNum, file, column);
  672. }
  673. tokenList.add(new Token(TType.NEWLINE, "\n", target));
  674. }
  675. continue;
  676. }
  677. case '\\': {
  678. // Handle escaped characters in quotes or a single "\" seperator token otherwise.
  679. // Handle backslash character outside of quotes.
  680. if(!stateInQuote) {
  681. tokenList.add(new Token(TType.SEPERATOR, "\\", target));
  682. break;
  683. }
  684. // Handle an escape sign in a quote.
  685. switch(c2) {
  686. case '\\':
  687. if(inSmartQuote) {
  688. // Escaping of '@' and '\' is handled within __smart_string__.
  689. buf.append('\\');
  690. }
  691. buf.append('\\');
  692. break;
  693. case '\'':
  694. case '"':
  695. buf.append(c2);
  696. break;
  697. case 'n':
  698. buf.append('\n');
  699. break;
  700. case 'r':
  701. buf.append('\r');
  702. break;
  703. case 't':
  704. buf.append('\t');
  705. break;
  706. case '0':
  707. buf.append('\0');
  708. break;
  709. case 'f':
  710. buf.append('\f');
  711. break; // Form feed.
  712. case 'v':
  713. buf.append('\u000B');
  714. break; // Vertical TAB.
  715. case 'a':
  716. buf.append('\u0007');
  717. break; // Alarm.
  718. case 'b':
  719. buf.append('\u0008');
  720. break; // Backspace.
  721. case 'u': { // Unicode (4 characters).
  722. // Grab the next 4 characters, and check to see if they are numbers.
  723. if(i + 5 >= script.length()) {
  724. throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
  725. }
  726. String unicode = script.substring(i + 2, i + 6);
  727. int unicodeNum;
  728. try {
  729. unicodeNum = Integer.parseInt(unicode, 16);
  730. } catch (NumberFormatException e) {
  731. throw new ConfigCompileException(
  732. "Unrecognized unicode escape sequence: \\u" + unicode, target);
  733. }
  734. buf.append(Character.toChars(unicodeNum));
  735. i += 4;
  736. break;
  737. }
  738. case 'U': { // Unicode (8 characters).
  739. // Grab the next 8 characters and check to see if they are numbers.
  740. if(i + 9 >= script.length()) {
  741. throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
  742. }
  743. String unicode = script.substring(i + 2, i + 10);
  744. int unicodeNum;
  745. try {
  746. unicodeNum = Integer.parseInt(unicode, 16);
  747. } catch (NumberFormatException e) {
  748. throw new ConfigCompileException(
  749. "Unrecognized unicode escape sequence: \\u" + unicode, target);
  750. }
  751. buf.append(Character.toChars(unicodeNum));
  752. i += 8;
  753. break;
  754. }
  755. case '@': {
  756. if(!inSmartQuote) {
  757. throw new ConfigCompileException("The escape sequence \\@ is not"
  758. + " a recognized escape sequence in a non-smart string", target);
  759. }
  760. buf.append("\\@");
  761. break;
  762. }
  763. default: {
  764. // Since we might expand this list later, don't let them use unescaped backslashes.
  765. throw new ConfigCompileException(
  766. "The escape sequence \\" + c2 + " is not a recognized escape sequence", target);
  767. }
  768. }
  769. i++;
  770. continue;
  771. }
  772. default: {
  773. // At this point, only non-comment and non-escaped characters that are not part of a
  774. // quote start/end are left.
  775. // Disallow Non-Breaking Space Characters.
  776. if(!stateInQuote && c == '\u00A0'/*nbsp*/) {
  777. throw new ConfigCompileException("NBSP character in script", target);
  778. }
  779. // Add the characters that didn't match anything to the buffer.
  780. buf.append(c);
  781. continue;
  782. }
  783. }
  784. } // End of lexing.
  785. // Handle unended file options.
  786. if(inFileOptions) {
  787. throw new ConfigCompileException("Unended file options. You started the the file options on line "
  788. + fileOptionsLineNumberStart, target);
  789. }
  790. // Handle unended string literals.
  791. if(stateInQuote) {
  792. if(inSmartQuote) {
  793. throw new ConfigCompileException("Unended string literal. You started the last double quote on line "
  794. + smartQuoteLineNumberStart, target);
  795. } else {
  796. throw new ConfigCompileException("Unended string literal. You started the last single quote on line "
  797. + quoteLineNumberStart, target);
  798. }
  799. }
  800. // Handle unended comment blocks. Since a newline is added to the end of the script, line comments are ended.
  801. if(inComment || commentIsBlock) {
  802. throw new ConfigCompileException("Unended block comment. You started the comment on line "
  803. + commentLineNumberStart, target);
  804. }
  805. // Look at the tokens and get meaning from them. Also, look for improper symbol locations
  806. // and go ahead and absorb unary +- into the token.
  807. ListIterator<Token> it = tokenList.listIterator(0);
  808. while(it.hasNext()) {
  809. Token t = it.next();
  810. // Combine whitespace tokens into one.
  811. if(t.type == TType.WHITESPACE && it.hasNext()) {
  812. Token next;
  813. if((next = it.next()).type == TType.WHITESPACE) {
  814. t.value += next.val();
  815. it.remove(); // Remove 'next'.
  816. } else {
  817. it.previous(); // Select 'next' <--.
  818. }
  819. it.previous(); // Select 't' <--.
  820. it.next(); // Select 't' -->.
  821. }
  822. // Convert "-" + number to -number if allowed.
  823. it.previous(); // Select 't' <--.
  824. if(it.hasPrevious() && t.type == TType.UNKNOWN) {
  825. Token prev1 = it.previous(); // Select 'prev1' <--.
  826. if(prev1.type.isPlusMinus()) {
  827. // Find the first non-whitespace token before the '-'.
  828. Token prevNonWhitespace = null;
  829. while(it.hasPrevious()) {
  830. if(it.previous().type != TType.WHITESPACE) {
  831. prevNonWhitespace = it.next();
  832. break;
  833. }
  834. }
  835. while(it.next() != prev1) { // Skip until selection is at 'prev1 -->'.
  836. }
  837. if(prevNonWhitespace != null) {
  838. // Convert "±UNKNOWN" if the '±' is used as a sign (and not an add/subtract operation).
  839. if(!prevNonWhitespace.type.isIdentifier() // Don't convert "number/string/var ± ...".
  840. && prevNonWhitespace.type != TType.FUNC_END // Don't convert "func() ± ...".
  841. && prevNonWhitespace.type != TType.RSQUARE_BRACKET // Don't convert "] ± ..." (arrays).
  842. && !IVAR_PATTERN.matcher(t.val()).matches() // Don't convert "± @var".
  843. && !VAR_PATTERN.matcher(t.val()).matches()) { // Don't convert "± $var".
  844. // It is a negative/positive number: Absorb the sign.
  845. t.value = prev1.value + t.value;
  846. it.remove(); // Remove 'prev1'.
  847. }
  848. }
  849. } else {
  850. it.next(); // Select 'prev1' -->.
  851. }
  852. }
  853. it.next(); // Select 't' -->.
  854. // Assign a type to all UNKNOWN tokens.
  855. if(t.type == TType.UNKNOWN) {
  856. if(t.val().charAt(0) == '/' && t.val().length() > 1) {
  857. t.type = TType.COMMAND;
  858. } else if(t.val().equals("$")) {
  859. t.type = TType.FINAL_VAR;
  860. } else if(VAR_PATTERN.matcher(t.val()).matches()) {
  861. t.type = TType.VARIABLE;
  862. } else if(IVAR_PATTERN.matcher(t.val()).matches()) {
  863. t.type = TType.IVARIABLE;
  864. } else if(t.val().charAt(0) == '@') {
  865. throw new ConfigCompileException("IVariables must match the regex: " + IVAR_PATTERN, t.getTarget());
  866. } else if(keywords.contains(t.val())) {
  867. t.type = TType.KEYWORD;
  868. } else if(t.val().matches("[\t ]*")) {
  869. t.type = TType.WHITESPACE;
  870. } else {
  871. t.type = TType.LIT;
  872. }
  873. }
  874. // Skip this check if we're not in pure mscript.
  875. if(inPureMScript) {
  876. if(it.hasNext()) {
  877. Token next = it.next(); // Select 'next' -->.
  878. it.previous(); // Select 'next' <--.
  879. it.previous(); // Select 't' <--.
  880. if(t.type.isSymbol() && !t.type.isUnary() && !next.type.isUnary()) {
  881. if(it.hasPrevious()) {
  882. Token prev1 = it.previous(); // Select 'prev1' <--.
  883. if(prev1.type.equals(TType.FUNC_START) || prev1.type.equals(TType.COMMA)
  884. || next.type.equals(TType.FUNC_END) || next.type.equals(TType.COMMA)
  885. || prev1.type.isSymbol() || next.type.isSymbol()) {
  886. throw new ConfigCompileException("Unexpected symbol (" + t.val() + ")", t.getTarget());
  887. }
  888. it.next(); // Select 'prev1' -->.
  889. }
  890. }
  891. it.next(); // Select 't' -->.
  892. }
  893. }
  894. }
  895. // Set file options
  896. {
  897. Map<String, String> defaults = new HashMap<>();
  898. List<File> dirs = new ArrayList<>();
  899. if(file != null) {
  900. File f = file.getParentFile();
  901. while(true) {
  902. if(f == null) {
  903. break;
  904. }
  905. File fileOptionDefaults = new File(f, ".msfileoptions");
  906. if(fileOptionDefaults.exists()) {
  907. dirs.add(fileOptionDefaults);
  908. }
  909. f = f.getParentFile();
  910. }
  911. }
  912. Collections.reverse(dirs);
  913. for(File d : dirs) {
  914. try {
  915. defaults.putAll(TokenStream.parseFileOptions(FileUtil.read(d), defaults).getRawOptions());
  916. } catch (IOException ex) {
  917. throw new ConfigCompileException("Cannot read " + d.getAbsolutePath(), Target.UNKNOWN, ex);
  918. }
  919. }
  920. tokenList.setFileOptions(fileOptions.toString(), defaults);
  921. }
  922. // Make sure that the file options are the first non-comment code in the file
  923. {
  924. boolean foundCode = false;
  925. for(Token t : tokenList) {
  926. if(t.type.isFileOption()) {
  927. if(foundCode) {
  928. throw new ConfigCompileException("File options must be the first non-comment section in the"
  929. + " code", t.target);
  930. }
  931. break;
  932. }
  933. if(!t.type.isComment() && !t.type.isWhitespace()) {
  934. foundCode = true;
  935. }
  936. }
  937. }
  938. {
  939. // Filename check
  940. String fileName = tokenList.getFileOptions().getName();
  941. if(!fileName.isEmpty()) {
  942. if(!file.getAbsolutePath().replace("\\", "/").endsWith(fileName.replace("\\", "/"))) {
  943. CompilerWarning warning = new CompilerWarning(file + " has the wrong file name in the file options ("
  944. + fileName + ")", new Target(0, file, 0), null);
  945. env.getEnv(CompilerEnvironment.class).addCompilerWarning(null, warning);
  946. }
  947. }
  948. }
  949. {
  950. // Required extension check
  951. // TODO: Add support for specifying required versions
  952. Collection<ExtensionTracker> exts = ExtensionManager.getTrackers().values();
  953. Set<String> notFound = new HashSet<>();
  954. for(String extension : tokenList.getFileOptions().getRequiredExtensions()) {
  955. boolean found = false;
  956. for(ExtensionTracker t : exts) {
  957. if(t.getIdentifier().equalsIgnoreCase(extension)) {
  958. found = true;
  959. break;
  960. }
  961. }
  962. if(!found) {
  963. notFound.add(extension);
  964. }
  965. }
  966. if(!notFound.isEmpty()) {
  967. throw new ConfigCompileException("Could not compile file, because one or more required"
  968. + " extensions are not loaded: " + StringUtils.Join(notFound, ", ")
  969. + ". These extensions must be provided before compilation can continue.",
  970. new Target(0, file, 0));
  971. }
  972. }
  973. return tokenList;
  974. }
  975. /**
  976. * This function breaks the token stream into parts, separating the aliases/MethodScript from the command triggers
  977. *
  978. * @param tokenStream
  979. * @param envs
  980. * @return
  981. * @throws ConfigCompileException
  982. */
  983. public static List<Script> preprocess(TokenStream tokenStream,
  984. Set<Class<? extends Environment.EnvironmentImpl>> envs) throws ConfigCompileException {
  985. if(tokenStream == null || tokenStream.isEmpty()) {
  986. return new ArrayList<>();
  987. }
  988. // Remove leading newlines.
  989. while(!tokenStream.isEmpty() && tokenStream.getFirst().type == TType.NEWLINE) {
  990. tokenStream.removeFirst(); // Remove leading newlines.
  991. }
  992. // Return an empty list if there were only newlines.
  993. if(tokenStream.isEmpty()) {
  994. return new ArrayList<>();
  995. }
  996. // Remove whitespaces and duplicate newlines.
  997. {
  998. ListIterator<Token> it = tokenStream.listIterator(0);
  999. Token token = it.next();
  1000. outerLoop:
  1001. while(true) {
  1002. switch(token.type) {
  1003. case WHITESPACE: {
  1004. it.remove(); // Remove whitespaces.
  1005. if(!it.hasNext()) {
  1006. break outerLoop;
  1007. }
  1008. token = it.next();
  1009. continue outerLoop;
  1010. }
  1011. case NEWLINE: {
  1012. while(it.hasNext()) {
  1013. if((token = it.next()).type == TType.NEWLINE) {
  1014. it.remove(); // Remove duplicate newlines.
  1015. } else {
  1016. continue outerLoop;
  1017. }
  1018. }
  1019. break outerLoop;
  1020. }
  1021. default: {
  1022. if(!it.hasNext()) {
  1023. break outerLoop;
  1024. }
  1025. token = it.next();
  1026. continue outerLoop;
  1027. }
  1028. }
  1029. }
  1030. }
  1031. // Handle multiline constructs.
  1032. // Take out newlines between the '= >>>' and '<<<' tokens (also removing the '>>>' and '<<<' tokens).
  1033. // Also remove comments and also remove newlines that are behind a '\'.
  1034. boolean insideMultiline = false;
  1035. ListIterator<Token> it = tokenStream.listIterator(0);
  1036. Token token = null;
  1037. while(it.hasNext()) {
  1038. token = it.next();
  1039. switch(token.type) {
  1040. case ALIAS_END: { // "=".
  1041. if(it.hasNext()) {
  1042. if(it.next().type == TType.MULTILINE_START) { // "= >>>".
  1043. insideMultiline = true;
  1044. it.remove(); // Remove multiline start (>>>).
  1045. it.previous(); // Select 'token' <---.
  1046. it.next(); // Select 'token' -->.
  1047. } else {
  1048. it.previous(); // Select 'next' <---.
  1049. }
  1050. }
  1051. continue;
  1052. }
  1053. case MULTILINE_END: { // "<<<".
  1054. // Handle multiline end token (<<<) without start.
  1055. if(!insideMultiline) {
  1056. throw new ConfigCompileException(
  1057. "Found multiline end symbol, and no multiline start found", token.target);
  1058. }
  1059. insideMultiline = false;
  1060. it.remove(); // Remove multiline end (<<<).
  1061. continue;
  1062. }
  1063. case MULTILINE_START: { // ">>>".
  1064. // Handle multiline start token (>>>) while already in multiline.
  1065. if(insideMultiline) {
  1066. throw new ConfigCompileException("Did not expect a multiline start symbol here,"
  1067. + " are you missing a multiline end symbol above this line?", token.target);
  1068. }
  1069. // Handle multiline start token (>>>) without alias end (=) in front.
  1070. it.previous(); // Select 'token' <--.
  1071. if(!it.hasPrevious() || it.previous().type != TType.ALIAS_END) {
  1072. throw new ConfigCompileException(
  1073. "Multiline symbol must follow the alias_end (=) symbol", token.target);
  1074. }
  1075. it.next(); // Select 'prev' -->.
  1076. it.next(); // Select 'token' -->.
  1077. continue;
  1078. }
  1079. case NEWLINE: { // "\n".
  1080. // Skip newlines that are inside a multiline construct.
  1081. if(insideMultiline) {
  1082. it.remove(); // Remove newline.
  1083. }
  1084. continue;
  1085. }
  1086. // Remove comments.
  1087. case COMMENT: {
  1088. it.remove(); // Remove comment.
  1089. continue;
  1090. }
  1091. default: {
  1092. // Remove newlines that are behind a '\'.
  1093. if(token.type != TType.STRING && token.val().equals("\\") && it.hasNext()) {
  1094. if(it.next().type == TType.NEWLINE) {
  1095. it.remove(); // Remove newline.
  1096. it.previous(); // Select 'token' <--.
  1097. it.next(); // Select 'token' -->.
  1098. } else {
  1099. it.previous(); // Select 'next' <--.
  1100. }
  1101. }
  1102. }
  1103. }
  1104. }
  1105. assert token != null;
  1106. // Handle missing multiline end token.
  1107. if(insideMultiline) {
  1108. throw new ConfigCompileException("Expecting a multiline end symbol, but your last multiline alias appears to be missing one.", token.target);
  1109. }
  1110. // Now that we have all lines minified, we should be able to split on newlines
  1111. // and easily find the left and right sides.
  1112. List<Token> left = new ArrayList<>();
  1113. List<Token> right = new ArrayList<>();
  1114. List<Script> scripts = new ArrayList<>();
  1115. SmartComment comment = null;
  1116. tokenLoop:
  1117. for(it = tokenStream.listIterator(0); it.hasNext();) {
  1118. Token t = it.next();
  1119. if(t.type == TType.SMART_COMMENT) {
  1120. if(comment != null) {
  1121. // TODO: Double smart comment, this should be an error case
  1122. }
  1123. comment = new SmartComment(t.val());
  1124. t = it.next();
  1125. }
  1126. // Add all tokens until ALIAS_END (=) or end of stream.
  1127. while(t.type != TType.ALIAS_END) {
  1128. if(!it.hasNext()) {
  1129. break tokenLoop; // End of stream.
  1130. }
  1131. left.add(t);
  1132. t = it.next();
  1133. }
  1134. // Add all tokens until NEWLINE (\n).
  1135. while(t.type != TType.NEWLINE) {
  1136. assert it.hasNext(); // All files end with a newline, so end of stream should be impossible here.
  1137. right.add(t);
  1138. t = it.next();
  1139. }
  1140. // Create a new script for the obtained left and right if end of stream has not been reached.
  1141. if(t.type == TType.NEWLINE) {
  1142. // Check for spurious symbols, which indicate an issue with the script, but ignore any whitespace.
  1143. for(int j = left.size() - 1; j >= 0; j--) {
  1144. if(left.get(j).type == TType.NEWLINE) {
  1145. if(j > 0 && left.get(j - 1).type != TType.WHITESPACE) {
  1146. throw new ConfigCompileException(
  1147. "Unexpected token: " + left.get(j - 1).val(), left.get(j - 1).getTarget());
  1148. }
  1149. }
  1150. }
  1151. // Create a new script from the command descriptor (left) and code (right) and add it to the list.
  1152. Script s = new Script(left, right, null, envs, tokenStream.getFileOptions(), comment);
  1153. scripts.add(s);
  1154. // Create new left and right array for the next script.
  1155. left = new ArrayList<>();
  1156. right = new ArrayList<>();
  1157. comment = null;
  1158. }
  1159. }
  1160. // Return the scripts.
  1161. return scripts;
  1162. }
  1163. /**
  1164. * Compiles the token stream into a valid ParseTree. This also includes optimization and reduction.
  1165. *
  1166. * @param stream The token stream, as generated by {@link #lex(String, Environment, File, boolean) lex}
  1167. * @param environment If an environment is already set up, it can be passed in here. The code will tolerate a null
  1168. * value, but if present, should be passed in. If the value is null, a standalone environment will be generated
  1169. * and used.
  1170. * @param envs The environments that are going to be present at runtime. Even if the {@code environment} parameter
  1171. * is null, this still must be non-null and populated with one or more values.
  1172. * @return A fully compiled, optimized, and reduced parse tree. If {@code stream} is null or empty, null is
  1173. * returned.
  1174. * @throws ConfigCompileException If the script contains syntax errors. Additionally, during optimization, certain
  1175. * methods may cause compile errors. Any function that can optimize static occurrences and throws a
  1176. * {@link ConfigRuntimeException} will have that exception converted to a ConfigCompileException.
  1177. * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException A ConfigCompileGroupException is just
  1178. * a collection of single {@link ConfigCompileException}s.
  1179. */
  1180. public static ParseTree compile(TokenStream stream, Environment environment,
  1181. Set<Class<? extends Environment.EnvironmentImpl>> envs) throws ConfigCompileException,
  1182. ConfigCompileGroupException {
  1183. return compile(stream, environment, envs, new StaticAnalysis(true));
  1184. }
  1185. /**
  1186. * Compiles the token stream into a valid ParseTree. This also includes optimization and reduction.
  1187. *
  1188. * @param stream The token stream, as generated by {@link #lex(String, Environment, File, boolean) lex}
  1189. * @param environment If an environment is already set up, it can be passed in here. The code will tolerate a null
  1190. * value, but if present, should be passed in. If the value is null, a standalone environment will be generated
  1191. * and used.
  1192. * @param envs The environments that are going to be present at runtime. Even if the {@code environment} parameter
  1193. * is null, this still must be non-null and populated with one or more values.
  1194. * @param staticAnalysis The static analysis object, or {@code null} to not perform static analysis. This object
  1195. * is used to perform static analysis on the AST that results from parsing, before any AST optimizations.
  1196. * this method has finished execution.
  1197. * @return A fully compiled, optimized, and reduced parse tree. If {@code stream} is null or empty, null is
  1198. * returned.
  1199. * @throws ConfigCompileException If the script contains syntax errors. Additionally, during optimization, certain
  1200. * methods may cause compile errors. Any function that can optimize static occurrences and throws a
  1201. * {@link ConfigRuntimeException} will have that exception converted to a ConfigCompileException.
  1202. * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException A ConfigCompileGroupException is just
  1203. * a collection of single {@link ConfigCompileException}s.
  1204. */
  1205. public static ParseTree compile(TokenStream stream, Environment environment,
  1206. Set<Class<? extends Environment.EnvironmentImpl>> envs, StaticAnalysis staticAnalysis)
  1207. throws ConfigCompileException, ConfigCompileGroupException {
  1208. Objects.requireNonNull(envs, () -> "envs parameter must not be null");
  1209. try {
  1210. if(environment == null) {
  1211. // We MUST have a CompilerEnvironment. It doesn't need to be used, but we have to create it at
  1212. // this stage.
  1213. environment = Static.GenerateStandaloneEnvironment(false);
  1214. }
  1215. if(!environment.hasEnv(CompilerEnvironment.class)) {
  1216. Environment e = Static.GenerateStandaloneEnvironment(false);
  1217. environment = environment.cloneAndAdd(e.getEnv(CompilerEnvironment.class));
  1218. }
  1219. } catch (IOException | DataSourceException | URISyntaxException | Profiles.InvalidProfileException ex) {
  1220. throw new RuntimeException(ex);
  1221. }
  1222. Set<ConfigCompileException> compilerErrors = new HashSet<>();
  1223. if(stream == null || stream.isEmpty()) {
  1224. return null;
  1225. }
  1226. Target unknown;
  1227. try {
  1228. //Instead of using Target.UNKNOWN, we can at least set the file.
  1229. unknown = new Target(0, stream.get(0).target.file(), 0);
  1230. } catch (Exception e) {
  1231. unknown = Target.UNKNOWN;
  1232. }
  1233. // Remove all newlines and whitespaces.
  1234. ListIterator<Token> it = stream.listIterator(0);
  1235. while(it.hasNext()) {
  1236. if(it.next().type.isWhitespace()) {
  1237. it.remove();
  1238. }
  1239. }
  1240. // Get the file options.
  1241. final FileOptions fileOptions = stream.getFileOptions();
  1242. final ParseTree rootNode = new ParseTree(fileOptions);
  1243. rootNode.setData(CNull.NULL);
  1244. ParseTree tree = rootNode;
  1245. Stack<ParseTree> parents = new Stack<>();
  1246. /**
  1247. * constructCount is used to determine if we need to use autoconcat when reaching a FUNC_END. The previous
  1248. * constructs, if the count is greater than 1, will be moved down into an autoconcat.
  1249. */
  1250. Stack<AtomicInteger> constructCount = new Stack<>();
  1251. constructCount.push(new AtomicInteger(0));
  1252. parents.push(tree);
  1253. tree.addChild(new ParseTree(new CFunction(__autoconcat__.NAME, unknown), fileOptions));
  1254. parents.push(tree.getChildAt(0));
  1255. tree = tree.getChildAt(0);
  1256. constructCount.push(new AtomicInteger(0));
  1257. /**
  1258. * The array stack is used to keep track of the number of square braces in use.
  1259. */
  1260. Stack<AtomicInteger> arrayStack = new Stack<>();
  1261. arrayStack.add(new AtomicInteger(-1));
  1262. Stack<AtomicInteger> minusArrayStack = new Stack<>();
  1263. Stack<AtomicInteger> minusFuncStack = new Stack<>();
  1264. int parens = 0;
  1265. Token t = null;
  1266. int braceCount = 0;
  1267. boolean inObjectDefinition = false;
  1268. // Create a Token array to iterate over, rather than using the LinkedList's O(n) get() method.
  1269. Token[] tokenArray = stream.toArray(new Token[stream.size()]);
  1270. for(int i = 0; i < tokenArray.length; i++) {
  1271. t = tokenArray[i];
  1272. Token prev1 = i - 1 >= 0 ? tokenArray[i - 1] : new Token(TType.UNKNOWN, "", t.target);
  1273. Token next1 = i + 1 < stream.size() ? tokenArray[i + 1] : new Token(TType.UNKNOWN, "", t.target);
  1274. Token next2 = i + 2 < stream.size() ? tokenArray[i + 2] : new Token(TType.UNKNOWN, "", t.target);
  1275. Token next3 = i + 3 < stream.size() ? tokenArray[i + 3] : new Token(TType.UNKNOWN, "", t.target);
  1276. // Brace handling
  1277. if(t.type == TType.LCURLY_BRACKET) {
  1278. inObjectDefinition = false;
  1279. ParseTree b = new ParseTree(new CFunction(__cbrace__.NAME, t.getTarget()), fileOptions);
  1280. tree.addChild(b);
  1281. tree = b;
  1282. parents.push(b);
  1283. braceCount++;
  1284. constructCount.push(new AtomicInteger(0));
  1285. continue;
  1286. }
  1287. if(t.type == TType.RCURLY_BRACKET) {
  1288. if(braceCount == 0) {
  1289. throw new ConfigCompileException("Unexpected end curly brace", t.target);
  1290. }
  1291. braceCount--;
  1292. if(constructCount.peek().get() > 1) {
  1293. //We need to autoconcat some stuff
  1294. int stacks = constructCount.peek().get();
  1295. int replaceAt = tree.getChildren().size() - stacks;
  1296. ParseTree c = new ParseTree(new CFunction(__autoconcat__.NAME, tree.getTarget()), fileOptions);
  1297. List<ParseTree> subChildren = new ArrayList<>();
  1298. for(int b = replaceAt; b < tree.numberOfChildren(); b++) {
  1299. subChildren.add(tree.getChildAt(b));
  1300. }
  1301. c.setChildren(subChildren);
  1302. if(replaceAt > 0) {
  1303. List<ParseTree> firstChildren = new ArrayList<>();
  1304. for(int d = 0; d < replaceAt; d++) {
  1305. firstChildren.add(tree.getChildAt(d));
  1306. }
  1307. tree.setChildren(firstChildren);
  1308. } else {
  1309. tree.removeChildren();
  1310. }
  1311. tree.addChild(c);
  1312. }
  1313. parents.pop();
  1314. tree = parents.peek();
  1315. constructCount.pop();
  1316. try {
  1317. constructCount.peek().incrementAndGet();
  1318. } catch (EmptyStackException e) {
  1319. throw new ConfigCompileException("Unexpected end curly brace", t.target);
  1320. }
  1321. continue;
  1322. }
  1323. if(t.type == TType.KEYWORD && KeywordList.getKeywordByName(t.value) instanceof ObjectDefinitionKeyword) {
  1324. inObjectDefinition = true;
  1325. }
  1326. //Associative array/label handling
  1327. if(t.type == TType.LABEL && tree.getChildren().size() > 0) {
  1328. //If it's not an atomic identifier it's an error.
  1329. if(!prev1.type.isAtomicLit() && prev1.type != TType.IVARIABLE && prev1.type != TType.KEYWORD) {
  1330. ConfigCompileException error = new ConfigCompileException("Invalid label specified", t.getTarget());
  1331. if(prev1.type == TType.FUNC_END) {
  1332. // This is a fairly common mistake, so we have special handling for this,
  1333. // because otherwise we would get a "Mismatched parenthesis" warning (which doesn't make sense),
  1334. // and potentially lots of other invalid errors down the line, so we go ahead
  1335. // and stop compilation at this point.
  1336. throw error;
  1337. }
  1338. compilerErrors.add(error);
  1339. }
  1340. // Wrap previous construct in a CLabel
  1341. ParseTree cc = tree.getChildren().get(tree.getChildren().size() - 1);
  1342. tree.removeChildAt(tree.getChildren().size() - 1);
  1343. tree.addChild(new ParseTree(new CLabel((Construct) cc.getData()), fileOptions));
  1344. continue;
  1345. }
  1346. //Array notation handling
  1347. if(t.type.equals(TType.LSQUARE_BRACKET)) {
  1348. //tree.addChild(new ParseTree(new CFunction("__cbracket__", t.getTarget()), fileOptions));
  1349. arrayStack.push(new AtomicInteger(tree.getChildren().size() - 1));
  1350. continue;
  1351. } else if(t.type.equals(TType.RSQUARE_BRACKET)) {
  1352. boolean emptyArray = false;
  1353. if(prev1.type.equals(TType.LSQUARE_BRACKET)) {
  1354. emptyArray = true;
  1355. }
  1356. if(arrayStack.size() == 1) {
  1357. throw new ConfigCompileException("Mismatched square bracket", t.target);
  1358. }
  1359. //array is the location of the array
  1360. int array = arrayStack.pop().get();
  1361. //index is the location of the first node with the index
  1362. int index = array + 1;
  1363. if(array == -1 || array >= tree.numberOfChildren()) {
  1364. throw new ConfigCompileException("Brackets are illegal here", t.target);
  1365. }
  1366. ParseTree myArray = tree.getChildAt(array);
  1367. ParseTree myIndex;
  1368. if(!emptyArray) {
  1369. myIndex = new ParseTree(new CFunction(__autoconcat__.NAME, myArray.getTarget()), fileOptions);
  1370. for(int j = index; j < tree.numberOfChildren(); j++) {
  1371. myIndex.addChild(tree.getChildAt(j));
  1372. }
  1373. } else {
  1374. myIndex = new ParseTree(new CSlice("0..-1", t.target), fileOptions);
  1375. }
  1376. tree.setChildren(tree.getChildren().subList(0, array));
  1377. ParseTree arrayGet = new ParseTree(new CFunction(array_get.NAME, t.target), fileOptions);
  1378. arrayGet.addChild(myArray);
  1379. arrayGet.addChild(myIndex);
  1380. // Check if the @var[...] had a negating "-" in front. If so, add a neg().
  1381. if(!minusArrayStack.isEmpty() && arrayStack.size() + 1 == minusArrayStack.peek().get()) {
  1382. if(!next1.type.equals(TType.LSQUARE_BRACKET)) { // Wait if there are more array_get's comming.
  1383. ParseTree negTree = new ParseTree(new CFunction(neg.NAME, unknown), fileOptions);
  1384. negTree.addChild(arrayGet);
  1385. tree.addChild(negTree);
  1386. minusArrayStack.pop();
  1387. } else {
  1388. // Negate the next array_get instead, so just add this one to the tree.
  1389. tree.addChild(arrayGet);
  1390. }
  1391. } else {
  1392. tree.addChild(arrayGet);
  1393. }
  1394. constructCount.peek().set(constructCount.peek().get() - myIndex.numberOfChildren());
  1395. continue;
  1396. }
  1397. //Smart strings
  1398. if(t.type == TType.SMART_STRING) {
  1399. if(t.val().contains("@")) {
  1400. ParseTree function = new ParseTree(fileOptions);
  1401. function.setData(new CFunction(__smart_string__.NAME, t.target));
  1402. ParseTree string = new ParseTree(fileOptions);
  1403. string.setData(new CString(t.value, t.target));
  1404. function.addChild(string);
  1405. tree.addChild(function);
  1406. } else {
  1407. tree.addChild(new ParseTree(new CString(t.val(), t.target), fileOptions));
  1408. }
  1409. constructCount.peek().incrementAndGet();
  1410. continue;
  1411. }
  1412. if(t.type == TType.DEREFERENCE) {
  1413. //Currently unimplemented, but going ahead and making it strict
  1414. compilerErrors.add(new ConfigCompileException("The '" + t.val() + "' symbol is not currently allowed in raw strings. You must quote all"
  1415. + " symbols.", t.target));
  1416. }
  1417. if(t.type.equals(TType.FUNC_NAME)) {
  1418. CFunction func = new CFunction(t.val(), t.target);
  1419. {
  1420. // Check for code upgrade warning
  1421. try {
  1422. OperatorPreferred opPref = func.getFunction().getClass().getAnnotation(OperatorPreferred.class);
  1423. if(opPref != null) {
  1424. String msg = "The operator \"" + opPref.value() + "\" is preferred over the functional"
  1425. + " usage.";
  1426. CompilerWarning warning = new CompilerWarning(msg, t.target,
  1427. FileOptions.SuppressWarning.CodeUpgradeNotices);
  1428. environment.getEnv(CompilerEnvironment.class).addCodeUpgradeNotice(fileOptions, warning);
  1429. }
  1430. } catch (ConfigCompileException ex) {
  1431. // The function doesn't exist. It may be a compile error later (or maybe not, if it's
  1432. // preprocessed out) but we don't want to handle that at this point either way. In any
  1433. // case, we can't find it, so don't report it.
  1434. }
  1435. }
  1436. ParseTree f = new ParseTree(func, fileOptions);
  1437. tree.addChild(f);
  1438. constructCount.push(new AtomicInteger(0));
  1439. tree = f;
  1440. parents.push(f);
  1441. } else if(t.type.equals(TType.FUNC_START)) {
  1442. if(!prev1.type.equals(TType.FUNC_NAME)) {
  1443. throw new ConfigCompileException("Unexpected parenthesis", t.target);
  1444. }
  1445. parens++;
  1446. } else if(t.type.equals(TType.FUNC_END)) {
  1447. if(parens <= 0) {
  1448. throw new ConfigCompileException("Unexpected parenthesis", t.target);
  1449. }
  1450. parens--;
  1451. parents.pop(); // Pop function.
  1452. if(constructCount.peek().get() > 1) {
  1453. //We need to autoconcat some stuff
  1454. int stacks = constructCount.peek().get();
  1455. int replaceAt = tree.getChildren().size() - stacks;
  1456. ParseTree c = new ParseTree(new CFunction(__autoconcat__.NAME, tree.getTarget()), fileOptions);
  1457. List<ParseTree> subChildren = new ArrayList<>();
  1458. for(int b = replaceAt; b < tree.numberOfChildren(); b++) {
  1459. subChildren.add(tree.getChildAt(b));
  1460. }
  1461. c.setChildren(subChildren);
  1462. if(replaceAt > 0) {
  1463. List<ParseTree> firstChildren = new ArrayList<>();
  1464. for(int d = 0; d < replaceAt; d++) {
  1465. firstChildren.add(tree.getChildAt(d));
  1466. }
  1467. tree.setChildren(firstChildren);
  1468. } else {
  1469. tree.removeChildren();
  1470. }
  1471. tree.addChild(c);
  1472. }
  1473. constructCount.pop();
  1474. try {
  1475. constructCount.peek().incrementAndGet();
  1476. } catch (EmptyStackException e) {
  1477. throw new ConfigCompileException("Unexpected end parenthesis", t.target);
  1478. }
  1479. try {
  1480. tree = parents.peek();
  1481. } catch (EmptyStackException e) {
  1482. throw new ConfigCompileException("Unexpected end parenthesis", t.target);
  1483. }
  1484. // Handle "-func(args)" and "-func(args)[index]".
  1485. if(!minusFuncStack.isEmpty() && minusFuncStack.peek().get() == parens + 1) {
  1486. if(next1.type.equals(TType.LSQUARE_BRACKET)) {
  1487. // Move the negation to the array_get which contains this function.
  1488. minusArrayStack.push(new AtomicInteger(arrayStack.size() + 1)); // +1 because the bracket isn't counted yet.
  1489. } else {
  1490. // Negate this function.
  1491. ParseTree negTree = new ParseTree(new CFunction(neg.NAME, unknown), fileOptions);
  1492. negTree.addChild(tree.getChildAt(tree.numberOfChildren() - 1));
  1493. tree.removeChildAt(tree.numberOfChildren() - 1);
  1494. tree.addChildAt(tree.numberOfChildren(), negTree);
  1495. }
  1496. minusFuncStack.pop();
  1497. }
  1498. } else if(t.type.equals(TType.COMMA)) {
  1499. if(inObjectDefinition) {
  1500. // This is not part of a function use, so we have special handling, just push this on, and
  1501. // carry on.
  1502. tree.addChild(new ParseTree(new CSymbol(",", TType.COMMA, unknown), fileOptions));
  1503. continue;
  1504. }
  1505. if(constructCount.peek().get() > 1) {
  1506. int stacks = constructCount.peek().get();
  1507. int replaceAt = tree.getChildren().size() - stacks;
  1508. ParseTree c = new ParseTree(new CFunction(__autoconcat__.NAME, unknown), fileOptions);
  1509. List<ParseTree> subChildren = new ArrayList<>();
  1510. for(int b = replaceAt; b < tree.numberOfChildren(); b++) {
  1511. subChildren.add(tree.getChildAt(b));
  1512. }
  1513. c.setChildren(subChildren);
  1514. if(replaceAt > 0) {
  1515. List<ParseTree> firstChildren = new ArrayList<>();
  1516. for(int d = 0; d < replaceAt; d++) {
  1517. firstChildren.add(tree.getChildAt(d));
  1518. }
  1519. tree.setChildren(firstChildren);
  1520. } else {
  1521. tree.removeChildren();
  1522. }
  1523. tree.addChild(c);
  1524. }
  1525. constructCount.peek().set(0);
  1526. continue;
  1527. }
  1528. if(t.type == TType.SLICE) {
  1529. //We got here because the previous token isn't being ignored, because it's
  1530. //actually a control character, instead of whitespace, but this is a
  1531. //"empty first" slice notation. Compare this to the code below.
  1532. try {
  1533. CSlice slice;
  1534. String value = next1.val();
  1535. if(next1.type == TType.MINUS || next1.type == TType.PLUS) {
  1536. value = next1.val() + next2.val();
  1537. i++;
  1538. }
  1539. slice = new CSlice(".." + value, t.getTarget());
  1540. i++;
  1541. tree.addChild(new ParseTree(slice, fileOptions));
  1542. constructCount.peek().incrementAndGet();
  1543. continue;
  1544. } catch (ConfigRuntimeException ex) {
  1545. //CSlice can throw CREs, but at this stage, we have to
  1546. //turn them into a CCE.
  1547. throw new ConfigCompileException(ex);
  1548. }
  1549. }
  1550. if(next1.type.equals(TType.SLICE)) {
  1551. //Slice notation handling
  1552. try {
  1553. CSlice slice;
  1554. if(t.type.isSeparator() || (t.type.isWhitespace() && prev1.type.isSeparator()) || t.type.isKeyword()) {
  1555. //empty first
  1556. String value = next2.val();
  1557. i++;
  1558. if(next2.type == TType.MINUS || next2.type == TType.PLUS) {
  1559. value = next2.val() + next3.val();
  1560. i++;
  1561. }
  1562. slice = new CSlice(".." + value, next1.getTarget());
  1563. if(t.type.isKeyword()) {
  1564. tree.addChild(new ParseTree(new CKeyword(t.val(), t.getTarget()), fileOptions));
  1565. constructCount.peek().incrementAndGet();
  1566. }
  1567. } else if(next2.type.isSeparator() || next2.type.isKeyword()) {
  1568. //empty last
  1569. String modifier = "";
  1570. if(prev1.type == TType.MINUS || prev1.type == TType.PLUS) {
  1571. //The negative would have already been inserted into the tree
  1572. modifier = prev1.val();
  1573. tree.removeChildAt(tree.getChildren().size() - 1);
  1574. }
  1575. slice = new CSlice(modifier + t.value + "..", t.target);
  1576. } else {
  1577. //both are provided
  1578. String modifier1 = "";
  1579. if(prev1.type == TType.MINUS || prev1.type == TType.PLUS) {
  1580. //It's a negative, incorporate that here, and remove the
  1581. //minus from the tree
  1582. modifier1 = prev1.val();
  1583. tree.removeChildAt(tree.getChildren().size() - 1);
  1584. }
  1585. Token first = t;
  1586. if(first.type.isWhitespace()) {
  1587. first = prev1;
  1588. }
  1589. Token second = next2;
  1590. i++;
  1591. String modifier2 = "";
  1592. if(next2.type == TType.MINUS || next2.type == TType.PLUS) {
  1593. modifier2 = next2.val();
  1594. second = next3;
  1595. i++;
  1596. }
  1597. slice = new CSlice(modifier1 + first.value + ".." + modifier2 + second.value, t.target);
  1598. }
  1599. i++;
  1600. tree.addChild(new ParseTree(slice, fileOptions));
  1601. constructCount.peek().incrementAndGet();
  1602. continue;
  1603. } catch (ConfigRuntimeException ex) {
  1604. //CSlice can throw CREs, but at this stage, we have to
  1605. //turn them into a CCE.
  1606. throw new ConfigCompileException(ex);
  1607. }
  1608. } else if(t.type == TType.LIT) {
  1609. Construct c = Static.resolveConstruct(t.val(), t.target, true);
  1610. // We need to consider other contexts, such as array(key: 'value'), which should be allowed. Thus
  1611. // this can't be implemented like this.
  1612. // if(c instanceof CBareString && StrictMode.isStrictMode(fileOptions, environment, unknown)) {
  1613. // compilerErrors.add(new ConfigCompileException("Bare strings are not allowed in strict mode",
  1614. // c.getTarget()));
  1615. // }
  1616. if((c instanceof CInt || c instanceof CDecimal) && next1.type == TType.DOT && next2.type == TType.LIT) {
  1617. // make CDouble/CDecimal here because otherwise Long.parseLong() will remove
  1618. // minus zero before decimals and leading zeroes after decimals
  1619. try {
  1620. if(t.value.startsWith("0m")) {
  1621. // CDecimal
  1622. String neg = "";
  1623. if(prev1.value.equals("-")) {
  1624. neg = "-";
  1625. }
  1626. c = new CDecimal(neg + t.value.substring(2) + '.' + next2.value, t.target);
  1627. } else {
  1628. // CDouble
  1629. c = new CDouble(Double.parseDouble(t.val() + '.' + next2.val()), t.target);
  1630. }
  1631. i += 2;
  1632. } catch (NumberFormatException e) {
  1633. // Not a double
  1634. }
  1635. }
  1636. tree.addChild(new ParseTree(c, fileOptions));
  1637. constructCount.peek().incrementAndGet();
  1638. } else if(t.type.equals(TType.STRING) || t.type.equals(TType.COMMAND)) {
  1639. tree.addChild(new ParseTree(new CString(t.val(), t.target), fileOptions));
  1640. constructCount.peek().incrementAndGet();
  1641. } else if(t.type.equals(TType.IDENTIFIER)) {
  1642. tree.addChild(new ParseTree(new CPreIdentifier(t.val(), t.target), fileOptions));
  1643. constructCount.peek().incrementAndGet();
  1644. } else if(t.type.isKeyword()) {
  1645. tree.addChild(new ParseTree(new CKeyword(t.val(), t.getTarget()), fileOptions));
  1646. constructCount.peek().incrementAndGet();
  1647. } else if(t.type.equals(TType.IVARIABLE)) {
  1648. tree.addChild(new ParseTree(new IVariable(t.val(), t.target), fileOptions));
  1649. constructCount.peek().incrementAndGet();
  1650. } else if(t.type.equals(TType.UNKNOWN)) {
  1651. tree.addChild(new ParseTree(Static.resolveConstruct(t.val(), t.target), fileOptions));
  1652. constructCount.peek().incrementAndGet();
  1653. } else if(t.type.isSymbol()) { //Logic and math symbols
  1654. // Attempt to find "-@var" and change it to "neg(@var)" if it's not @a - @b. Else just add the symbol.
  1655. // Also handles "-function()" and "-@var[index]".
  1656. if(t.type.equals(TType.MINUS) && !prev1.type.isAtomicLit() && !prev1.type.equals(TType.IVARIABLE)
  1657. && !prev1.type.equals(TType.VARIABLE) && !prev1.type.equals(TType.RCURLY_BRACKET)
  1658. && !prev1.type.equals(TType.RSQUARE_BRACKET) && !prev1.type.equals(TType.FUNC_END)
  1659. && (next1.type.equals(TType.IVARIABLE) || next1.type.equals(TType.VARIABLE) || next1.type.equals(TType.FUNC_NAME))) {
  1660. // Check if we are negating a value from an array, function or variable.
  1661. if(next2.type.equals(TType.LSQUARE_BRACKET)) {
  1662. minusArrayStack.push(new AtomicInteger(arrayStack.size() + 1)); // +1 because the bracket isn't counted yet.
  1663. } else if(next1.type.equals(TType.FUNC_NAME)) {
  1664. minusFuncStack.push(new AtomicInteger(parens + 1)); // +1 because the function isn't counted yet.
  1665. } else {
  1666. ParseTree negTree = new ParseTree(new CFunction(neg.NAME, unknown), fileOptions);
  1667. negTree.addChild(new ParseTree(new IVariable(next1.value, next1.target), fileOptions));
  1668. tree.addChild(negTree);
  1669. constructCount.peek().incrementAndGet();
  1670. i++; // Skip the next variable as we've just handled it.
  1671. }
  1672. } else {
  1673. tree.addChild(new ParseTree(new CSymbol(t.val(), t.type, t.target), fileOptions));
  1674. constructCount.peek().incrementAndGet();
  1675. }
  1676. } else if(t.type == TType.DOT) {
  1677. // Check for doubles that start with a decimal, otherwise concat
  1678. Construct c = null;
  1679. if(next1.type == TType.LIT && prev1.type != TType.STRING && prev1.type != TType.SMART_STRING) {
  1680. try {
  1681. c = new CDouble(Double.parseDouble('.' + next1.val()), t.target);
  1682. i++;
  1683. } catch (NumberFormatException e) {
  1684. // Not a double
  1685. }
  1686. }
  1687. if(c == null) {
  1688. c = new CSymbol(".", TType.CONCAT, t.target);
  1689. }
  1690. tree.addChild(new ParseTree(c, fileOptions));
  1691. constructCount.peek().incrementAndGet();
  1692. } else if(t.type.equals(TType.VARIABLE) || t.type.equals(TType.FINAL_VAR)) {
  1693. tree.addChild(new ParseTree(new Variable(t.val(), null, false, t.type.equals(TType.FINAL_VAR), t.target), fileOptions));
  1694. constructCount.peek().incrementAndGet();
  1695. //right_vars.add(new Variable(t.val(), null, t.line_num));
  1696. }
  1697. }
  1698. assert t != null || stream.size() == 0;
  1699. // Handle mismatching square brackets "[]".
  1700. assert arrayStack.size() != 0 : "The last element of arrayStack should be present, but it was popped.";
  1701. if(arrayStack.size() != 1) {
  1702. // Some starting square bracket '[' was not closed at the end of the script.
  1703. // Find the last '[' that was not closed and use that as target instead of the last line of the script.
  1704. Target target = traceMismatchedOpenToken(stream, TType.LSQUARE_BRACKET, TType.RSQUARE_BRACKET);
  1705. assert target != null : "Mismatched bracket was detected, but target-finding code could not find it.";
  1706. if(target == null) {
  1707. target = t.target;
  1708. }
  1709. // Throw a CCE.
  1710. throw new ConfigCompileException("Mismatched square brackets", target);
  1711. }
  1712. // Handle mismatching parentheses "()".
  1713. if(parens != 0) {
  1714. // Some starting parentheses '(' was not closed at the end of the script.
  1715. // Find the last '(' that was not closed and use that as target instead of the last line of the script.
  1716. Target target = traceMismatchedOpenToken(stream, TType.FUNC_START, TType.FUNC_END);
  1717. assert target != null : "Mismatched parentheses was detected, but target-finding code could not find it.";
  1718. if(target == null) {
  1719. target = t.target;
  1720. }
  1721. // Throw a CCE.
  1722. throw new ConfigCompileException("Mismatched parentheses", target);
  1723. }
  1724. // Handle mismatching curly braces "{}".
  1725. if(braceCount != 0) {
  1726. // Some starting curly brace '{' was not closed at the end of the script.
  1727. // Find the last '{' that was not closed and use that as target instead of the last line of the script.
  1728. Target target = traceMismatchedOpenToken(stream, TType.LCURLY_BRACKET, TType.RCURLY_BRACKET);
  1729. assert target != null : "Mismatched curly brace was detected, but target-finding code could not find it.";
  1730. if(target == null) {
  1731. target = t.target;
  1732. }
  1733. // Throw a CCE.
  1734. throw new ConfigCompileException("Mismatched curly braces", target);
  1735. }
  1736. // Assert that the parents stack does not have unexpected unhandled elements remaining.
  1737. assert parents.size() == 2 : "Expected exactly the root and autoconcat nodes on parents stack.";
  1738. assert parents.pop() == tree : "Mismatching stack element.";
  1739. assert parents.pop() == rootNode : "Expected the last element of the stack to be the root node.";
  1740. assert rootNode.getChildAt(0) == tree : "Expected tree to be the first child of the root node.";
  1741. // Process the AST.
  1742. Stack<List<Procedure>> procs = new Stack<>();
  1743. procs.add(new ArrayList<>());
  1744. processKeywords(tree, environment, compilerErrors);
  1745. rewriteAutoconcats(tree, environment, envs, compilerErrors);
  1746. checkLinearComponents(tree, environment, compilerErrors);
  1747. postParseRewrite(rootNode, environment, envs, compilerErrors); // Pass rootNode since this might rewrite 'tree'.
  1748. tree = rootNode.getChildAt(0);
  1749. if(staticAnalysis != null) {
  1750. staticAnalysis.analyze(tree, environment, envs, compilerErrors);
  1751. }
  1752. optimize(tree, environment, envs, procs, compilerErrors);
  1753. link(tree, compilerErrors);
  1754. checkFunctionsExist(tree, compilerErrors, envs);
  1755. checkLabels(tree, compilerErrors);
  1756. checkBreaks(tree, compilerErrors);
  1757. if(staticAnalysis == null) {
  1758. checkUnhandledCompilerConstructs(tree, environment, compilerErrors);
  1759. }
  1760. if(!compilerErrors.isEmpty()) {
  1761. if(compilerErrors.size() == 1) {
  1762. // Just throw the one CCE
  1763. throw compilerErrors.iterator().next();
  1764. } else {
  1765. throw new ConfigCompileGroupException(compilerErrors);
  1766. }
  1767. }
  1768. eliminateDeadCode(tree, environment, envs);
  1769. return rootNode;
  1770. }
  1771. private static void checkLinearComponents(ParseTree tree, Environment env,
  1772. Set<ConfigCompileException> compilerErrors) {
  1773. for(ParseTree m : tree.getAllNodes()) {
  1774. if(m.getData() instanceof CBareString && !(m.getData() instanceof CKeyword)) {
  1775. if(m.getFileOptions().isStrict()) {
  1776. compilerErrors.add(new ConfigCompileException("Use of bare strings in strict mode is not"
  1777. + " allowed.", m.getTarget()));
  1778. } else {
  1779. env.getEnv(CompilerEnvironment.class).addCompilerWarning(m.getFileOptions(),
  1780. new CompilerWarning("Use of bare string", m.getTarget(),
  1781. FileOptions.SuppressWarning.UseBareStrings));
  1782. return; // for now, only one warning per file
  1783. }
  1784. }
  1785. }
  1786. }
  1787. /**
  1788. * Trace target of mismatching open tokens such as '(' in '()' or '{' in '{}'. This should be used when it is
  1789. * known that there are more start than close tokens, but no target is known for the extra start token.
  1790. * @param stream - The token stream to scan.
  1791. * @param openType - The open type, which would be {@link TType#FUNC_START (} for a parentheses check.
  1792. * @param closeType - The close type, which would be {@link TType#FUNC_END )} for a parentheses check.
  1793. * @return The target of the last occurrence of the opening type that did not have a matching closing type.
  1794. * Returns null of no target was found.
  1795. */
  1796. private static Target traceMismatchedOpenToken(TokenStream stream, TType openType, TType closeType) {
  1797. // Some starting parentheses '(' was not closed at the end of the script.
  1798. // Find the last '(' that was not closed and use that as target instead of the last line of the script.
  1799. Iterator<Token> iterator = stream.descendingIterator();
  1800. int closingCount = 0;
  1801. while(iterator.hasNext()) {
  1802. Token token = iterator.next();
  1803. if(token.type == closeType) {
  1804. closingCount++;
  1805. } else if(token.type == openType) {
  1806. if(closingCount <= 0) {
  1807. return token.target;
  1808. }
  1809. closingCount--;
  1810. }
  1811. }
  1812. return null;
  1813. }
  1814. /**
  1815. * Recurses down the tree and ensures that breaks don't bubble up past procedures or the root code tree.
  1816. *
  1817. * @param tree
  1818. * @throws ConfigCompileException
  1819. */
  1820. private static void checkBreaks(ParseTree tree, Set<ConfigCompileException> compilerExceptions) {
  1821. checkBreaks0(tree, 0, null, compilerExceptions);
  1822. }
  1823. private static void checkBreaks0(ParseTree tree, long currentLoops, String lastUnbreakable, Set<ConfigCompileException> compilerErrors) {
  1824. if(!(tree.getData() instanceof CFunction)) {
  1825. //Don't care about these
  1826. return;
  1827. }
  1828. if(!((CFunction) tree.getData()).hasFunction()) {
  1829. //We need to recurse, but this is not expected to be a function
  1830. for(ParseTree child : tree.getChildren()) {
  1831. checkBreaks0(child, currentLoops, lastUnbreakable, compilerErrors);
  1832. }
  1833. return;
  1834. }
  1835. Function func;
  1836. try {
  1837. func = ((CFunction) tree.getData()).getFunction();
  1838. } catch (ConfigCompileException ex) {
  1839. compilerErrors.add(ex);
  1840. return;
  1841. }
  1842. if(func.getClass().getAnnotation(nolinking.class) != null) {
  1843. // Don't link here
  1844. return;
  1845. }
  1846. // We have special handling for procs and closures, and of course break and the loops.
  1847. // If any of these are here, we kick into special handling mode. Otherwise, we recurse.
  1848. if(func instanceof ControlFlow._break) {
  1849. // First grab the counter in the break function. If the break function doesn't
  1850. // have any children, then 1 is implied. break() requires the argument to be
  1851. // a CInt, so if it weren't, there should be a compile error.
  1852. long breakCounter = 1;
  1853. if(tree.getChildren().size() == 1) {
  1854. try {
  1855. breakCounter = ArgumentValidation.getInt32(tree.getChildAt(0).getData(), tree.getChildAt(0).getTarget());
  1856. } catch (CRECastException | CRERangeException e) {
  1857. compilerErrors.add(new ConfigCompileException(e));
  1858. return;
  1859. }
  1860. }
  1861. if(breakCounter > currentLoops) {
  1862. // Throw an exception, as this would break above a loop. Different error messages
  1863. // are applied to different cases
  1864. if(currentLoops == 0) {
  1865. compilerErrors.add(new ConfigCompileException("The break() function can only break out of loops" + (lastUnbreakable == null ? "."
  1866. : ", but an attempt to break out of a " + lastUnbreakable + " was detected."), tree.getTarget()));
  1867. } else {
  1868. compilerErrors.add(new ConfigCompileException("Too many breaks"
  1869. + " detected. Check your loop nesting, and set the break count to an appropriate value.", tree.getTarget()));
  1870. }
  1871. }
  1872. return;
  1873. }
  1874. if(func.getClass().getAnnotation(unbreakable.class) != null) {
  1875. // Parse the children like normal, but reset the counter to 0.
  1876. for(ParseTree child : tree.getChildren()) {
  1877. checkBreaks0(child, 0, func.getName(), compilerErrors);
  1878. }
  1879. return;
  1880. }
  1881. if(func.getClass().getAnnotation(breakable.class) != null) {
  1882. // Don't break yet, still recurse, but up our current loops counter.
  1883. currentLoops++;
  1884. }
  1885. for(ParseTree child : tree.getChildren()) {
  1886. checkBreaks0(child, currentLoops, lastUnbreakable, compilerErrors);
  1887. }
  1888. }
  1889. // private static void processLinearComponents(ParseTree tree, Set<ConfigCompileException> compilerErrors) {
  1890. // if(tree.hasChildren()) {
  1891. // for(ParseTree child : tree.getChildren()) {
  1892. // processLinearComponents(child, compilerErrors);
  1893. // }
  1894. // // Process bare string "concatenation"
  1895. // for(int i = 0; i < tree.getChildren().size(); i++) {
  1896. // ParseTree data = tree.getChildAt(i);
  1897. // ParseTree data2 = null;
  1898. // if(i < tree.getChildren().size() - 1) {
  1899. // data2 = tree.getChildAt(i + 1);
  1900. // }
  1901. // if(data2 != null) {
  1902. // if(data.getData() instanceof CBareString && data2.getData() instanceof CSymbol
  1903. // && ((CSymbol) data2.getData()).isConcatenation()) {
  1904. //
  1905. // }
  1906. // }
  1907. // }
  1908. // }
  1909. // // If there are no children, there's nothing to do right now, so just skip this invocation
  1910. // }
  1911. // private static void processBareStrings(ParseTree root, Set<ConfigCompileException> compilerExceptions) {
  1912. // if(root.hasChildren()) {
  1913. // for(ParseTree child : root.getChildren()) {
  1914. // processBareStrings(child, compilerExceptions);
  1915. // }
  1916. // }
  1917. // // We need to first remove the CBareStrings, and convert them to CStrings (or CClassType or issue a compiler
  1918. // // warning, depending on the case), as the rest of these methods assume CStrings.
  1919. // List<ParseTree> temp = new ArrayList<>(root.getChildren());
  1920. // checkClassType: for(int i = 0; i < temp.size() - 1; i++) {
  1921. // ParseTree node = temp.get(i);
  1922. // ParseTree next = temp.get(i + 1);
  1923. // if(node.getData() instanceof CBareString && next.getData() instanceof CSymbol
  1924. // && ((CSymbol) next.getData()).isConcatenation()) {
  1925. // // Concatenation of bare strings. We need to look at the whole chain and see if it's a valid
  1926. // // type or not, and if not, issue an error.
  1927. // String type = node.getData().val() + ".";
  1928. // temp.remove(i);
  1929. // temp.remove(i);
  1930. // for(int j = i; j < temp.size(); j++) {
  1931. // ParseTree jNode = temp.get(j);
  1932. // ParseTree jNext = null;
  1933. // if(j < temp.size() - 1) {
  1934. // jNext = temp.get(j + 1);
  1935. // }
  1936. // if(jNode.getData() instanceof CBareString) {
  1937. // type += jNode.getData().val();
  1938. // temp.remove(j);
  1939. // if(jNext != null && jNext.getData() instanceof CSymbol
  1940. // && ((CSymbol) jNext.getData()).isConcatenation()) {
  1941. // // Continue the chain
  1942. // type += ".";
  1943. // temp.remove(j);
  1944. // j--;
  1945. // } else {
  1946. // // End of the chain, break here.
  1947. // break;
  1948. // }
  1949. // } else {
  1950. // // This is completely unexpected, and means that we are concatenating a bare string with
  1951. // // some other data type. We'll reset list, and let the rest of the code take over.
  1952. // temp = root.getChildren();
  1953. // break checkClassType;
  1954. // }
  1955. // }
  1956. // // TODO: Once compiler environments are added, we would need to check to see if the value here is a custom
  1957. // // type. However, as it stands, since we only support the native types, we will just hardcode the check here.
  1958. // String fqType = NativeTypeList.resolveNativeType(type);
  1959. // if(fqType != null) {
  1960. // try {
  1961. // temp.add(i, new ParseTree(CClassType.get(FullyQualifiedClassName
  1962. // .forFullyQualifiedClass(fqType)), node.getFileOptions()));
  1963. // } catch(ClassNotFoundException ex) {
  1964. // throw new RuntimeException(ex);
  1965. // }
  1966. // } else {
  1967. // compilerExceptions.add(new ConfigCompileException("Invalid/Unknown type: " + type, node.getTarget()));
  1968. // return;
  1969. // }
  1970. // i--;
  1971. // }
  1972. // }
  1973. // root.setChildren(temp);
  1974. // // Now, any bare strings that remain are an error in strict mode, or need to be converted to CStrings
  1975. // // in non-strict mode. There is one exception though, if the string is a class type, then it was a
  1976. // // not fully qualified class name, which is allowed, so in that case, we convert it to CClassType.
  1977. // for(int i = 0; i < root.getChildren().size(); i++) {
  1978. // ParseTree node = root.getChildren().get(i);
  1979. // if(node.getData() instanceof CBareString) {
  1980. // String fqType = NativeTypeList.resolveNativeType(node.getData().val());
  1981. // if(fqType != null) {
  1982. // root.getChildren().remove(i);
  1983. // try {
  1984. // root.getChildren().add(i, new ParseTree(CClassType.get(FullyQualifiedClassName
  1985. // .forFullyQualifiedClass(fqType)), node.getFileOptions()));
  1986. // } catch(ClassNotFoundException ex) {
  1987. // throw new RuntimeException(ex);
  1988. // }
  1989. // continue;
  1990. // }
  1991. // if(node.getFileOptions().isStrict()) {
  1992. // compilerExceptions.add(new ConfigCompileException("Bare strings are not allowed in strict mode.",
  1993. // node.getTarget()));
  1994. // } else {
  1995. // root.getChildren().remove(i);
  1996. // root.getChildren().add(i, new ParseTree(new CString(node.getData().val(), node.getTarget()),
  1997. // node.getFileOptions()));
  1998. // }
  1999. // }
  2000. // }
  2001. // }
  2002. /**
  2003. * Rewrites __autoconcat__ AST nodes to executable AST nodes. This should be called before AST optimization,
  2004. * static analysis and anything else that requires a fully executable AST.
  2005. * When this method returns, any __autoconcat__ that did not contain compile errors has been rewritten.
  2006. *
  2007. * @param root
  2008. * @param env
  2009. * @param envs
  2010. * @param compilerExceptions
  2011. */
  2012. private static void rewriteAutoconcats(ParseTree root, Environment env,
  2013. Set<Class<? extends Environment.EnvironmentImpl>> envs, Set<ConfigCompileException> compilerExceptions) {
  2014. for(ParseTree child : root.getChildren()) {
  2015. if(child.hasChildren()) {
  2016. rewriteAutoconcats(child, env, envs, compilerExceptions);
  2017. }
  2018. }
  2019. if(root.getData() instanceof CFunction && root.getData().val().equals(__autoconcat__.NAME)) {
  2020. // In non-strict mode, let __autoconcat__ glue arguments together with sconcat.
  2021. boolean returnSConcat = !root.getFileOptions().isStrict();
  2022. try {
  2023. ParseTree ret = ((Compiler.__autoconcat__) ((CFunction) root.getData()).getFunction())
  2024. .rewrite(root.getChildren(), returnSConcat, envs);
  2025. root.setData(ret.getData());
  2026. root.setChildren(ret.getChildren());
  2027. } catch (ConfigCompileException ex) {
  2028. compilerExceptions.add(ex);
  2029. return;
  2030. }
  2031. // __autoconcat__'s AST rewrite can include new __autoconcat__'s, so handle them again.
  2032. rewriteAutoconcats(root, env, envs, compilerExceptions);
  2033. }
  2034. }
  2035. /**
  2036. * Allows functions to perform a rewrite step to rewrite the AST as received from the parser to a valid
  2037. * executable AST. Optimizations should not yet be performed in this rewrite step.
  2038. * Additionally, this step traverses all {@link CFunction} nodes and ensures that they either have their represented
  2039. * function cached or are unknown by the compiler.
  2040. * Traversal is pre-order depth-first.
  2041. * @param ast - The abstract syntax tree representing this function.
  2042. * @param env - The environment.
  2043. * @param envs - The set of expected environment classes at runtime.
  2044. * @param exceptions - A set to put compile errors in.
  2045. * @return The rewritten AST node that should completely replace the AST node representing this function, or
  2046. * {@code null} to not replace this AST node. Note that the rewrite will be called on this newly returned AST node
  2047. * if it is different from the passed node.
  2048. */
  2049. private static ParseTree postParseRewrite(ParseTree ast, Environment env,
  2050. Set<Class<? extends Environment.EnvironmentImpl>> envs, Set<ConfigCompileException> exceptions) {
  2051. Mixed node = ast.getData();
  2052. if(node instanceof CFunction) {
  2053. CFunction cFunc = (CFunction) node;
  2054. if(cFunc.hasFunction()) {
  2055. try {
  2056. Function func = cFunc.getFunction();
  2057. ParseTree newAst = func.postParseRewrite(ast, env, envs, exceptions);
  2058. if(newAst != null) {
  2059. ast = newAst;
  2060. }
  2061. } catch (ConfigCompileException ex) {
  2062. // Unknown function. This will be handled later.
  2063. }
  2064. }
  2065. }
  2066. for(int i = 0; i < ast.numberOfChildren(); i++) {
  2067. ParseTree child = ast.getChildAt(i);
  2068. ParseTree newChild = postParseRewrite(child, env, envs, exceptions);
  2069. if(newChild != null && child != newChild) {
  2070. ast.getChildren().set(i, newChild);
  2071. i--; // Allow the new child to do a rewrite step as well.
  2072. }
  2073. }
  2074. return ast;
  2075. }
  2076. /**
  2077. * Recurses down the tree and ensures that there are no dynamic labels. This has to finish completely after
  2078. * optimization, because the optimizer has no good hook to know when optimization for a unit is fully completed,
  2079. * until ALL units are fully complete, so this happens separately after optimization, but as apart of the normal
  2080. * compile process.
  2081. *
  2082. * @param tree
  2083. * @throws ConfigCompileException
  2084. */
  2085. private static void checkLabels(ParseTree tree, Set<ConfigCompileException> compilerErrors) throws ConfigCompileException {
  2086. // for(ParseTree t : tree.getChildren()){
  2087. // if(t.getData() instanceof CLabel){
  2088. // if(((CLabel)t.getData()).cVal() instanceof IVariable){
  2089. // throw new ConfigCompileException("Variables may not be used as labels", t.getTarget());
  2090. // }
  2091. // }
  2092. // checkLabels(t);
  2093. // }
  2094. }
  2095. /**
  2096. * Recurses down the tree and
  2097. * <ul>
  2098. * <li>Links functions</li>
  2099. * <li>Validates function argument size</li>
  2100. * </ul>
  2101. * This should be called after {@link #optimize(ParseTree, Environment, Set, Stack, Set)} so that functions with
  2102. * custom linkage only get linked when they are not removed during optimization.
  2103. *
  2104. *
  2105. * @param tree
  2106. * @param compilerErrors
  2107. */
  2108. private static void link(ParseTree tree, Set<ConfigCompileException> compilerErrors) {
  2109. if(tree.getData() instanceof CFunction) {
  2110. Function function = ((CFunction) tree.getData()).getCachedFunction();
  2111. // Check the argument count, and do any custom linking the function may have.
  2112. if(function != null) {
  2113. if(function.getClass().getAnnotation(nolinking.class) != null) {
  2114. // Don't link children of a nolinking function.
  2115. return;
  2116. }
  2117. Integer[] numArgs = function.numArgs();
  2118. if(!Arrays.asList(numArgs).contains(Integer.MAX_VALUE)
  2119. && !Arrays.asList(numArgs).contains(tree.getChildren().size())) {
  2120. compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to "
  2121. + tree.getData().val(), tree.getData().getTarget()));
  2122. }
  2123. if(function instanceof Optimizable) {
  2124. Optimizable op = (Optimizable) function;
  2125. if(op.optimizationOptions().contains(OptimizationOption.CUSTOM_LINK)) {
  2126. try {
  2127. op.link(tree.getData().getTarget(), tree.getChildren());
  2128. } catch (ConfigRuntimeException ex) {
  2129. compilerErrors.add(new ConfigCompileException(ex));
  2130. } catch (ConfigCompileException ex) {
  2131. compilerErrors.add(ex);
  2132. }
  2133. }
  2134. }
  2135. }
  2136. }
  2137. // Walk the children.
  2138. for(ParseTree child : tree.getChildren()) {
  2139. if(child.getData() instanceof CFunction) {
  2140. link(child, compilerErrors);
  2141. }
  2142. }
  2143. }
  2144. /**
  2145. * Recurses down the tree and checks whether functions exist in the given environments, generating compile errors
  2146. * if they don't. This should be called after optimization, since it's okay to use undefined functions as long as
  2147. * static optimization can determine that they are never called in the current environment.
  2148. * This check ignores child nodes of functions with the {@link nolinking} annotation.
  2149. * @param tree
  2150. */
  2151. private static void checkFunctionsExist(ParseTree tree, Set<ConfigCompileException> compilerErrors,
  2152. Set<Class<? extends Environment.EnvironmentImpl>> envs) {
  2153. // Ignore non-CFunction nodes.
  2154. if(tree.getData() instanceof CFunction) {
  2155. // Check current node, returning if it is a 'nolinking' function.
  2156. CFunction cFunc = (CFunction) tree.getData();
  2157. if(cFunc.hasFunction()) {
  2158. FunctionBase func = cFunc.getCachedFunction(envs);
  2159. lookup: {
  2160. if(func == null) {
  2161. // Technically, we could be dealing with a FunctionBase that isn't cached. So do another lookup.
  2162. try {
  2163. func = FunctionList.getFunction(cFunc, envs);
  2164. } catch (ConfigCompileException ex) {
  2165. compilerErrors.add(ex);
  2166. break lookup;
  2167. }
  2168. }
  2169. if(func.getClass().getAnnotation(nolinking.class) != null) {
  2170. return; // Don't check children of 'nolinking' functions.
  2171. }
  2172. }
  2173. }
  2174. // Recursively check children.
  2175. for(ParseTree child : tree.getChildren()) {
  2176. checkFunctionsExist(child, compilerErrors, envs);
  2177. }
  2178. }
  2179. }
  2180. /**
  2181. * Recurses down into the tree, attempting to optimize where possible. A few things have strong coupling, for
  2182. * information on these items, see the documentation included in the source.
  2183. *
  2184. * @param tree
  2185. * @return
  2186. */
  2187. private static void optimize(ParseTree tree, Environment env,
  2188. Set<Class<? extends Environment.EnvironmentImpl>> envs, Stack<List<Procedure>> procs,
  2189. Set<ConfigCompileException> compilerErrors) {
  2190. if(tree.isOptimized()) {
  2191. return; //Don't need to re-run this
  2192. }
  2193. // if(tree.getData() instanceof CIdentifier) {
  2194. // optimize(((CIdentifier) tree.getData()).contained(), procs);
  2195. // return;
  2196. // }
  2197. if(!(tree.getData() instanceof CFunction)) {
  2198. //There's no way to optimize something that's not a function
  2199. return;
  2200. }
  2201. //If it is a proc definition, we need to go ahead and see if we can add it to the const proc stack
  2202. if(tree.getData().val().equals("proc")) {
  2203. procs.push(new ArrayList<>());
  2204. }
  2205. CFunction cFunction = (CFunction) tree.getData();
  2206. Function func = cFunction.getCachedFunction(envs);
  2207. if(func != null) {
  2208. if(func.getClass().getAnnotation(nolinking.class) != null) {
  2209. //It's an unlinking function, so we need to stop at this point
  2210. return;
  2211. }
  2212. }
  2213. List<ParseTree> children = tree.getChildren();
  2214. if(func instanceof Optimizable && ((Optimizable) func).optimizationOptions()
  2215. .contains(OptimizationOption.PRIORITY_OPTIMIZATION)) {
  2216. // This is a priority optimization function, meaning it needs to be optimized before its children are.
  2217. // This is required when optimization of the children could cause different internal behavior, for instance
  2218. // if this function is expecting the presence of some code element, but the child gets optimized out, this
  2219. // would cause an error, even though the user did in fact provide code in that section.
  2220. try {
  2221. ((Optimizable) func).optimizeDynamic(tree.getTarget(), env, envs, children, tree.getFileOptions());
  2222. } catch (ConfigCompileException ex) {
  2223. // If an error occurs, we will skip the rest of this element
  2224. compilerErrors.add(ex);
  2225. return;
  2226. } catch (ConfigRuntimeException ex) {
  2227. compilerErrors.add(new ConfigCompileException(ex));
  2228. return;
  2229. }
  2230. }
  2231. boolean fullyStatic = true;
  2232. boolean hasIVars = false;
  2233. for(ParseTree node : children) {
  2234. if(node.getData() instanceof CFunction) {
  2235. optimize(node, env, envs, procs, compilerErrors);
  2236. }
  2237. if(node.getData() instanceof Construct) {
  2238. Construct d = (Construct) node.getData();
  2239. if(d.isDynamic() || (d instanceof IVariable)) {
  2240. fullyStatic = false;
  2241. }
  2242. }
  2243. if(node.getData() instanceof IVariable) {
  2244. hasIVars = true;
  2245. }
  2246. }
  2247. //In all cases, at this point, we are either unable to optimize, or we will
  2248. //optimize, so set our optimized variable at this point.
  2249. tree.setOptimized(true);
  2250. if(func == null) {
  2251. //It's a proc call. Let's see if we can optimize it
  2252. Procedure p = null;
  2253. loop:
  2254. for(List<Procedure> proc : procs) {
  2255. for(Procedure pp : proc) {
  2256. if(pp.getName().equals(cFunction.val())) {
  2257. p = pp;
  2258. break loop;
  2259. }
  2260. }
  2261. }
  2262. if(p != null) {
  2263. try {
  2264. Mixed c = DataHandling.proc.optimizeProcedure(p.getTarget(), p, children);
  2265. if(c != null) {
  2266. tree.setData(c);
  2267. tree.removeChildren();
  2268. return;
  2269. } //else Nope, couldn't optimize.
  2270. } catch (ConfigRuntimeException ex) {
  2271. //Cool. Caught a runtime error at compile time :D
  2272. compilerErrors.add(new ConfigCompileException(ex));
  2273. }
  2274. }
  2275. //else this procedure isn't listed yet. Maybe a compiler error, maybe not, depends,
  2276. //so we can't for sure say, but we do know we can't optimize this
  2277. return;
  2278. }
  2279. if(tree.getData().val().equals("proc")) {
  2280. //Check for too few arguments
  2281. if(children.size() < 2) {
  2282. compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to proc",
  2283. tree.getData().getTarget()));
  2284. return;
  2285. }
  2286. //We just went out of scope, so we need to pop the layer of Procedures that
  2287. //are internal to us
  2288. procs.pop();
  2289. //However, as a special function, we *might* be able to get a const proc out of this
  2290. //Let's see.
  2291. try {
  2292. ParseTree root = new ParseTree(
  2293. new CFunction(__autoconcat__.NAME, Target.UNKNOWN), tree.getFileOptions());
  2294. Script fakeScript = Script.GenerateScript(root, "*", null);
  2295. // Environment env = null;
  2296. // try {
  2297. // if(Implementation.GetServerType().equals(Implementation.Type.BUKKIT)) {
  2298. // CommandHelperPlugin plugin = CommandHelperPlugin.self;
  2299. // GlobalEnv gEnv = new GlobalEnv(plugin.executionQueue, plugin.profiler, plugin.persistenceNetwork,
  2300. // MethodScriptFileLocations.getDefault().getConfigDirectory(), plugin.profiles, new TaskManagerImpl());
  2301. // env = Environment.createEnvironment(gEnv, new CommandHelperEnvironment());
  2302. // } else {
  2303. // env = Static.GenerateStandaloneEnvironment(false);
  2304. // }
  2305. // } catch (IOException | DataSourceException | URISyntaxException | Profiles.InvalidProfileException e) {
  2306. // //
  2307. // }
  2308. if(env.hasEnv(GlobalEnv.class)) {
  2309. // For testing, we frequently set this to null, so check this first.
  2310. env.getEnv(GlobalEnv.class).SetFlag("no-check-undefined", true);
  2311. }
  2312. Procedure myProc = DataHandling.proc.getProcedure(tree.getTarget(), env, fakeScript, children.toArray(new ParseTree[children.size()]));
  2313. if(env.hasEnv(GlobalEnv.class)) {
  2314. env.getEnv(GlobalEnv.class).ClearFlag("no-check-undefined");
  2315. }
  2316. procs.peek().add(myProc); //Yep. So, we can move on with our lives now, and if it's used later, it could possibly be static.
  2317. } catch (ConfigRuntimeException e) {
  2318. //Well, they have an error in there somewhere
  2319. compilerErrors.add(new ConfigCompileException(e));
  2320. } catch (NullPointerException e) {
  2321. //Nope, can't optimize.
  2322. return;
  2323. }
  2324. }
  2325. //the compiler trick functions know how to deal with it specially, even if everything isn't
  2326. //static, so do this first.
  2327. String oldFunctionName = func.getName();
  2328. Set<OptimizationOption> options = NO_OPTIMIZATIONS;
  2329. if(func instanceof Optimizable) {
  2330. options = ((Optimizable) func).optimizationOptions();
  2331. }
  2332. if(options.contains(OptimizationOption.OPTIMIZE_DYNAMIC)) {
  2333. try {
  2334. ParseTree tempNode;
  2335. try {
  2336. for(ParseTree child : tree.getChildren()) {
  2337. if(child.getData() instanceof CSymbol) {
  2338. throw new ConfigCompileException("Unexpected symbol", tree.getData().getTarget());
  2339. }
  2340. }
  2341. tempNode = ((Optimizable) func).optimizeDynamic(tree.getData().getTarget(), env, envs,
  2342. tree.getChildren(), tree.getFileOptions());
  2343. } catch (ConfigRuntimeException e) {
  2344. //Turn it into a compile exception, then rethrow
  2345. throw new ConfigCompileException(e);
  2346. } catch (Error t) {
  2347. throw new Error("The linked Error had a code target on or around " + tree.getData().getTarget(), t);
  2348. }
  2349. if(tempNode == Optimizable.PULL_ME_UP) {
  2350. if(tree.hasChildren()) {
  2351. tempNode = tree.getChildAt(0);
  2352. } else {
  2353. tempNode = null;
  2354. }
  2355. }
  2356. if(tempNode == Optimizable.REMOVE_ME) {
  2357. tree.setData(new CFunction(p.NAME, Target.UNKNOWN));
  2358. tree.removeChildren();
  2359. } else if(tempNode != null) {
  2360. tree.setData(tempNode.getData());
  2361. tree.setOptimized(tempNode.isOptimized());
  2362. tree.setChildren(tempNode.getChildren());
  2363. Construct.SetWasIdentifierHelper(tree.getData(), tempNode.getData(), false);
  2364. optimize(tree, env, envs, procs, compilerErrors);
  2365. tree.setOptimized(true);
  2366. //Some functions can actually make static the arguments, for instance, by pulling up a hardcoded
  2367. //array, so if they have reversed this, make note of that now
  2368. if(tempNode.hasBeenMadeStatic()) {
  2369. fullyStatic = true;
  2370. }
  2371. } //else it wasn't an optimization, but a compile check
  2372. } catch (ConfigCompileException ex) {
  2373. compilerErrors.add(ex);
  2374. // Also turn off optimizations for the rest of this flow, so we don't try the other optimization
  2375. // mechanisms, which are also bound to fail.
  2376. options = NO_OPTIMIZATIONS;
  2377. }
  2378. }
  2379. if(!fullyStatic) {
  2380. return;
  2381. }
  2382. //Otherwise, everything is static, or an IVariable and we can proceed.
  2383. //Note since we could still have IVariables, we have to handle those
  2384. //specially from here forward
  2385. if(func.preResolveVariables() && hasIVars) {
  2386. //Well, this function isn't equipped to deal with IVariables.
  2387. return;
  2388. }
  2389. //It could have optimized by changing the name, in that case, we
  2390. //don't want to run this now
  2391. if(tree.getData().val().equals(oldFunctionName)
  2392. && (options.contains(OptimizationOption.OPTIMIZE_CONSTANT) || options.contains(OptimizationOption.CONSTANT_OFFLINE))) {
  2393. Mixed[] constructs = new Mixed[tree.getChildren().size()];
  2394. for(int i = 0; i < tree.getChildren().size(); i++) {
  2395. constructs[i] = tree.getChildAt(i).getData();
  2396. }
  2397. try {
  2398. try {
  2399. Mixed result;
  2400. if(options.contains(OptimizationOption.CONSTANT_OFFLINE)) {
  2401. List<Integer> numArgsList = Arrays.asList(func.numArgs());
  2402. if(!numArgsList.contains(Integer.MAX_VALUE)
  2403. && !numArgsList.contains(tree.getChildren().size())) {
  2404. compilerErrors.add(new ConfigCompileException("Incorrect number of arguments passed to "
  2405. + tree.getData().val(), tree.getData().getTarget()));
  2406. result = null;
  2407. } else {
  2408. // // TODO: This should probably be moved up outside of this single method, and create a
  2409. // // compiler environment, which would be used by the functions that can do specific
  2410. // // optimizations, i.e. compile time type checking, etc. This is a good first start
  2411. // // though.
  2412. // Environment env = null;
  2413. // try {
  2414. // env = Static.GenerateStandaloneEnvironment(false);
  2415. // } catch (IOException | DataSourceException | URISyntaxException
  2416. // | Profiles.InvalidProfileException e) {
  2417. // // Print the stacktrace and move on. Not sure how to deal with this right now, or
  2418. // // what cases it would occur in.
  2419. // e.printStackTrace(System.err);
  2420. // }
  2421. result = func.exec(tree.getData().getTarget(), env, constructs);
  2422. }
  2423. } else {
  2424. result = ((Optimizable) func).optimize(tree.getData().getTarget(), env, constructs);
  2425. }
  2426. //If the result is null, it was just a check, it can't optimize further.
  2427. if(result != null) {
  2428. Construct.SetWasIdentifierHelper(tree.getData(), result, false);
  2429. tree.setData(result);
  2430. tree.removeChildren();
  2431. }
  2432. } catch (ConfigRuntimeException e) {
  2433. //Turn this into a ConfigCompileException, then rethrow
  2434. throw new ConfigCompileException(e);
  2435. }
  2436. } catch (ConfigCompileException ex) {
  2437. compilerErrors.add(ex);
  2438. }
  2439. }
  2440. //It doesn't know how to optimize. Oh well.
  2441. }
  2442. private static boolean eliminateDeadCode(ParseTree tree, Environment env, Set<Class<? extends Environment.EnvironmentImpl>> envs) {
  2443. //Loop through the children, and if any of them are functions that are terminal, truncate.
  2444. //To explain this further, consider the following:
  2445. //For the code: concat(die(), msg('')), this diagram shows the abstract syntax tree:
  2446. // (concat)
  2447. // / \
  2448. // / \
  2449. // (die) (msg)
  2450. //By looking at the code, we can tell that msg() will never be called, because die() will run first,
  2451. //and since it is a "terminal" function, any code after it will NEVER run. However, consider a more complex condition:
  2452. // if(@input){ die() msg('1') } else { msg('2') msg('3') }
  2453. // if(@input)
  2454. // [true]/ \[false]
  2455. // / \
  2456. // (sconcat) (sconcat)
  2457. // / \ / \
  2458. // / \ / \
  2459. // (die) (msg[1])(msg[2]) (msg[3])
  2460. //In this case, only msg('1') is guaranteed not to run, msg('2') and msg('3') will still run in some cases.
  2461. //So, we can optimize out msg('1') in this case, which would cause the tree to become much simpler, therefore a worthwile optimization:
  2462. // if(@input)
  2463. // [true]/ \[false]
  2464. // / \
  2465. // (die) (sconcat)
  2466. // / \
  2467. // / \
  2468. // (msg[2]) (msg[3])
  2469. //We do have to be careful though, because of functions like if, which actually work like this:
  2470. //if(@var){ die() } else { msg('') }
  2471. // (if)
  2472. // / | \
  2473. // / | \
  2474. // @var (die) (msg)
  2475. //We can't git rid of the msg() here, because it is actually in another branch.
  2476. //For the time being, we will simply say that if a function uses execs, it
  2477. //is a branch (branches always use execs, though using execs doesn't strictly
  2478. //mean you are a branch type function).
  2479. if(tree.getData() instanceof CFunction && ((CFunction) tree.getData()).hasFunction()) {
  2480. Function f = ((CFunction) tree.getData()).getCachedFunction(envs);
  2481. if(f == null) {
  2482. return false;
  2483. }
  2484. List<ParseTree> children = tree.getChildren();
  2485. List<Boolean> branches;
  2486. if(f instanceof BranchStatement) {
  2487. branches = ((BranchStatement) f).isBranch(children);
  2488. if(branches.size() != children.size()) {
  2489. List<Integer> numArgs = Arrays.asList(f.numArgs());
  2490. if(!numArgs.contains(Integer.MAX_VALUE) && !numArgs.contains(children.size())) {
  2491. // Incorrect number of arguments passed to the function, not a branch implementation error.
  2492. return false;
  2493. }
  2494. throw new Error(f.getName() + " does not properly implement isBranch. It does not return a value"
  2495. + " with the same count as the actual children. Children: " + children.size() + ";"
  2496. + " Branches: " + branches.size() + ";"
  2497. + " Code target causing this: "
  2498. + tree.getTarget());
  2499. }
  2500. } else {
  2501. branches = new ArrayList<>(children.size());
  2502. for(ParseTree child : children) {
  2503. branches.add(false);
  2504. }
  2505. }
  2506. boolean doDeletion = false;
  2507. for(int m = 0; m < children.size(); m++) {
  2508. boolean isBranch = branches.get(m);
  2509. if(doDeletion) {
  2510. if(isBranch) {
  2511. doDeletion = false;
  2512. } else {
  2513. env.getEnv(CompilerEnvironment.class)
  2514. .addCompilerWarning(tree.getFileOptions(), new CompilerWarning("Unreachable code. Consider"
  2515. + " removing this code.", children.get(m).getTarget(),
  2516. FileOptions.SuppressWarning.UnreachableCode));
  2517. children.remove(m);
  2518. m--;
  2519. continue;
  2520. }
  2521. }
  2522. ParseTree child = children.get(m);
  2523. if(child.getData() instanceof CFunction) {
  2524. if(!((CFunction) child.getData()).hasFunction()) {
  2525. continue;
  2526. }
  2527. Function c = ((CFunction) child.getData()).getCachedFunction(envs);
  2528. if(c == null) {
  2529. continue;
  2530. }
  2531. Set<OptimizationOption> options = NO_OPTIMIZATIONS;
  2532. if(c instanceof Optimizable) {
  2533. options = ((Optimizable) c).optimizationOptions();
  2534. }
  2535. doDeletion = options.contains(OptimizationOption.TERMINAL);
  2536. boolean subDoDelete = eliminateDeadCode(child, env, envs);
  2537. if(subDoDelete) {
  2538. doDeletion = true;
  2539. }
  2540. }
  2541. if(isBranch) {
  2542. doDeletion = false;
  2543. }
  2544. }
  2545. return doDeletion;
  2546. }
  2547. return false;
  2548. }
  2549. /**
  2550. * Runs keyword processing on the tree. Note that this is run before optimization, and is a depth first process.
  2551. *
  2552. * @param tree
  2553. */
  2554. private static void processKeywords(ParseTree tree, Environment env, Set<ConfigCompileException> compileErrors) {
  2555. // Keyword processing
  2556. List<ParseTree> children = tree.getChildren();
  2557. for(int i = 0; i < children.size(); i++) {
  2558. ParseTree node = children.get(i);
  2559. // Keywords can be standalone, or a function can double as a keyword. So we have to check for both
  2560. // conditions.
  2561. processKeywords(node, env, compileErrors);
  2562. Mixed m = node.getData();
  2563. if(m instanceof CKeyword
  2564. || (m instanceof CLabel && ((CLabel) m).cVal() instanceof CKeyword)
  2565. || (m instanceof CFunction && KeywordList.getKeywordByName(m.val()) != null)) {
  2566. // This looks a bit confusing, but is fairly straightforward. We want to process the child elements of all
  2567. // remaining nodes, so that subchildren that need processing will be finished, and our current tree level will
  2568. // be able to independently process it. We don't want to process THIS level though, just the children of this level.
  2569. for(int j = i + 1; j < children.size(); j++) {
  2570. processKeywords(children.get(j), env, compileErrors);
  2571. }
  2572. // Now that all the children of the rest of the chain are processed, we can do the processing of this level.
  2573. try {
  2574. i = KeywordList.getKeywordByName(m.val()).process(children, i);
  2575. } catch (ConfigCompileException ex) {
  2576. // Keyword processing failed, but the keyword might be part of some other syntax where it's valid.
  2577. // Store the compile error so that it can be thrown after all if the keyword won't be handled.
  2578. env.getEnv(CompilerEnvironment.class).potentialKeywordCompileErrors.put(m.getTarget(), ex);
  2579. }
  2580. }
  2581. }
  2582. }
  2583. /**
  2584. * Generates compile errors for unhandled compiler constructs that should not be present in the final AST,
  2585. * such as {@link CKeyword}.
  2586. * This is purely validation and should be called on the final AST.
  2587. * @param tree - The final abstract syntax tree.
  2588. * @param env - The environment.
  2589. * @param compilerErrors - A set to put compile errors in.
  2590. * @deprecated This is handled in {@link StaticAnalysis} and will no longer be useful when static analysis is
  2591. * permanently enabled.
  2592. */
  2593. @Deprecated
  2594. private static void checkUnhandledCompilerConstructs(ParseTree tree,
  2595. Environment env, Set<ConfigCompileException> compilerErrors) {
  2596. for(ParseTree node : tree.getAllNodes()) {
  2597. Mixed m = node.getData();
  2598. // Create compile error for unexpected keywords.
  2599. if(m instanceof CKeyword) {
  2600. ConfigCompileException ex =
  2601. env.getEnv(CompilerEnvironment.class).potentialKeywordCompileErrors.get(m.getTarget());
  2602. compilerErrors.add(ex != null ? ex
  2603. : new ConfigCompileException("Unexpected keyword: " + m.val(), m.getTarget()));
  2604. }
  2605. }
  2606. }
  2607. /**
  2608. * Shorthand for lexing, compiling, and executing a script.
  2609. *
  2610. * @param script The textual script to execute
  2611. * @param file The file it was located in
  2612. * @param inPureMScript If it is pure MScript, or aliases
  2613. * @param env The execution environment
  2614. * @param envs The environments that will be available at runtime
  2615. * @param done The MethodScriptComplete callback (may be null)
  2616. * @param s A script object (may be null)
  2617. * @param vars Any $vars (may be null)
  2618. * @return
  2619. * @throws ConfigCompileException
  2620. * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException This indicates that a group of compile errors
  2621. * occurred.
  2622. */
  2623. public static Mixed execute(String script, File file, boolean inPureMScript, Environment env,
  2624. Set<Class<? extends Environment.EnvironmentImpl>> envs,
  2625. MethodScriptComplete done, Script s, List<Variable> vars)
  2626. throws ConfigCompileException, ConfigCompileGroupException {
  2627. return execute(compile(lex(script, env, file, inPureMScript), env, envs), env, done, s, vars);
  2628. }
  2629. /**
  2630. * Executes a pre-compiled MethodScript, given the specified Script environment. Both done and script may be null,
  2631. * and if so, reasonable defaults will be provided. The value sent to done will also be returned, as a Mixed, so
  2632. * this one function may be used synchronously also.
  2633. *
  2634. * @param root
  2635. * @param env
  2636. * @param done
  2637. * @param script
  2638. * @return
  2639. */
  2640. public static Mixed execute(ParseTree root, Environment env, MethodScriptComplete done, Script script) {
  2641. return execute(root, env, done, script, null);
  2642. }
  2643. /**
  2644. * Executes a pre-compiled MethodScript, given the specified Script environment, but also provides a method to set
  2645. * the constants in the script.
  2646. *
  2647. * @param root
  2648. * @param env
  2649. * @param done
  2650. * @param script
  2651. * @param vars
  2652. * @return
  2653. */
  2654. public static Mixed execute(ParseTree root, Environment env, MethodScriptComplete done, Script script, List<Variable> vars) {
  2655. if(root == null) {
  2656. return CVoid.VOID;
  2657. }
  2658. if(script == null) {
  2659. script = new Script(null, null, env.getEnv(GlobalEnv.class).GetLabel(), env.getEnvClasses(),
  2660. new FileOptions(new HashMap<>()), null);
  2661. }
  2662. if(vars != null) {
  2663. Map<String, Variable> varMap = new HashMap<>();
  2664. for(Variable v : vars) {
  2665. varMap.put(v.getVariableName(), v);
  2666. }
  2667. for(Mixed tempNode : root.getAllData()) {
  2668. if(tempNode instanceof Variable) {
  2669. Variable vv = varMap.get(((Variable) tempNode).getVariableName());
  2670. if(vv != null) {
  2671. ((Variable) tempNode).setVal(vv.getDefault());
  2672. } else {
  2673. //The variable is unset. I'm not quite sure what cases would cause this
  2674. ((Variable) tempNode).setVal("");
  2675. }
  2676. }
  2677. }
  2678. }
  2679. StringBuilder b = new StringBuilder();
  2680. Mixed returnable = null;
  2681. for(ParseTree gg : root.getChildren()) {
  2682. Mixed retc = script.eval(gg, env);
  2683. if(root.numberOfChildren() == 1) {
  2684. returnable = retc;
  2685. }
  2686. String ret = retc instanceof CNull ? "null" : retc.val();
  2687. if(ret != null && !ret.trim().isEmpty()) {
  2688. b.append(ret).append(" ");
  2689. }
  2690. }
  2691. if(done != null) {
  2692. done.done(b.toString().trim());
  2693. }
  2694. if(returnable != null) {
  2695. return returnable;
  2696. }
  2697. return Static.resolveConstruct(b.toString().trim(), Target.UNKNOWN);
  2698. }
  2699. public static void registerAutoIncludes(Environment env, Script s) {
  2700. for(File f : Static.getAliasCore().autoIncludes) {
  2701. try {
  2702. MethodScriptCompiler.execute(
  2703. IncludeCache.get(f, env, env.getEnvClasses(), new Target(0, f, 0)), env, null, s);
  2704. } catch (ProgramFlowManipulationException e) {
  2705. ConfigRuntimeException.HandleUncaughtException(ConfigRuntimeException.CreateUncatchableException(
  2706. "Cannot break program flow in auto include files.", e.getTarget()), env);
  2707. } catch (ConfigRuntimeException e) {
  2708. e.setEnv(env);
  2709. ConfigRuntimeException.HandleUncaughtException(e, env);
  2710. }
  2711. }
  2712. }
  2713. private static final List<Character> PDF_STACK = Arrays.asList(
  2714. '\u202A', // LRE
  2715. '\u202B', // RLE
  2716. '\u202D', // LRO
  2717. '\u202E' // RLO
  2718. );
  2719. private static final List<Character> PDI_STACK = Arrays.asList(
  2720. '\u2066', // LRI
  2721. '\u2067' // RLI
  2722. );
  2723. private static final char PDF = '\u202C';
  2724. private static final char PDI = '\u2069';
  2725. /**
  2726. * A bidirectional control character (bidi character) is a unicode character which is used in legitimate
  2727. * circumstances to force right to left languages such as Arabic and Hebrew to format correctly, when used
  2728. * within the same encoding stream. There are a number of these characters, but in particular, there are two
  2729. * which can be used to hide the functionality, because they cause the rendering of the text to appear one way,
  2730. * but the compiler will read the code differently. In particular, if we insert "right to left isolation override"
  2731. * in the middle of a comment, it will cause the rest of the line to be reversed.
  2732. * <p>
  2733. * Say we have the following code, which appears like this in a text editor, where RLI is the text direction override.
  2734. * <pre>
  2735. * /* This is a comment RLI &#42;/ if(!@admin) {exit()}
  2736. * codeOnlyForAdmins();
  2737. * </pre>
  2738. *
  2739. * The editor will render that code as shown above, but the compiler will view the code as if it were
  2740. * written as such:
  2741. * <pre>
  2742. * /* This is a comment if(!@admin) {exit()} &#42;/
  2743. * codeOnlyForAdmins();
  2744. * </pre>
  2745. *
  2746. * Thus bypassing the apparently correct check for admin access. The key here is that the RLI indicator is not
  2747. * visible in the editor, and so cannot be checked for through simple code review without compiler or editor
  2748. * support. Some editors may add this in the future, but here, we simply disallow the attack at the compiler
  2749. * level, making such code uncompilable whether or not it is visible in the editor.
  2750. * <p>
  2751. * It's also worth noting that the end of a line implicitely terminates unbalanced flow modifiers.
  2752. * <p>
  2753. * The solution then is to disallow unterminated flow modifiers from being used in comments and strings, where
  2754. * the effects can flow across string or comment end markers. This allows for right-to-left languages to be used
  2755. * within strings and comments anyways, but prevents them from being used maliciously. Thus, this function should
  2756. * be called against the full string of the completed token.
  2757. * <p>
  2758. * For full details, see
  2759. * <a href="https://trojansource.codes/trojan-source.pdf">https://trojansource.codes/trojan-source.pdf</a>
  2760. * @param s The string to check
  2761. * @param t The code target of the token
  2762. * @throws ConfigCompileException If an unexpected sequence is detected
  2763. */
  2764. private static void validateTerminatedBidiSequence(String s, Target t) throws ConfigCompileException {
  2765. int pdfStack = 0;
  2766. int pdiStack = 0;
  2767. for(Character c : s.toCharArray()) {
  2768. if(PDF_STACK.contains(c)) {
  2769. pdfStack++;
  2770. }
  2771. if(c == PDF) {
  2772. pdfStack--;
  2773. }
  2774. if(PDI_STACK.contains(c)) {
  2775. pdiStack++;
  2776. }
  2777. if(c == PDI) {
  2778. pdiStack--;
  2779. }
  2780. }
  2781. if(pdfStack != 0 || pdiStack != 0) {
  2782. throw new ConfigCompileException("Incorrectly formatted unicode sequence", t);
  2783. }
  2784. }
  2785. }