PageRenderTime 69ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 1ms

/src/main/java/com/laytonsmith/core/MethodScriptCompiler.java

https://github.com/sk89q/commandhelper
Java | 2934 lines | 2163 code | 120 blank | 651 comment | 569 complexity | 50b12a10ffc4b42fd9564cdad46af309 MD5 | raw file
Possible License(s): BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. package com.laytonsmith.core;
  2. import com.laytonsmith.PureUtilities.Common.FileUtil;
  3. import com.laytonsmith.PureUtilities.Common.StringUtils;
  4. import com.laytonsmith.PureUtilities.SmartComment;
  5. import com.laytonsmith.annotations.OperatorPreferred;
  6. import com.laytonsmith.annotations.breakable;
  7. import com.laytonsmith.annotations.nolinking;
  8. import com.laytonsmith.annotations.unbreakable;
  9. import com.laytonsmith.core.Optimizable.OptimizationOption;
  10. import com.laytonsmith.core.compiler.BranchStatement;
  11. import com.laytonsmith.core.compiler.CompilerEnvironment;
  12. import com.laytonsmith.core.compiler.CompilerWarning;
  13. import com.laytonsmith.core.compiler.FileOptions;
  14. import com.laytonsmith.core.compiler.KeywordList;
  15. import com.laytonsmith.core.compiler.TokenStream;
  16. import com.laytonsmith.core.compiler.analysis.StaticAnalysis;
  17. import com.laytonsmith.core.compiler.keywords.ObjectDefinitionKeyword;
  18. import com.laytonsmith.core.constructs.CBareString;
  19. import com.laytonsmith.core.constructs.CDecimal;
  20. import com.laytonsmith.core.constructs.CDouble;
  21. import com.laytonsmith.core.constructs.CFunction;
  22. import com.laytonsmith.core.constructs.CInt;
  23. import com.laytonsmith.core.constructs.CKeyword;
  24. import com.laytonsmith.core.constructs.CLabel;
  25. import com.laytonsmith.core.constructs.CNull;
  26. import com.laytonsmith.core.constructs.CPreIdentifier;
  27. import com.laytonsmith.core.constructs.CSlice;
  28. import com.laytonsmith.core.constructs.CString;
  29. import com.laytonsmith.core.constructs.CSymbol;
  30. import com.laytonsmith.core.constructs.CVoid;
  31. import com.laytonsmith.core.constructs.Construct;
  32. import com.laytonsmith.core.constructs.IVariable;
  33. import com.laytonsmith.core.constructs.Target;
  34. import com.laytonsmith.core.constructs.Token;
  35. import com.laytonsmith.core.constructs.Token.TType;
  36. import com.laytonsmith.core.constructs.Variable;
  37. import com.laytonsmith.core.environments.Environment;
  38. import com.laytonsmith.core.environments.GlobalEnv;
  39. import com.laytonsmith.core.exceptions.CRE.CRECastException;
  40. import com.laytonsmith.core.exceptions.CRE.CRERangeException;
  41. import com.laytonsmith.core.exceptions.ConfigCompileException;
  42. import com.laytonsmith.core.exceptions.ConfigCompileGroupException;
  43. import com.laytonsmith.core.exceptions.ConfigRuntimeException;
  44. import com.laytonsmith.core.exceptions.ProgramFlowManipulationException;
  45. import com.laytonsmith.core.extensions.ExtensionManager;
  46. import com.laytonsmith.core.extensions.ExtensionTracker;
  47. import com.laytonsmith.core.functions.Compiler;
  48. import com.laytonsmith.core.functions.Compiler.__autoconcat__;
  49. import com.laytonsmith.core.functions.Compiler.__cbrace__;
  50. import com.laytonsmith.core.functions.Compiler.p;
  51. import com.laytonsmith.core.functions.Compiler.__smart_string__;
  52. import com.laytonsmith.core.functions.Math.neg;
  53. import com.laytonsmith.core.functions.ControlFlow;
  54. import com.laytonsmith.core.functions.DataHandling;
  55. import com.laytonsmith.core.functions.Function;
  56. import com.laytonsmith.core.functions.FunctionBase;
  57. import com.laytonsmith.core.functions.FunctionList;
  58. import com.laytonsmith.core.functions.IncludeCache;
  59. import com.laytonsmith.core.functions.ArrayHandling.array_get;
  60. import com.laytonsmith.core.natives.interfaces.Mixed;
  61. import com.laytonsmith.persistence.DataSourceException;
  62. import java.io.File;
  63. import java.io.IOException;
  64. import java.net.URISyntaxException;
  65. import java.util.ArrayList;
  66. import java.util.Arrays;
  67. import java.util.Collection;
  68. import java.util.Collections;
  69. import java.util.EmptyStackException;
  70. import java.util.EnumSet;
  71. import java.util.HashMap;
  72. import java.util.HashSet;
  73. import java.util.Iterator;
  74. import java.util.LinkedList;
  75. import java.util.List;
  76. import java.util.ListIterator;
  77. import java.util.Map;
  78. import java.util.NoSuchElementException;
  79. import java.util.Objects;
  80. import java.util.Set;
  81. import java.util.Stack;
  82. import java.util.concurrent.atomic.AtomicInteger;
  83. import java.util.regex.Pattern;
  84. /**
  85. * The MethodScriptCompiler class handles the various stages of compilation and provides helper methods for execution of
  86. * the compiled trees.
  87. */
  88. public final class MethodScriptCompiler {
  89. private static final EnumSet<Optimizable.OptimizationOption> NO_OPTIMIZATIONS = EnumSet.noneOf(Optimizable.OptimizationOption.class); // An empty set of optimization options.
  90. private MethodScriptCompiler() { // Private and empty: prevents instantiation from outside this class.
  91. }
  92. private static final Pattern VAR_PATTERN = Pattern.compile("\\$[\\p{L}0-9_]+"); // A '$' followed by one or more Unicode letters, digits or underscores.
  93. private static final Pattern IVAR_PATTERN = Pattern.compile(IVariable.VARIABLE_NAME_REGEX); // Compiled form of IVariable.VARIABLE_NAME_REGEX.
  94. /**
  95. * Lexes the script character by character into a token stream. Same as {@code lex(script, env, file, inPureMScript, false)}.
  96. *
  97. * @param script The script to lex
  98. * @param env The environment to lex in, passed unchanged to the five-argument overload (which accepts null)
  99. * @param file The file this script came from, or potentially null if the code is from a dynamic source
  100. * @param inPureMScript True if the script is pure MethodScript, i.e. code that doesn't have command alias wrappers
  101. * @return A stream of tokens
  102. * @throws ConfigCompileException If compilation fails due to bad syntax
  103. */
  104. public static TokenStream lex(String script, Environment env, File file, boolean inPureMScript)
  105. throws ConfigCompileException {
  106. return lex(script, env, file, inPureMScript, false);
  107. }
  108. /**
  109. * Lexes the script, and turns it into a token stream. This looks through the script character by character.
  110. *
  111. * @param script The script to lex
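  112. * @param env The environment to lex in. If null, or if it has no CompilerEnvironment, a new CompilerEnvironment is supplied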
  113. * @param file The file this script came from, or potentially null if the code is from a dynamic source
  114. * @param inPureMScript If the script is in pure MethodScript, this should be true. Pure MethodScript is defined as
  115. * code that doesn't have command alias wrappers.
  116. * @param saveAllTokens If this script is planning to be compiled, then this value should always be false, however,
  117. * if the calling code needs all tokens for informational purposes (and doesn't plan on actually compiling the code)
  118. * then this can be true. If true, all tokens are saved, including comments and (some) whitespace. Given this lexing
  119. * stream, the exact source code could be re-constructed.
  120. *
  121. * A note on whitespace: The whitespace tokens are not guaranteed to be accurate, however, the column information
  122. * is. If you have two tokens t1 and t2, each with a value of length 1, where the columns are 1 and 5, then that
  123. * means there are 4 spaces between the two.
  124. * @return A stream of tokens
  125. * @throws ConfigCompileException If compilation fails due to bad syntax
  126. */
  127. public static TokenStream lex(String script, Environment env, File file,
  128. boolean inPureMScript, boolean saveAllTokens) throws ConfigCompileException {
  129. if(env == null) {
  130. // We MUST have a CompilerEnvironment, but it doesn't need to be used, but we have to create it at this
  131. // stage.
  132. env = Environment.createEnvironment(new CompilerEnvironment());
  133. }
  134. if(!env.hasEnv(CompilerEnvironment.class)) {
  135. env = env.cloneAndAdd(new CompilerEnvironment());
  136. }
  137. if(script.isEmpty()) {
  138. return new TokenStream(new LinkedList<>(), "", new HashMap<>());
  139. }
  140. if((int) script.charAt(0) == 65279) {
  141. // Remove the UTF-8 Byte Order Mark, if present.
  142. script = script.substring(1);
  143. }
  144. final StringBuilder fileOptions = new StringBuilder();
  145. script = script.replaceAll("\r\n", "\n");
  146. script = script + "\n";
  147. final Set<String> keywords = KeywordList.getKeywordNames();
  148. final TokenStream tokenList = new TokenStream();
  149. // Set our state variables.
  150. boolean stateInQuote = false;
  151. int quoteLineNumberStart = 1;
  152. boolean inSmartQuote = false;
  153. int smartQuoteLineNumberStart = 1;
  154. boolean inComment = false;
  155. int commentLineNumberStart = 1;
  156. boolean commentIsBlock = false;
  157. boolean inOptVar = false;
  158. boolean inCommand = (!inPureMScript);
  159. boolean inMultiline = false;
  160. boolean inSmartComment = false;
  161. boolean inFileOptions = false;
  162. boolean inAnnotation = false;
  163. int fileOptionsLineNumberStart = 1;
  164. StringBuilder buf = new StringBuilder();
  165. int lineNum = 1;
  166. int column = 1;
  167. int lastColumn = 0;
  168. Target target = Target.UNKNOWN;
  169. // Lex the script character by character.
  170. for(int i = 0; i < script.length(); i++) {
  171. Character c = script.charAt(i);
  172. Character c2 = null;
  173. if(i < script.length() - 1) {
  174. c2 = script.charAt(i + 1);
  175. }
  176. column += i - lastColumn;
  177. lastColumn = i;
  178. if(c == '\n') {
  179. lineNum++;
  180. column = 1;
  181. if(!inMultiline && !inPureMScript) {
  182. inCommand = true;
  183. }
  184. }
  185. if(buf.length() == 0) {
  186. target = new Target(lineNum, file, column);
  187. }
  188. // If we are in file options, add the character to the buffer if it's not a file options end character.
  189. if(inFileOptions) {
  190. // For a '>' character outside of a comment, '\>' would have to be used in file options.
  191. // Other characters than '>'cannot be escaped.
  192. // If support for more escaped characters would be desired in the future, it could be added here.
  193. switch(c) {
  194. case '\\': {
  195. if(c2 == '>') { // "\>".
  196. fileOptions.append('>');
  197. i++;
  198. continue;
  199. }
  200. break;
  201. }
  202. case '>': {
  203. if(saveAllTokens) {
  204. tokenList.add(new Token(TType.FILE_OPTIONS_STRING,
  205. fileOptions.toString(), target));
  206. tokenList.add(new Token(TType.FILE_OPTIONS_END, ">", target));
  207. }
  208. inFileOptions = false;
  209. continue;
  210. }
  211. }
  212. fileOptions.append(c);
  213. continue;
  214. }
  215. // Comment handling. This is bypassed if we are in a string.
  216. if(!stateInQuote && !inSmartQuote) {
  217. switch(c) {
  218. // Block comments start (/* and /**) and Double slash line comment start (//).
  219. case '/': {
  220. if(!inComment) {
  221. if(c2 == '*') { // "/*" or "/**".
  222. buf.append("/*");
  223. inComment = true;
  224. commentIsBlock = true;
  225. if(i < script.length() - 2 && script.charAt(i + 2) == '*') { // "/**".
  226. inSmartComment = true;
  227. buf.append("*");
  228. i++;
  229. }
  230. commentLineNumberStart = lineNum;
  231. i++;
  232. continue;
  233. } else if(c2 == '/') { // "//".
  234. buf.append("//");
  235. inComment = true;
  236. i++;
  237. continue;
  238. }
  239. }
  240. break;
  241. }
  242. // Line comment start (#).
  243. case '#': {
  244. if(!inComment) { // "#".
  245. buf.append("#");
  246. inComment = true;
  247. continue;
  248. }
  249. break;
  250. }
  251. // Block comment end (*/).
  252. case '*': {
  253. if(inComment && commentIsBlock && c2 == '/') { // "*/".
  254. if(saveAllTokens || inSmartComment) {
  255. buf.append("*/");
  256. validateTerminatedBidiSequence(buf.toString(), target);
  257. tokenList.add(new Token(inSmartComment ? TType.SMART_COMMENT : TType.COMMENT,
  258. buf.toString(), target));
  259. }
  260. buf = new StringBuilder();
  261. target = new Target(lineNum, file, column);
  262. inComment = false;
  263. commentIsBlock = false;
  264. inSmartComment = false;
  265. i++;
  266. continue;
  267. }
  268. break;
  269. }
  270. // Line comment end (\n).
  271. case '\n': {
  272. if(inComment && !commentIsBlock) { // "\n".
  273. inComment = false;
  274. if(saveAllTokens) {
  275. validateTerminatedBidiSequence(buf.toString(), target);
  276. tokenList.add(new Token(TType.COMMENT, buf.toString(), target));
  277. tokenList.add(new Token(TType.NEWLINE, "\n", new Target(lineNum + 1, file, 0)));
  278. }
  279. buf = new StringBuilder();
  280. target = new Target(lineNum, file, column);
  281. continue;
  282. }
  283. break;
  284. }
  285. }
  286. }
  287. // If we are in a comment, add the character to the buffer.
  288. if(inComment || (inAnnotation && c != '}')) {
  289. buf.append(c);
  290. continue;
  291. }
  292. // Handle non-comment non-quoted characters.
  293. if(!stateInQuote) {
  294. // We're not in a comment or quoted string, handle: +=, -=, *=, /=, .=, ->, ++, --, %, **, *, +, -, /,
  295. // >=, <=, <<<, >>>, <, >, ===, !==, ==, !=, &&&, |||, &&, ||, !, {, }, .., ., ::, [, =, ], :, comma,
  296. // (, ), ;, and whitespace.
  297. matched:
  298. {
  299. Token token;
  300. switch(c) {
  301. case '+': {
  302. if(c2 == '=') { // "+=".
  303. token = new Token(TType.PLUS_ASSIGNMENT, "+=", target);
  304. i++;
  305. } else if(c2 == '+') { // "++".
  306. token = new Token(TType.INCREMENT, "++", target);
  307. i++;
  308. } else { // "+".
  309. token = new Token(TType.PLUS, "+", target);
  310. }
  311. break;
  312. }
  313. case '-': {
  314. if(c2 == '=') { // "-=".
  315. token = new Token(TType.MINUS_ASSIGNMENT, "-=", target);
  316. i++;
  317. } else if(c2 == '-') { // "--".
  318. token = new Token(TType.DECREMENT, "--", target);
  319. i++;
  320. } else if(c2 == '>') { // "->".
  321. token = new Token(TType.DEREFERENCE, "->", target);
  322. i++;
  323. } else { // "-".
  324. token = new Token(TType.MINUS, "-", target);
  325. }
  326. break;
  327. }
  328. case '*': {
  329. if(c2 == '=') { // "*=".
  330. token = new Token(TType.MULTIPLICATION_ASSIGNMENT, "*=", target);
  331. i++;
  332. } else if(c2 == '*') { // "**".
  333. token = new Token(TType.EXPONENTIAL, "**", target);
  334. i++;
  335. } else { // "*".
  336. token = new Token(TType.MULTIPLICATION, "*", target);
  337. }
  338. break;
  339. }
  340. case '/': {
  341. if(c2 == '=') { // "/=".
  342. token = new Token(TType.DIVISION_ASSIGNMENT, "/=", target);
  343. i++;
  344. } else { // "/".
  345. // Protect against matching commands.
  346. if(Character.isLetter(c2)) {
  347. break matched; // Pretend that division didn't match.
  348. }
  349. token = new Token(TType.DIVISION, "/", target);
  350. }
  351. break;
  352. }
  353. case '.': {
  354. if(c2 == '=') { // ".=".
  355. token = new Token(TType.CONCAT_ASSIGNMENT, ".=", target);
  356. i++;
  357. } else if(c2 == '.') { // "..".
  358. token = new Token(TType.SLICE, "..", target);
  359. i++;
  360. } else { // ".".
  361. token = new Token(TType.DOT, ".", target);
  362. }
  363. break;
  364. }
  365. case '%': {
  366. token = new Token(TType.MODULO, "%", target);
  367. break;
  368. }
  369. case '>': {
  370. if(c2 == '=') { // ">=".
  371. token = new Token(TType.GTE, ">=", target);
  372. i++;
  373. } else if(c2 == '>' && i < script.length() - 2 && script.charAt(i + 2) == '>') { // ">>>".
  374. token = new Token(TType.MULTILINE_START, ">>>", target);
  375. inMultiline = true;
  376. i += 2;
  377. } else { // ">".
  378. token = new Token(TType.GT, ">", target);
  379. }
  380. break;
  381. }
  382. case '<': {
  383. if(c2 == '!') { // "<!".
  384. if(buf.length() > 0) {
  385. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  386. buf = new StringBuilder();
  387. target = new Target(lineNum, file, column);
  388. }
  389. if(saveAllTokens) {
  390. tokenList.add(new Token(TType.FILE_OPTIONS_START, "<!", target));
  391. }
  392. inFileOptions = true;
  393. fileOptionsLineNumberStart = lineNum;
  394. i++;
  395. continue;
  396. } else if(c2 == '=') { // "<=".
  397. token = new Token(TType.LTE, "<=", target);
  398. i++;
  399. } else if(c2 == '<' && i < script.length() - 2 && script.charAt(i + 2) == '<') { // "<<<".
  400. token = new Token(TType.MULTILINE_END, "<<<", target);
  401. inMultiline = false;
  402. i += 2;
  403. } else { // "<".
  404. token = new Token(TType.LT, "<", target);
  405. }
  406. break;
  407. }
  408. case '=': {
  409. if(c2 == '=') {
  410. if(i < script.length() - 2 && script.charAt(i + 2) == '=') { // "===".
  411. token = new Token(TType.STRICT_EQUALS, "===", target);
  412. i += 2;
  413. } else { // "==".
  414. token = new Token(TType.EQUALS, "==", target);
  415. i++;
  416. }
  417. } else { // "=".
  418. if(inCommand) {
  419. if(inOptVar) {
  420. token = new Token(TType.OPT_VAR_ASSIGN, "=", target);
  421. } else {
  422. token = new Token(TType.ALIAS_END, "=", target);
  423. inCommand = false;
  424. }
  425. } else {
  426. token = new Token(TType.ASSIGNMENT, "=", target);
  427. }
  428. }
  429. break;
  430. }
  431. case '!': {
  432. if(c2 == '=') {
  433. if(i < script.length() - 2 && script.charAt(i + 2) == '=') { // "!==".
  434. token = new Token(TType.STRICT_NOT_EQUALS, "!==", target);
  435. i += 2;
  436. } else { // "!=".
  437. token = new Token(TType.NOT_EQUALS, "!=", target);
  438. i++;
  439. }
  440. } else { // "!".
  441. token = new Token(TType.LOGICAL_NOT, "!", target);
  442. }
  443. break;
  444. }
  445. case '&': {
  446. if(c2 == '&') {
  447. if(i < script.length() - 2 && script.charAt(i + 2) == '&') { // "&&&".
  448. token = new Token(TType.DEFAULT_AND, "&&&", target);
  449. i += 2;
  450. } else { // "&&".
  451. token = new Token(TType.LOGICAL_AND, "&&", target);
  452. i++;
  453. }
  454. } else { // "&".
  455. // Bitwise symbols are not used yet.
  456. break matched; // Pretend that bitwise AND didn't match.
  457. // token = new Token(TType.BIT_AND, "&", target);
  458. }
  459. break;
  460. }
  461. case '|': {
  462. if(c2 == '|') {
  463. if(i < script.length() - 2 && script.charAt(i + 2) == '|') { // "|||".
  464. token = new Token(TType.DEFAULT_OR, "|||", target);
  465. i += 2;
  466. } else { // "||".
  467. token = new Token(TType.LOGICAL_OR, "||", target);
  468. i++;
  469. }
  470. } else { // "|".
  471. // Bitwise symbols are not used yet.
  472. break matched; // Pretend that bitwise OR didn't match.
  473. // token = new Token(TType.BIT_OR, "|", target);
  474. }
  475. break;
  476. }
  477. // Bitwise symbols are not used yet.
  478. // case '^': {
  479. // token = new Token(TType.BIT_XOR, "^", target);
  480. // break;
  481. // }
  482. case ':': {
  483. if(c2 == ':') { // "::".
  484. token = new Token(TType.DEREFERENCE, "::", target);
  485. i++;
  486. } else { // ":".
  487. token = new Token(TType.LABEL, ":", target);
  488. }
  489. break;
  490. }
  491. case '{': {
  492. token = new Token(TType.LCURLY_BRACKET, "{", target);
  493. break;
  494. }
  495. case '}': {
  496. if(inAnnotation) {
  497. // Eventually, this will no longer be a comment type, but for now, we just want
  498. // to totally ignore annotations, as if they were comments.
  499. inAnnotation = false;
  500. token = new Token(/*TType.ANNOTATION*/TType.COMMENT, "@{" + buf.toString() + "}", target);
  501. buf = new StringBuilder();
  502. break;
  503. }
  504. token = new Token(TType.RCURLY_BRACKET, "}", target);
  505. break;
  506. }
  507. case '[': {
  508. token = new Token(TType.LSQUARE_BRACKET, "[", target);
  509. inOptVar = true;
  510. break;
  511. }
  512. case ']': {
  513. token = new Token(TType.RSQUARE_BRACKET, "]", target);
  514. inOptVar = false;
  515. break;
  516. }
  517. case ',': {
  518. token = new Token(TType.COMMA, ",", target);
  519. break;
  520. }
  521. case ';': {
  522. token = new Token(TType.SEMICOLON, ";", target);
  523. break;
  524. }
  525. case '(': {
  526. token = new Token(TType.FUNC_START, "(", target);
  527. // Handle the buffer or previous token, with the knowledge that a FUNC_START follows.
  528. if(buf.length() > 0) {
  529. if(saveAllTokens) {
  530. // In this case, we need to check for keywords first, because we want to go ahead
  531. // and convert into that stage. In the future, we might want to do this
  532. // unconditionally, but for now, just go ahead and only do it if saveAllTokens is
  533. // true, because we know that won't be used by the compiler.
  534. if(KeywordList.getKeywordByName(buf.toString()) != null) {
  535. // It's a keyword.
  536. tokenList.add(new Token(TType.KEYWORD, buf.toString(), target));
  537. } else {
  538. // It's not a keyword, but a normal function.
  539. tokenList.add(new Token(TType.FUNC_NAME, buf.toString(), target));
  540. }
  541. } else {
  542. tokenList.add(new Token(TType.FUNC_NAME, buf.toString(), target));
  543. }
  544. buf = new StringBuilder();
  545. target = new Target(lineNum, file, column);
  546. } else {
  547. // The previous token, if unknown, should be changed to a FUNC_NAME. If it's not
  548. // unknown, we may be doing standalone parenthesis, so auto tack on the __autoconcat__
  549. // function.
  550. try {
  551. int count = 0;
  552. Iterator<Token> it = tokenList.descendingIterator();
  553. Token t;
  554. while((t = it.next()).type == TType.WHITESPACE) {
  555. count++;
  556. }
  557. if(t.type == TType.UNKNOWN) {
  558. t.type = TType.FUNC_NAME;
  559. // Go ahead and remove the whitespace here too, they break things.
  560. count--;
  561. for(int a = 0; a < count; a++) {
  562. tokenList.removeLast();
  563. }
  564. } else {
  565. tokenList.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
  566. }
  567. } catch (NoSuchElementException e) {
  568. // This is the first element on the list, so, it's another autoconcat.
  569. tokenList.add(new Token(TType.FUNC_NAME, "__autoconcat__", target));
  570. }
  571. }
  572. break;
  573. }
  574. case ')': {
  575. token = new Token(TType.FUNC_END, ")", target);
  576. break;
  577. }
  578. case ' ': { // Whitespace case #1.
  579. token = new Token(TType.WHITESPACE, " ", target);
  580. break;
  581. }
  582. case '\t': { // Whitespace case #2 (TAB).
  583. token = new Token(TType.WHITESPACE, "\t", target);
  584. break;
  585. }
  586. case '@': {
  587. if(c2 == '{') {
  588. inAnnotation = true;
  589. i++;
  590. continue;
  591. }
  592. break matched;
  593. }
  594. default: {
  595. // No match was found at this point, so continue matching below.
  596. break matched;
  597. }
  598. }
  599. // Add previous characters as UNKNOWN token.
  600. if(buf.length() > 0) {
  601. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  602. buf = new StringBuilder();
  603. target = new Target(lineNum, file, column);
  604. }
  605. // Add the new token to the token list.
  606. tokenList.add(token);
  607. // Continue lexing.
  608. continue;
  609. }
  610. }
  611. // Handle non-comment characters that might start or stop a quoted string.
  612. switch(c) {
  613. case '\'': {
  614. if(stateInQuote && !inSmartQuote) {
  615. validateTerminatedBidiSequence(buf.toString(), target);
  616. tokenList.add(new Token(TType.STRING, buf.toString(), target));
  617. buf = new StringBuilder();
  618. target = new Target(lineNum, file, column);
  619. stateInQuote = false;
  620. continue;
  621. } else if(!stateInQuote) {
  622. stateInQuote = true;
  623. quoteLineNumberStart = lineNum;
  624. inSmartQuote = false;
  625. if(buf.length() > 0) {
  626. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  627. buf = new StringBuilder();
  628. target = new Target(lineNum, file, column);
  629. }
  630. continue;
  631. } else {
  632. // We're in a smart quote.
  633. buf.append("'");
  634. }
  635. break;
  636. }
  637. case '"': {
  638. if(stateInQuote && inSmartQuote) {
  639. validateTerminatedBidiSequence(buf.toString(), target);
  640. tokenList.add(new Token(TType.SMART_STRING, buf.toString(), target));
  641. buf = new StringBuilder();
  642. target = new Target(lineNum, file, column);
  643. stateInQuote = false;
  644. inSmartQuote = false;
  645. continue;
  646. } else if(!stateInQuote) {
  647. stateInQuote = true;
  648. inSmartQuote = true;
  649. smartQuoteLineNumberStart = lineNum;
  650. if(buf.length() > 0) {
  651. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  652. buf = new StringBuilder();
  653. target = new Target(lineNum, file, column);
  654. }
  655. continue;
  656. } else {
  657. // We're in normal quotes.
  658. buf.append('"');
  659. }
  660. break;
  661. }
  662. case '\n': {
  663. // Append a newline to the buffer if it's quoted.
  664. if(stateInQuote) {
  665. buf.append(c);
  666. } else {
  667. // Newline is not quoted. Move the buffer to an UNKNOWN token and add a NEWLINE token.
  668. if(buf.length() > 0) {
  669. tokenList.add(new Token(TType.UNKNOWN, buf.toString(), target));
  670. buf = new StringBuilder();
  671. target = new Target(lineNum, file, column);
  672. }
  673. tokenList.add(new Token(TType.NEWLINE, "\n", target));
  674. }
  675. continue;
  676. }
  677. case '\\': {
  678. // Handle escaped characters in quotes or a single "\" seperator token otherwise.
  679. // Handle backslash character outside of quotes.
  680. if(!stateInQuote) {
  681. tokenList.add(new Token(TType.SEPERATOR, "\\", target));
  682. break;
  683. }
  684. // Handle an escape sign in a quote.
  685. switch(c2) {
  686. case '\\':
  687. if(inSmartQuote) {
  688. // Escaping of '@' and '\' is handled within __smart_string__.
  689. buf.append('\\');
  690. }
  691. buf.append('\\');
  692. break;
  693. case '\'':
  694. case '"':
  695. buf.append(c2);
  696. break;
  697. case 'n':
  698. buf.append('\n');
  699. break;
  700. case 'r':
  701. buf.append('\r');
  702. break;
  703. case 't':
  704. buf.append('\t');
  705. break;
  706. case '0':
  707. buf.append('\0');
  708. break;
  709. case 'f':
  710. buf.append('\f');
  711. break; // Form feed.
  712. case 'v':
  713. buf.append('\u000B');
  714. break; // Vertical TAB.
  715. case 'a':
  716. buf.append('\u0007');
  717. break; // Alarm.
  718. case 'b':
  719. buf.append('\u0008');
  720. break; // Backspace.
  721. case 'u': { // Unicode (4 characters).
  722. // Grab the next 4 characters, and check to see if they are numbers.
  723. if(i + 5 >= script.length()) {
  724. throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
  725. }
  726. String unicode = script.substring(i + 2, i + 6);
  727. int unicodeNum;
  728. try {
  729. unicodeNum = Integer.parseInt(unicode, 16);
  730. } catch (NumberFormatException e) {
  731. throw new ConfigCompileException(
  732. "Unrecognized unicode escape sequence: \\u" + unicode, target);
  733. }
  734. buf.append(Character.toChars(unicodeNum));
  735. i += 4;
  736. break;
  737. }
  738. case 'U': { // Unicode (8 characters).
  739. // Grab the next 8 characters and check to see if they are numbers.
  740. if(i + 9 >= script.length()) {
  741. throw new ConfigCompileException("Unrecognized unicode escape sequence", target);
  742. }
  743. String unicode = script.substring(i + 2, i + 10);
  744. int unicodeNum;
  745. try {
  746. unicodeNum = Integer.parseInt(unicode, 16);
  747. } catch (NumberFormatException e) {
  748. throw new ConfigCompileException(
  749. "Unrecognized unicode escape sequence: \\u" + unicode, target);
  750. }
  751. buf.append(Character.toChars(unicodeNum));
  752. i += 8;
  753. break;
  754. }
  755. case '@': {
  756. if(!inSmartQuote) {
  757. throw new ConfigCompileException("The escape sequence \\@ is not"
  758. + " a recognized escape sequence in a non-smart string", target);
  759. }
  760. buf.append("\\@");
  761. break;
  762. }
  763. default: {
  764. // Since we might expand this list later, don't let them use unescaped backslashes.
  765. throw new ConfigCompileException(
  766. "The escape sequence \\" + c2 + " is not a recognized escape sequence", target);
  767. }
  768. }
  769. i++;
  770. continue;
  771. }
  772. default: {
  773. // At this point, only non-comment and non-escaped characters that are not part of a
  774. // quote start/end are left.
  775. // Disallow Non-Breaking Space Characters.
  776. if(!stateInQuote && c == '\u00A0'/*nbsp*/) {
  777. throw new ConfigCompileException("NBSP character in script", target);
  778. }
  779. // Add the characters that didn't match anything to the buffer.
  780. buf.append(c);
  781. continue;
  782. }
  783. }
  784. } // End of lexing.
  785. // Handle unended file options.
  786. if(inFileOptions) {
  787. throw new ConfigCompileException("Unended file options. You started the the file options on line "
  788. + fileOptionsLineNumberStart, target);
  789. }
  790. // Handle unended string literals.
  791. if(stateInQuote) {
  792. if(inSmartQuote) {
  793. throw new ConfigCompileException("Unended string literal. You started the last double quote on line "
  794. + smartQuoteLineNumberStart, target);
  795. } else {
  796. throw new ConfigCompileException("Unended string literal. You started the last single quote on line "
  797. + quoteLineNumberStart, target);
  798. }
  799. }
  800. // Handle unended comment blocks. Since a newline is added to the end of the script, line comments are ended.
  801. if(inComment || commentIsBlock) {
  802. throw new ConfigCompileException("Unended block comment. You started the comment on line "
  803. + commentLineNumberStart, target);
  804. }
  805. // Look at the tokens and get meaning from them. Also, look for improper symbol locations
  806. // and go ahead and absorb unary +- into the token.
  807. ListIterator<Token> it = tokenList.listIterator(0);
  808. while(it.hasNext()) {
  809. Token t = it.next();
  810. // Combine whitespace tokens into one.
  811. if(t.type == TType.WHITESPACE && it.hasNext()) {
  812. Token next;
  813. if((next = it.next()).type == TType.WHITESPACE) {
  814. t.value += next.val();
  815. it.remove(); // Remove 'next'.
  816. } else {
  817. it.previous(); // Select 'next' <--.
  818. }
  819. it.previous(); // Select 't' <--.
  820. it.next(); // Select 't' -->.
  821. }
  822. // Convert "-" + number to -number if allowed.
  823. it.previous(); // Select 't' <--.
  824. if(it.hasPrevious() && t.type == TType.UNKNOWN) {
  825. Token prev1 = it.previous(); // Select 'prev1' <--.
  826. if(prev1.type.isPlusMinus()) {
  827. // Find the first non-whitespace token before the '-'.
  828. Token prevNonWhitespace = null;
  829. while(it.hasPrevious()) {
  830. if(it.previous().type != TType.WHITESPACE) {
  831. prevNonWhitespace = it.next();
  832. break;
  833. }
  834. }
  835. while(it.next() != prev1) { // Skip until selection is at 'prev1 -->'.
  836. }
  837. if(prevNonWhitespace != null) {
  838. // Convert "±UNKNOWN" if the '±' is used as a sign (and not an add/subtract operation).
  839. if(!prevNonWhitespace.type.isIdentifier() // Don't convert "number/string/var ± ...".
  840. && prevNonWhitespace.type != TType.FUNC_END // Don't convert "func() ± ...".
  841. && prevNonWhitespace.type != TType.RSQUARE_BRACKET // Don't convert "] ± ..." (arrays).
  842. && !IVAR_PATTERN.matcher(t.val()).matches() // Don't convert "± @var".
  843. && !VAR_PATTERN.matcher(t.val()).matches()) { // Don't convert "± $var".
  844. // It is a negative/positive number: Absorb the sign.
  845. t.value = prev1.value + t.value;
  846. it.remove(); // Remove 'prev1'.
  847. }
  848. }
  849. } else {
  850. it.next(); // Select 'prev1' -->.
  851. }
  852. }
  853. it.next(); // Select 't' -->.
  854. // Assign a type to all UNKNOWN tokens.
  855. if(t.type == TType.UNKNOWN) {
  856. if(t.val().charAt(0) == '/' && t.val().length() > 1) {
  857. t.type = TType.COMMAND;
  858. } else if(t.val().equals("$")) {
  859. t.type = TType.FINAL_VAR;
  860. } else if(VAR_PATTERN.matcher(t.val()).matches()) {
  861. t.type = TType.VARIABLE;
  862. } else if(IVAR_PATTERN.matcher(t.val()).matches()) {
  863. t.type = TType.IVARIABLE;
  864. } else if(t.val().charAt(0) == '@') {
  865. throw new ConfigCompileException("IVariables must match the regex: " + IVAR_PATTERN, t.getTarget());
  866. } else if(keywords.contains(t.val())) {
  867. t.type = TType.KEYWORD;
  868. } else if(t.val().matches("[\t ]*")) {
  869. t.type = TType.WHITESPACE;
  870. } else {
  871. t.type = TType.LIT;
  872. }
  873. }
  874. // Skip this check if we're not in pure mscript.
  875. if(inPureMScript) {
  876. if(it.hasNext()) {
  877. Token next = it.next(); // Select 'next' -->.
  878. it.previous(); // Select 'next' <--.
  879. it.previous(); // Select 't' <--.
  880. if(t.type.isSymbol() && !t.type.isUnary() && !next.type.isUnary()) {
  881. if(it.hasPrevious()) {
  882. Token prev1 = it.previous(); // Select 'prev1' <--.
  883. if(prev1.type.equals(TType.FUNC_START) || prev1.type.equals(TType.COMMA)
  884. || next.type.equals(TType.FUNC_END) || next.type.equals(TType.COMMA)
  885. || prev1.type.isSymbol() || next.type.isSymbol()) {
  886. throw new ConfigCompileException("Unexpected symbol (" + t.val() + ")", t.getTarget());
  887. }
  888. it.next(); // Select 'prev1' -->.
  889. }
  890. }
  891. it.next(); // Select 't' -->.
  892. }
  893. }
  894. }
  895. // Set file options
  896. {
  897. Map<String, String> defaults = new HashMap<>();
  898. List<File> dirs = new ArrayList<>();
  899. if(file != null) {
  900. File f = file.getParentFile();
  901. while(true) {
  902. if(f == null) {
  903. break;
  904. }
  905. File fileOptionDefaults = new File(f, ".msfileoptions");
  906. if(fileOptionDefaults.exists()) {
  907. dirs.add(fileOptionDefaults);
  908. }
  909. f = f.getParentFile();
  910. }
  911. }
  912. Collections.reverse(dirs);
  913. for(File d : dirs) {
  914. try {
  915. defaults.putAll(TokenStream.parseFileOptions(FileUtil.read(d), defaults).getRawOptions());
  916. } catch (IOException ex) {
  917. throw new ConfigCompileException("Cannot read " + d.getAbsolutePath(), Target.UNKNOWN, ex);
  918. }
  919. }
  920. tokenList.setFileOptions(fileOptions.toString(), defaults);
  921. }
  922. // Make sure that the file options are the first non-comment code in the file
  923. {
  924. boolean foundCode = false;
  925. for(Token t : tokenList) {
  926. if(t.type.isFileOption()) {
  927. if(foundCode) {
  928. throw new ConfigCompileException("File options must be the first non-comment section in the"
  929. + " code", t.target);
  930. }
  931. break;
  932. }
  933. if(!t.type.isComment() && !t.type.isWhitespace()) {
  934. foundCode = true;
  935. }
  936. }
  937. }
  938. {
  939. // Filename check
  940. String fileName = tokenList.getFileOptions().getName();
  941. if(!fileName.isEmpty()) {
  942. if(!file.getAbsolutePath().replace("\\", "/").endsWith(fileName.replace("\\", "/"))) {
  943. CompilerWarning warning = new CompilerWarning(file + " has the wrong file name in the file options ("
  944. + fileName + ")", new Target(0, file, 0), null);
  945. env.getEnv(CompilerEnvironment.class).addCompilerWarning(null, warning);
  946. }
  947. }
  948. }
  949. {
  950. // Required extension check
  951. // TODO: Add support for specifying required versions
  952. Collection<ExtensionTracker> exts = ExtensionManager.getTrackers().values();
  953. Set<String> notFound = new HashSet<>();
  954. for(String extension : tokenList.getFileOptions().getRequiredExtensions()) {
  955. boolean found = false;
  956. for(ExtensionTracker t : exts) {
  957. if(t.getIdentifier().equalsIgnoreCase(extension)) {
  958. found = true;
  959. break;
  960. }
  961. }
  962. if(!found) {
  963. notFound.add(extension);
  964. }
  965. }
  966. if(!notFound.isEmpty()) {
  967. throw new ConfigCompileException("Could not compile file, because one or more required"
  968. + " extensions are not loaded: " + StringUtils.Join(notFound, ", ")
  969. + ". These extensions must be provided before compilation can continue.",
  970. new Target(0, file, 0));
  971. }
  972. }
  973. return tokenList;
  974. }
  975. /**
  976. * This function breaks the token stream into parts, separating the aliases/MethodScript from the command triggers
  977. *
  978. * @param tokenStream
  979. * @param envs
  980. * @return
  981. * @throws ConfigCompileException
  982. */
  983. public static List<Script> preprocess(TokenStream tokenStream,
  984. Set<Class<? extends Environment.EnvironmentImpl>> envs) throws ConfigCompileException {
  985. if(tokenStream == null || tokenStream.isEmpty()) {
  986. return new ArrayList<>();
  987. }
  988. // Remove leading newlines.
  989. while(!tokenStream.isEmpty() && tokenStream.getFirst().type == TType.NEWLINE) {
  990. tokenStream.removeFirst(); // Remove leading newlines.
  991. }
  992. // Return an empty list if there were only newlines.
  993. if(tokenStream.isEmpty()) {
  994. return new ArrayList<>();
  995. }
  996. // Remove whitespaces and duplicate newlines.
  997. {
  998. ListIterator<Token> it = tokenStream.listIterator(0);
  999. Token token = it.next();
  1000. outerLoop:
  1001. while(true) {
  1002. switch(token.type) {
  1003. case WHITESPACE: {
  1004. it.remove(); // Remove whitespaces.
  1005. if(!it.hasNext()) {
  1006. break outerLoop;
  1007. }
  1008. token = it.next();
  1009. continue outerLoop;
  1010. }
  1011. case NEWLINE: {
  1012. while(it.hasNext()) {
  1013. if((token = it.next()).type == TType.NEWLINE) {
  1014. it.remove(); // Remove duplicate newlines.
  1015. } else {
  1016. continue outerLoop;
  1017. }
  1018. }
  1019. break outerLoop;
  1020. }
  1021. default: {
  1022. if(!it.hasNext()) {
  1023. break outerLoop;
  1024. }
  1025. token = it.next();
  1026. continue outerLoop;
  1027. }
  1028. }
  1029. }
  1030. }
  1031. // Handle multiline constructs.
  1032. // Take out newlines between the '= >>>' and '<<<' tokens (also removing the '>>>' and '<<<' tokens).
  1033. // Also remove comments and also remove newlines that are behind a '\'.
  1034. boolean insideMultiline = false;
  1035. ListIterator<Token> it = tokenStream.listIterator(0);
  1036. Token token = null;
  1037. while(it.hasNext()) {
  1038. token = it.next();
  1039. switch(token.type) {
  1040. case ALIAS_END: { // "=".
  1041. if(it.hasNext()) {
  1042. if(it.next().type == TType.MULTILINE_START) { // "= >>>".
  1043. insideMultiline = true;
  1044. it.remove(); // Remove multiline start (>>>).
  1045. it.previous(); // Select 'token' <---.
  1046. it.next(); // Select 'token' -->.
  1047. } else {
  1048. it.previous(); // Select 'next' <---.
  1049. }
  1050. }
  1051. continue;
  1052. }
  1053. case MULTILINE_END: { // "<<<".
  1054. // Handle multiline end token (<<<) without start.
  1055. if(!insideMultiline) {
  1056. throw new ConfigCompileException(
  1057. "Found multiline end symbol, and no multiline start found", token.target);
  1058. }
  1059. insideMultiline = false;
  1060. it.remove(); // Remove multiline end (<<<).
  1061. continue;
  1062. }
  1063. case MULTILINE_START: { // ">>>".
  1064. // Handle multiline start token (>>>) while already in multiline.
  1065. if(insideMultiline) {
  1066. throw new ConfigCompileException("Did not expect a multiline start symbol here,"
  1067. + " are you missing a multiline end symbol above this line?", token.target);
  1068. }
  1069. // Handle multiline start token (>>>) without alias end (=) in front.
  1070. it.previous(); // Select 'token' <--.
  1071. if(!it.hasPrevious() || it.previous().type != TType.ALIAS_END) {
  1072. throw new ConfigCompileException(
  1073. "Multiline symbol must follow the alias_end (=) symbol", token.target);
  1074. }
  1075. it.next(); // Select 'prev' -->.
  1076. it.next(); // Select 'token' -->.
  1077. continue;
  1078. }
  1079. case NEWLINE: { // "\n".
  1080. // Skip newlines that are inside a multiline construct.
  1081. if(insideMultiline) {
  1082. it.remove(); // Remove newline.
  1083. }
  1084. continue;
  1085. }
  1086. // Remove comments.
  1087. case COMMENT: {
  1088. it.remove(); // Remove comment.
  1089. continue;
  1090. }
  1091. default: {
  1092. // Remove newlines that are behind a '\'.
  1093. if(token.type != TType.STRING && token.val().equals("\\") && it.hasNext()) {
  1094. if(it.next().type == TType.NEWLINE) {
  1095. it.remove(); // Remove newline.
  1096. it.previous(); // Select 'token' <--.
  1097. it.next(); // Select 'token' -->.
  1098. } else {
  1099. it.previous(); // Select 'next' <--.
  1100. }
  1101. }
  1102. }
  1103. }
  1104. }
  1105. assert token != null;
  1106. // Handle missing multiline end token.
  1107. if(insideMultiline) {
  1108. throw new ConfigCompileException("Expecting a multiline end symbol, but your last multiline alias appears to be missing one.", token.target);
  1109. }
  1110. // Now that we have all lines minified, we should be able to split on newlines
  1111. // and easily find the left and right sides.
  1112. List<Token> left = new ArrayList<>();
  1113. List<Token> right = new ArrayList<>();
  1114. List<Script> scripts = new ArrayList<>();
  1115. SmartComment comment = null;
  1116. tokenLoop:
  1117. for(it = tokenStream.listIterator(0); it.hasNext();) {
  1118. Token t = it.next();
  1119. if(t.type == TType.SMART_COMMENT) {
  1120. if(comment != null) {
  1121. // TODO: Double smart comment, this should be an error case
  1122. }
  1123. comment = new SmartComment(t.val());
  1124. t = it.next();
  1125. }
  1126. // Add all tokens until ALIAS_END (=) or end of stream.
  1127. while(t.type != TType.ALIAS_END) {
  1128. if(!it.hasNext()) {
  1129. break tokenLoop; // End of stream.
  1130. }
  1131. left.add(t);
  1132. t = it.next();
  1133. }
  1134. // Add all tokens until NEWLINE (\n).
  1135. while(t.type != TType.NEWLINE) {
  1136. assert it.hasNext(); // All files end with a newline, so end of stream should be impossible here.
  1137. right.add(t);
  1138. t = it.next();
  1139. }
  1140. // Create a new script for the obtained left and right if end of stream has not been reached.
  1141. if(t.type == TType.NEWLINE) {
  1142. // Check for spurious symbols, which indicate an issue with the script, but ignore any whitespace.
  1143. for(int j = left.size() - 1; j >= 0; j--) {
  1144. if(left.get(j).type == TType.NEWLINE) {
  1145. if(j > 0 && left.get(j - 1).type != TType.WHITESPACE) {
  1146. throw new ConfigCompileException(
  1147. "Unexpected token: " + left.get(j - 1).val(), left.get(j - 1).getTarget());
  1148. }
  1149. }
  1150. }
  1151. // Create a new script from the command descriptor (left) and code (right) and add it to the list.
  1152. Script s = new Script(left, right, null, envs, tokenStream.getFileOptions(), comment);
  1153. scripts.add(s);
  1154. // Create new left and right array for the next script.
  1155. left = new ArrayList<>();
  1156. right = new ArrayList<>();
  1157. comment = null;
  1158. }
  1159. }
  1160. // Return the scripts.
  1161. return scripts;
  1162. }
  1163. /**
  1164. * Compiles the token stream into a valid ParseTree. This also includes optimization and reduction.
  1165. *
  1166. * @param stream The token stream, as generated by {@link #lex(String, Environment, File, boolean) lex}
  1167. * @param environment If an environment is already set up, it can be passed in here. The code will tolerate a null
  1168. * value, but if present, should be passed in. If the value is null, a standalone environment will be generated
  1169. * and used.
  1170. * @param envs The environments that are going to be present at runtime. Even if the {@code environment} parameter
  1171. * is null, this still must be non-null and populated with one or more values.
  1172. * @return A fully compiled, optimized, and reduced parse tree. If {@code stream} is null or empty, null is
  1173. * returned.
  1174. * @throws ConfigCompileException If the script contains syntax errors. Additionally, during optimization, certain
  1175. * methods may cause compile errors. Any function that can optimize static occurrences and throws a
  1176. * {@link ConfigRuntimeException} will have that exception converted to a ConfigCompileException.
  1177. * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException A ConfigCompileGroupException is just
  1178. * a collection of single {@link ConfigCompileException}s.
  1179. */
  1180. public static ParseTree compile(TokenStream stream, Environment environment,
  1181. Set<Class<? extends Environment.EnvironmentImpl>> envs) throws ConfigCompileException,
  1182. ConfigCompileGroupException {
  1183. return compile(stream, environment, envs, new StaticAnalysis(true));
  1184. }
  1185. /**
  1186. * Compiles the token stream into a valid ParseTree. This also includes optimization and reduction.
  1187. *
  1188. * @param stream The token stream, as generated by {@link #lex(String, Environment, File, boolean) lex}
  1189. * @param environment If an environment is already set up, it can be passed in here. The code will tolerate a null
  1190. * value, but if present, should be passed in. If the value is null, a standalone environment will be generated
  1191. * and used.
  1192. * @param envs The environments that are going to be present at runtime. Even if the {@code environment} parameter
  1193. * is null, this still must be non-null and populated with one or more values.
  1194. * @param staticAnalysis The static analysis object, or {@code null} to not perform static analysis. This object
  1195. * is used to perform static analysis on the AST that results from parsing, before any AST optimizations
  1196. * are applied.
  1197. * @return A fully compiled, optimized, and reduced parse tree. If {@code stream} is null or empty, null is
  1198. * returned.
  1199. * @throws ConfigCompileException If the script contains syntax errors. Additionally, during optimization, certain
  1200. * methods may cause compile errors. Any function that can optimize static occurrences and throws a
  1201. * {@link ConfigRuntimeException} will have that exception converted to a ConfigCompileException.
  1202. * @throws com.laytonsmith.core.exceptions.ConfigCompileGroupException A ConfigCompileGroupException is just
  1203. * a collection of single {@link ConfigCompileException}s.
  1204. */
  1205. public static ParseTree compile(TokenStream stream, Environment environment,
  1206. Set<Class<? extends Environment.EnvironmentImpl>> envs, StaticAnalysis staticAnalysis)
  1207. throws ConfigCompileException, ConfigCompileGroupException {
  1208. Objects.requireNonNull(envs, () -> "envs parameter must not be null");
  1209. try {
  1210. if(environment == null) {
  1211. // We MUST have a CompilerEnvironment. It doesn't need to be used, but we have to create it at
  1212. // this stage.
  1213. environment = Static.GenerateStandaloneEnvironment(false);
  1214. }
  1215. if(!environment.hasEnv(CompilerEnvironment.class)) {
  1216. Environment e = Static.GenerateStandaloneEnvironment(false);
  1217. environment = environment.cloneAndAdd(e.getEnv(CompilerEnvironment.class));
  1218. }
  1219. } catch (IOException | DataSourceException | URISyntaxException | Profiles.InvalidProfileException ex) {
  1220. throw new RuntimeException(ex);
  1221. }
  1222. Set<ConfigCompileException> compilerErrors = new HashSet<>();
  1223. if(stream == null || stream.isEmpty()) {
  1224. return null;
  1225. }
  1226. Target unknown;
  1227. try {
  1228. //Instead of using Target.UNKNOWN, we can at least set the file.
  1229. unknown = new Target(0, stream.get(0).target.file(), 0);
  1230. } catch (Exception e) {
  1231. unknown = Target.UNKNOWN;
  1232. }
  1233. // Remove all newlines and whitespaces.
  1234. ListIterator<Token> it = stream.listIterator(0);
  1235. while(it.hasNext()) {
  1236. if(it.next().type.isWhitespace()) {
  1237. it.remove();
  1238. }
  1239. }
  1240. // Get the file options.
  1241. final FileOptions fileOptions = stream.getFileOptions();
  1242. final ParseTree rootNode = new ParseTree(fileOptions);
  1243. rootNode.setData(CNull.NULL);
  1244. ParseTree tree = rootNode;
  1245. Stack<ParseTree> parents = new Stack<>();
  1246. /**
  1247. * constructCount is used to determine if we need to use autoconcat when reaching a FUNC_END. The previous
  1248. * constructs, if the count is greater than 1, will be moved down into an autoconcat.
  1249. */
  1250. Stack<AtomicInteger> constructCount = new Stack<>();
  1251. constructCount.push(new AtomicInteger(0));
  1252. parents.push(tree);
  1253. tree.addChild(new ParseTree(new CFunction(__autoconcat__.NAME, unknown), fileOptions));
  1254. parents.push(tree.getChildAt(0));
  1255. tree = tree.getChildAt(0);
  1256. constructCount.push(new AtomicInteger(0));
  1257. /**
  1258. * The array stack is used to keep track of the number of square braces in use.
  1259. */
  1260. Stack<AtomicInteger> arrayStack = new Stack<>();
  1261. arrayStack.add(new AtomicInteger(-1));
  1262. Stack<AtomicInteger> minusArrayStack = new Stack<>();
  1263. Stack<AtomicInteger> minusFuncStack = new Stack<>();
  1264. int parens = 0;
  1265. Token t = null;
  1266. int braceCount = 0;
  1267. boolean inObjectDefinition = false;
  1268. // Create a Token array to iterate over, rather than using the LinkedList's O(n) get() method.
  1269. Token[] tokenArray = stream.toArray(new Token[stream.size()]);
  1270. for(int i = 0; i < tokenArray.length; i++) {
  1271. t = tokenArray[i];
  1272. Token prev1 = i - 1 >= 0 ? tokenArray[i - 1] : new Token(TType.UNKNOWN, "", t.target);
  1273. Token next1 = i + 1 < stream.size() ? tokenArray[i + 1] : new Token(TType.UNKNOWN, "", t.target);
  1274. Token next2 = i + 2 < stream.size() ? tokenArray[i + 2] : new Token(TType.UNKNOWN, "", t.target);
  1275. Token next3 = i + 3 < stream.size() ? tokenArray[i + 3] : new Token(TType.UNKNOWN, "", t.target);
  1276. // Brace handling
  1277. if(t.type == TType.LCURLY_BRACKET) {
  1278. inObjectDefinition = false;
  1279. ParseTree b = new ParseTree(new CFunction(__cbrace__.NAME, t.getTarget()), fileOptions);
  1280. tree.addChild(b);
  1281. tree = b;
  1282. parents.push(b);
  1283. braceCount++;
  1284. constructCount.push(new AtomicInteger(0));
  1285. continue;
  1286. }
  1287. if(t.type == TType.RCURLY_BRACKET) {
  1288. if(braceCount == 0) {
  1289. throw new ConfigCompileException("Unexpected end curly brace", t.target);
  1290. }
  1291. braceCount--;
  1292. if(constructCount.peek().get() > 1) {
  1293. //We need to autoconcat some stuff
  1294. int stacks = constructCount.peek().get();
  1295. int replaceAt = tree.getChildren().size() - stacks;
  1296. ParseTree c = new ParseTree(new CFunction(__autoconcat__.NAME, tree.getTarget()), fileOptions);
  1297. List<ParseTree> subChildren = new ArrayList<>();
  1298. for(int b = replaceAt; b < tree.numberOfChildren(); b++) {
  1299. subChildren.add(tree.getChildAt(b));
  1300. }
  1301. c.setChildren(subChildren);
  1302. if(replaceAt > 0) {
  1303. List<ParseTree> firstChildren = new ArrayList<>();
  1304. for(int d = 0; d < replaceAt; d++) {
  1305. firstChildren.add(tree.getChildAt(d));
  1306. }
  1307. tree.setChildren(firstChildren);
  1308. } else {
  1309. tree.removeChildren();
  1310. }
  1311. tree.addChild(c);
  1312. }
  1313. parents.pop();
  1314. tree = parents.peek();
  1315. constructCount.pop();
  1316. try {
  1317. constructCount.peek().incrementAndGet();
  1318. } catch (EmptyStackException e) {
  1319. throw new ConfigCompileException("Unexpected end curly brace", t.target);
  1320. }
  1321. continue;
  1322. }
  1323. if(t.type == TType.KEYWORD && KeywordList.getKeywordByName(t.value) instanceof ObjectDefinitionKeyword) {
  1324. inObjectDefinition = true;
  1325. }
  1326. //Associative array/label handling
  1327. if(t.type == TType.LABEL && tree.getChildren().size() > 0) {
  1328. //If it's not an atomic identifier it's an error.
  1329. if(!prev1.type.isAtomicLit() && prev1.type != TType.IVARIABLE && prev1.type != TType.KEYWORD) {
  1330. ConfigCompileException error = new ConfigCompileException("Invalid label specified", t.getTarget());
  1331. if(prev1.type == TType.FUNC_END) {
  1332. // This is a fairly common mistake, so we have special handling for this,
  1333. // because otherwise we would get a "Mismatched parenthesis" warning (which doesn't make sense),
  1334. // and potentially lots of other invalid errors down the line, so we go ahead
  1335. // and stop compilation at this point.
  1336. throw error;
  1337. }
  1338. compilerErrors.add(error);
  1339. }
  1340. // Wrap previous construct in a CLabel
  1341. ParseTree cc = tree.getChildren().get(tree.getChildren().size() - 1);
  1342. tree.removeChildAt(tree.getChildren().size() - 1);
  1343. tree.addChild(new ParseTree(new CLabel((Construct) cc.getData()), fileOptions));
  1344. continue;
  1345. }
  1346. //Array notation handling
  1347. if(t.type.equals(TType.LSQUARE_BRACKET)) {
  1348. //tree.addChild(new ParseTree(new CFunction("__cbracket__", t.getTarget()), fileOptions));
  1349. arrayStack.push(new AtomicInteger(tree.getChildren().size() - 1));
  1350. continue;
  1351. } else

Large files are truncated, but you can click here to view the full file