PageRenderTime 55ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/parser/parser.pas

https://github.com/Piterolex/SScript-Compiler
Pascal | 715 lines | 490 code | 123 blank | 102 comment | 48 complexity | b52b4ef6387c0f1e7f2787493e135a77 MD5 | raw file
Possible License(s): LGPL-3.0
  1. (*
  2. Copyright © by Patryk Wychowaniec, 2013
  3. All rights reserved.
  4. *)
  5. Unit Parser;
  6. Interface
  7. Uses Classes, FGL,
  8. symdef, Scanner, Tokens, Expression;
  // Token kinds used as the default separator set by the constant-expression readers.
  Const DefaultSeparators = [_SEMICOLON, _COMMA, _BRACKET1_CL, _BRACKET2_CL, _BRACKET3_CL];

  // List of pointers to tokens.
  Type TTokenList = specialize TFPGList<PToken_P>;

  { TParser }
  Type TParser =
    Class
      Private
        // private fields
        Compiler     : Pointer;    // compiler instance kept as an untyped pointer (cast to `TCompiler` in the implementation)
        TokenList    : TTokenList; // list of tokens (with stripped comments)
        DontFailOnEOF: Boolean;    // when set, the skipping helpers return silently at the end of the token list

      Public
        // public fields
        TokenPos   : Int64;       // current token ID (counting from 0)
        CurrentDeep: Integer;     // current brackets' deep (`{` = +1, `}` = -1)
        Visibility : TVisibility; // current visibility

        Property getPosition  : Int64       read TokenPos;   // current token position
        Property getVisibility: TVisibility read Visibility; // current visibility state

        // construction / destruction
        Constructor Create(const CompilerPnt: Pointer; InputFile: String; out inLongComment: Boolean);
        Destructor Destroy; override;

        // direct access to the token list
        Function getToken(const Index: uint32): TToken_P;
        Function getTokenPnt(const Index: uint32): PToken_P;
        Function getLastToken: TToken_P;
        Function getCurrentRange(Deep: int16=1): TRange;

        // consuming (`read*`) and peeking (`next*`) tokens
        Function read: TToken_P;
        Function read_t: TToken;
        Function next(const I: Integer=0): TToken_P;
        Function next_pnt(const I: Integer=0): PToken_P;
        Function next_t(const I: Integer=0): TToken;

        // typed readers
        Function read_ident: String;
        Function read_string: String;
        Function read_int: Integer;
        Function read_type(const AllowArrays: Boolean=True): TType;
        Function read_constant_expr(const Sep: TTokenSet=DefaultSeparators): PExpressionNode;
        Function read_constant_expr_int(const Sep: TTokenSet=DefaultSeparators): Int64;

        // expecting and skipping tokens
        Procedure eat(Token: TToken);
        Procedure semicolon;
        Procedure skip_parenthesis;
        Procedure read_until(const Token: TToken);

        Function Can: Boolean;
    End;
  49. Implementation
  50. Uses CompilerUnit, SSCompiler, ExpressionCompiler, Messages, SysUtils;
  (* TParser.Create *)
  {
   Loads code from file and preparses it: the file is tokenized and every token
   lying outside of comments is stored in `TokenList`.

   CompilerPnt   - compiler instance, stored as an untyped pointer
   InputFile     - name of the file to load
   inLongComment - on return: True when the last long-comment opening token seen
                   was not followed by a closing one, False otherwise

   An empty file results in a list holding one synthetic `_EOF` token.
   The scanner and the string list are released on every exit path, including
   the early exit for an empty file and any raised exception.
  }
  Constructor TParser.Create(const CompilerPnt: Pointer; InputFile: String; out inLongComment: Boolean);
  Var Scanner: TScanner; // token scanner
      Code   : TStringList; // TScanner needs a TStringList to parse code
      Token  : TToken_P; // current token
      PToken : PToken_P;

      ShortCommentLine: LongWord=0; // short comment (`//`) line
  Begin
    Compiler      := CompilerPnt;
    inLongComment := False;
    TokenPos      := 0;
    CurrentDeep   := 0;
    DontFailOnEOF := False;

    Scanner := nil; // so that the `finally` section is safe if the scanner was never created
    Code    := TStringList.Create;

    Try
      { load code from file }
      Code.LoadFromFile(InputFile); // `InputFile` is already set in the `CompileCode`

      { parse it }
      TokenList := TTokenList.Create;
      Scanner   := TScanner.Create(Code);

      if (not Scanner.Can) Then // an empty file
      Begin
        New(PToken);

        // inside this `With`, `Token` is the record's field, not the local variable
        With PToken^ do
        Begin
          Char      := 1;
          Line      := 1;
          Position  := 0;
          Value     := 'end-of-file';
          Token     := _EOF;
          TokenName := 'EOF';
        End;

        TokenList.Add(PToken);
        Exit; // `finally` below still releases the scanner and the code
      End;

      While (Scanner.Can) do
      Begin
        Token := Scanner.getToken_P;

        if (Token.Token = noToken) Then // skip `noToken`-s
          Continue;

        if (Token.Token = _EOF) Then
        Begin
          DevLog(dvInfo, 'TParser.Create', 'reached `EOF` - finishing code parsing...');
          Break;
        End;

        if (Token.Token = _DOUBLE_SLASH) Then
        Begin
          // a short comment starts here, unless we are already inside a long comment
          if (not inLongComment) Then
            ShortCommentLine := Token.Line;
        End Else
        if (Token.Line <> ShortCommentLine) Then // not in short (one-line) comment
        Begin
          if (Token.Token = _LONGCMT_OPEN) Then
            inLongComment := True
          Else
          if (Token.Token = _LONGCMT_CLOSE) Then
            inLongComment := False
          Else
          if (not inLongComment) Then
          Begin
            New(PToken);
            PToken^          := Token;
            PToken^.Position := TokenList.Count; // position = index in the token list
            TokenList.Add(PToken);
          End;
        End;
      End;
    Finally
      { destroy objects }
      Scanner.Free;
      Code.Free;
    End;
  End;
  (* TParser.Destroy *)
  {
   Disposes every token stored in the token list and frees the list itself.

   The list may be unassigned when the constructor raised an exception before
   creating it (the destructor is then called automatically), hence the nil check.
  }
  Destructor TParser.Destroy;
  Var Token: PToken_P;
  Begin
    if (TokenList <> nil) Then
    Begin
      For Token in TokenList Do
        Dispose(Token);

      TokenList.Free;
      TokenList := nil;
    End;

    inherited Destroy;
  End;
  (* TParser.getToken *)
  {
   Returns a copy of the token stored in the token list at position `Index`.
  }
  Function TParser.getToken(const Index: uint32): TToken_P;
  Var Pnt: PToken_P;
  Begin
    Pnt    := TokenList[Index];
    Result := Pnt^;
  End;
  (* TParser.getTokenPnt *)
  {
   Returns the pointer stored in the token list at position `Index`.
  }
  Function TParser.getTokenPnt(const Index: uint32): PToken_P;
  Begin
    Exit(TokenList[Index]);
  End;
  (* TParser.getLastToken *)
  {
   Returns a copy of the last token stored in the token list.
  }
  Function TParser.getLastToken: TToken_P;
  Begin
    Result := TokenList.Last^;
  End;
  (* TParser.getCurrentRange *)
  {
   Returns current scope's range.

   `Result.PBegin` is the token at the current position; `Result.PEnd` is found
   by scanning forward. The token position is restored before returning, so the
   call does not consume any tokens.

   Deep - initial bracket depth; the scan stops once the depth drops to zero

   While scanning, `DontFailOnEOF` is set so that unclosed brackets do not raise
   an "unexpected EOF" error here; it is always reset on exit.
  }
  Function TParser.getCurrentRange(Deep: int16=1): TRange;

    { SkipBlock }
    // Skips either a whole bracket-3 block or a single statement (up to the semicolon).
    Procedure SkipBlock;
    Var Deep: int16 = 0; // local depth counter; shadows the outer `Deep` parameter
    Begin
      if (next_t = _BRACKET3_OP) Then
      Begin
        While (true) Do
        Begin
          // same guard as in `read_until` and `skip_parenthesis`: without it an
          // unclosed block would make `read` report `eEOF` despite `DontFailOnEOF`
          if ((TokenPos >= TokenList.Count) and (DontFailOnEOF)) Then
            Exit;

          Case read.Token of
            _BRACKET3_OP: Inc(Deep);
            _BRACKET3_CL: Dec(Deep);
          End;

          if (Deep = 0) Then
            Break;
        End;
      End Else
      Begin
        read_until(_SEMICOLON);
      End;
    End;

  Var TPos: Int64;
  Label Parse_IF;
  Begin
    Try
      DontFailOnEOF := True; // don't fail when brackets are unclosed (it would fail with error `unexpected eof`) because this error will be detected and raised later (when parsing the actual construction)

      TPos          := TokenPos; // remembered so that the position can be restored
      Result.PBegin := TokenList[TokenPos]^;

      With TCompiler(Compiler) do
      Begin
        if (ParsingFORInitInstruction) Then // super special case: parsing a for init instruction.
        Begin
          read_until(_BRACKET1_CL);

          Case next_t of
            (* block *)
            _BRACKET3_OP:
            Begin
              SkipBlock;
            End;

            (* if *)
            _IF:
            Begin
            Parse_IF:
              eat(_IF);
              eat(_BRACKET1_OP);
              read_until(_BRACKET1_CL);
              SkipBlock;

              if (next_t = _ELSE) Then
              Begin
                eat(_ELSE);
                SkipBlock;

                if (next_t = _IF) Then
                  goto Parse_IF;
              End;
            End;

            else
              read_until(_SEMICOLON);
          End;

          // clamp the position back into the list before dereferencing it
          While (TokenPos >= TokenList.Count) Do
            Dec(TokenPos);

          Result.PEnd := TokenList[TokenPos]^;
          TokenPos    := TPos;
          Exit;
        End;
      End;

      While (true) Do
      Begin
        if (TokenPos >= TokenList.Count) Then // ending `}` not found, so symbol must have global reachability
        Begin
          Dec(TokenPos);
          Break;
        End;

        Case TokenList[TokenPos]^.Token of
          _BRACKET3_OP: Inc(Deep);
          _BRACKET3_CL: Dec(Deep);
        End;

        Inc(TokenPos);

        if (Deep = 0) Then
          Break;
      End;

      // clamp the position back into the list before dereferencing it
      While (TokenPos >= TokenList.Count) Do
        Dec(TokenPos);

      Result.PEnd := TokenList[TokenPos]^;
      TokenPos    := TPos;
    Finally
      DontFailOnEOF := False;
    End;
  End;
  (* TParser.read *)
  {
   Reads a token: returns the token at the current position and moves the
   position one token forward.

   Reports `eEOF` when the position lies past the end of the token list, and a
   dedicated error when the token read is one of the `_INVALID_*` kinds.
  }
  Function TParser.read: TToken_P;
  Begin
    if (TokenList.Count <= TokenPos) Then
      TCompiler(Compiler).CompileError(eEOF);

    Result := TokenList[TokenPos]^;
    Inc(TokenPos);

    // tokens the scanner marked as malformed are reported as soon as they are consumed
    if (Result.Token = _INVALID_INT) Then
      TCompiler(Compiler).CompileError(Result, eInvalidIntegerValue, [Result.Value])
    Else
    if (Result.Token = _INVALID_FLOAT) Then
      TCompiler(Compiler).CompileError(Result, eInvalidFloatValue, [Result.Value])
    Else
    if (Result.Token = _INVALID_STRING) Then
      TCompiler(Compiler).CompileError(Result, eStringExceedsLine, []);
  End;
  (* TParser.read_t *)
  {
   Reads a token (see @TParser.read) and returns only its kind.
  }
  Function TParser.read_t: TToken;
  Begin
    Exit(read.Token);
  End;
  (* TParser.next *)
  {
   Returns a copy of the token lying `I` positions after (or, for a negative
   `I`, before) the current one, without changing the current position.
   See @TParser.next_pnt
  }
  Function TParser.next(const I: Integer=0): TToken_P;
  Var Pnt: PToken_P;
  Begin
    Pnt    := next_pnt(I);
    Result := Pnt^;
  End;
  (* TParser.next_pnt *)
  {
   Returns a pointer to the token lying `I` positions after (or, for a negative
   `I`, before) the current one, without changing the current position.

   When the requested position lies past the end of the list, the last token's
   pointer is returned instead.
  }
  Function TParser.next_pnt(const I: Integer=0): PToken_P;
  Var Index: Int64;
  Begin
    Index := TokenPos+I;

    if (Index < TokenList.Count) Then
      Exit(TokenList[Index]);

    Result := TokenList.Last;
  End;
  (* TParser.next_t *)
  {
   Same as @TParser.next, but returns only the token's kind.
  }
  Function TParser.next_t(const I: Integer=0): TToken;
  Begin
    Exit(next(I).Token);
  End;
  (* TParser.read_ident *)
  {
   Reads an identifier and returns its text; reports `eExpectedIdentifier` when
   the upcoming token isn't an identifier.
  }
  Function TParser.read_ident: String;
  Var Upcoming: TToken_P;
  Begin
    Upcoming := next; // peek only; the token is consumed by `read` below

    if (Upcoming.Token <> _IDENTIFIER) Then
      TCompiler(Compiler).CompileError(Upcoming, eExpectedIdentifier, [Upcoming.Value]);

    Result := read.Value;
  End;
  (* TParser.read_string *)
  {
   Reads a string and returns its value; reports `eExpectedString` when the
   upcoming token isn't a string.
  }
  Function TParser.read_string: String;
  Var Upcoming: TToken_P;
  Begin
    Upcoming := next; // peek only; the token is consumed by `read` below

    if (Upcoming.Token <> _STRING) Then
      TCompiler(Compiler).CompileError(Upcoming, eExpectedString, [Upcoming.Value]);

    Result := read.Value;
  End;
  (* TParser.read_int *)
  {
   Reads an integer value; reports `eExpectedInt` when the upcoming token isn't
   an integer. The token's text is converted with `StrToInt`.
  }
  Function TParser.read_int: Integer;
  Var Upcoming: TToken_P;
  Begin
    Upcoming := next; // peek only; the token is consumed by `read` below

    if (Upcoming.Token <> _INT) Then
      TCompiler(Compiler).CompileError(Upcoming, eExpectedInt, [Upcoming.Value]);

    Result := StrToInt(read.Value);
  End;
  (* TParser.read_type *)
  {
   Reads a type name or a full type (based on current token) and returns it.

   Accepted forms, as handled below:
    - `name` or `namespace::name`, optionally followed by `[]` pairs,
    - `function` (unspecialized function type),
    - `function<return type>(parameter list)` (specialized function type),
      where each parameter may be preceded by `const` or `var`, may have a
      name, and may have a default value given after `=`.

   AllowArrays - when False, trailing `[]` pairs are not consumed

   Returns nil when the namespace or the type could not be resolved, or when an
   array of `void` or `any` was requested (an error is reported in each case).

   The fresh `TType` instance is created only on the function-type path, as it
   is the only path that uses it; creating it unconditionally leaked one
   instance for every plain type name read.
  }
  Function TParser.read_type(const AllowArrays: Boolean=True): TType;
  Var Base, Typ, TmpType: TType;

      Token: TToken_P;
      I    : Integer;

      FuncReturn: TType;
      FuncParams: TParamList;
      FuncParam : PParam;

      isArray, isStringBased, isFunction: Boolean;
      RequireDefaultValue               : Boolean = False;

      NamespaceName: String;
      Namespace    : TNamespace;
  Begin
    With TCompiler(Compiler) do
    Begin
      Result := nil;
      Base   := nil;
      Typ    := nil;

      Token := read;

      isArray       := False;
      isStringBased := False;
      isFunction    := False;

      { read current token }
      Case Token.Token of
        _IDENTIFIER:
        Begin
          if (next_t = _DOUBLE_COLON) Then // `namespace name::type name`
          Begin
            eat(_DOUBLE_COLON);

            NamespaceName := Token.Value;
            Namespace     := findNamespace(NamespaceName);

            if (Namespace = nil) Then // namespace not found
            Begin
              CompileError(next(-2), eUnknownNamespace, [NamespaceName]);
              read_ident;
              Exit;
            End;

            Token := next;
            Base  := findTypeCandidate(read_ident, Namespace, Token);
          End Else // `type name`
          Begin
            Base := findTypeCandidate(Token.Value, getCurrentNamespace, Token);

            if (Base = nil) Then // type not found
            Begin
              CompileError(next(-1), eUnknownType, [Token.Value]);
              Exit;
            End;
          End;
        End;

        { function-type declaration }
        _FUNCTION:
          isFunction := True;

        else
          CompileError(next, eExpectedIdentifier, [Token.Value]);
      End;

      { function type }
      if (isFunction) Then
      Begin
        Typ := TType.Create; // needed on this path only

        if (next_t = _LOWER) Then
        Begin
          { specialized function }
          eat(_LOWER);
          FuncReturn := read_type(); // return type
          eat(_GREATER);
          eat(_BRACKET1_OP);

          SetLength(FuncParams, 0);

          While (true) Do // parameter list
          Begin
            Token := next;

            if (Token.Token = _BRACKET1_CL) Then
              Break;

            SetLength(FuncParams, Length(FuncParams)+1);
            FuncParam := @FuncParams[High(FuncParams)];

            if (Token.Token = _CONST) Then // const-param
            Begin
              Token := read;
              FuncParam^.Attributes += [vaConst];
              FuncParam^.isConst := True;
            End Else

            if (Token.Token = _VAR) Then // var-param
            Begin
              Token := read;
              FuncParam^.isVar := True;
            End;

            FuncParam^.Typ := read_type(); // [param type]

            if (FuncParam^.Typ.isVoid) Then // error: void-typed param
              CompileError(eVoidNoNameParam);

            if (next_t = _IDENTIFIER) Then // optional identifier indicating parameter's name
            Begin
              FuncParam^.Name := read_ident;

              For I := 0 To High(FuncParams)-1 Do
                if (FuncParams[I].Name = FuncParam^.Name) Then // redeclaration
                  CompileError(eRedeclaration, [FuncParam^.Name]);
            End;

            if (next_t = _EQUAL) Then // optional default parameter's value
            Begin
              eat(_EQUAL);

              FuncParam^.DefaultValue := read_constant_expr;
              TmpType                 := getTypeFromExpression(FuncParam^.DefaultValue);
              Dec(TokenPos); // step back one token after reading the expression

              if (not TmpType.CanBeAssignedTo(FuncParam^.Typ)) Then
                CompileError(eWrongType, [TmpType.asString, FuncParam^.Typ.asString]);

              RequireDefaultValue := True; // every following parameter needs a default value too
            End Else
            if (RequireDefaultValue) Then
              CompileError(next, eDefaultParamValueRequired, [FuncParam^.Name]) Else
              FuncParam^.DefaultValue := nil;

            if (next_t = _BRACKET1_CL) Then // end of parameter list?
              Break;

            eat(_COMMA); // 'eat' comma (parameter list separator)
          End;

          eat(_BRACKET1_CL);

          Typ.RegPrefix  := 'r';
          Typ.InternalID := TYPE_INT_id;
          Typ.FuncReturn := FuncReturn;
          Typ.FuncParams := FuncParams;
          Include(Typ.Attributes, taFunction);
        End Else
        Begin
          { unspecialized function }
          Typ.RegPrefix  := 'r';
          Typ.InternalID := TYPE_INT_id;
          Typ.FuncReturn := TYPE_ANY;
          Typ.Attributes += [taFunction, taUnspecialized];
        End;

        if ((next_t = _BRACKET2_OP) and (AllowArrays)) Then // is it an array declaration?
        Begin
          Base := Typ.Clone;
        End Else
          Exit(Typ);
      End;

      { check for primary type existence }
      if (Base = nil) Then
      Begin
        CompileError(next, eUnknownType, [Token.Value]);
        Exit;
      End;

      Typ := Base.Clone;

      if (next_t = _BRACKET2_OP) Then
      Begin
        if (Base.isVoid) Then // `void` array cannot be created (it would destroy our universe)...
        Begin
          CompileError(next, eVoidArray, []);
          Exit;
        End;

        if (Base.InternalID = TYPE_ANY_id) Then // ... as well, as `any`-typed array
        Begin
          CompileError(next, eInternalError, ['Cannot create an ''any''-typed array!']);
          Exit;
        End;
      End;

      { is it an array (is the next token a `[`)? }
      While (next_t = _BRACKET2_OP) and (AllowArrays) Do
      Begin
        eat(_BRACKET2_OP);
        eat(_BRACKET2_CL);

        Inc(Typ.ArrayDimCount);
      End;

      isArray       := Typ.ArrayDimCount > 0;
      isStringBased := type_equal(Typ, TYPE_STRING); // @TODO: memleak (as `TYPE_STRING` creates a new instance of `string` type)

      if (isArray) Then
      Begin
        Typ.RegPrefix := 'r';
        Typ.ArrayBase := Base;

        if (isStringBased) Then
        Begin
          Typ.RegPrefix := 's';
          Typ.ArrayBase := Typ.ArrayBase.ArrayBase;
        End;
      End;

      { set result }
      Result := Typ;
    End;
  End;
  (* TParser.read_constant_expr *)
  {
   Reads a constant expression: builds it with `MakeExpression` (using `Sep` as
   the set of separators) and passes it through `OptimizeExpression` with
   constant insertion, constant folding and parse-error display enabled.
  }
  Function TParser.read_constant_expr(const Sep: TTokenSet=DefaultSeparators): PExpressionNode;
  Var Expr: PExpressionNode;
  Begin
    Expr := MakeExpression(Compiler, Sep, []);
    OptimizeExpression(TCompiler(Compiler), Expr, [oInsertConstants, oConstantFolding, oDisplayParseErrors]);

    Result := Expr;
  End;
  (* TParser.read_constant_expr_int *)
  {
   Reads a constant expression (see @TParser.read_constant_expr) and returns its
   value as an integer.

   Reports `eWrongType` when the expression's type isn't `mtInt`; returns 0 (and
   writes a developer log entry) when the expression's value is null.
  }
  Function TParser.read_constant_expr_int(const Sep: TTokenSet=DefaultSeparators): Int64;
  Var Node: PExpressionNode;
  Begin
    Node := read_constant_expr(Sep);

    if (Node^.Typ <> mtInt) Then
      TCompiler(Compiler).CompileError(eWrongType, [getExpressionTypeName(Node), 'int']);

    if (Node^.Value = null) Then
    Begin
      DevLog(dvError, 'TParser.read_constant_expr_int', 'Error: TParser.read_constant_expr_int() -> Expr^.Value = null; returned `0`');
      Result := 0;
    End Else
      Result := Node^.Value;
  End;
  (* TParser.eat *)
  {
   'Eats' a specified token: reads one token and reports `eExpected` when its
   kind differs from the one passed in the parameter.
  }
  Procedure TParser.eat(Token: TToken);
  Var Got: TToken_P;
  Begin
    Got := read;

    if (Got.Token = Token) Then
      Exit;

    TCompiler(Compiler).CompileError(eExpected, [getTokenDisplay(Token), Got.Value]);
  End;
  (* TParser.semicolon *)
  {
   Shorthand for `eat(_SEMICOLON)`: expects a semicolon as the next token.
  }
  Procedure TParser.semicolon;
  Begin
    eat(_SEMICOLON);
  End;
  (* TParser.skip_parenthesis *)
  {
   Skips parentheses: reads tokens, counting opening tokens (+1) and closing
   tokens (-1), until the counter is back at zero. `_LOWER` and `_GREATER` are
   counted as an opening and a closing token respectively.

   Returns silently at the end of the token list when `DontFailOnEOF` is set.
  }
  Procedure TParser.skip_parenthesis;
  Var Depth  : Integer;
      Current: TToken;
  Begin
    Depth := 0;

    Repeat
      if (DontFailOnEOF) and (TokenPos >= TokenList.Count) Then
        Exit;

      Current := read_t;

      if (Current in [_BRACKET1_OP, _BRACKET2_OP, _BRACKET3_OP, _LOWER]) Then
        Inc(Depth)
      Else
      if (Current in [_BRACKET1_CL, _BRACKET2_CL, _BRACKET3_CL, _GREATER]) Then
        Dec(Depth);
    Until (Depth = 0);
  End;
  (* TParser.read_until *)
  {
   Reads tokens until `Token` is read at bracket depth zero (that token is
   consumed as well). Brackets opened on the way raise the depth and closed
   ones lower it, so a matching token inside nested brackets does not stop
   the scan.

   Returns silently at the end of the token list when `DontFailOnEOF` is set.
  }
  Procedure TParser.read_until(const Token: TToken);
  Var Depth  : Integer;
      Current: TToken;
  Begin
    Depth := 0;

    Repeat
      if (DontFailOnEOF) and (TokenPos >= TokenList.Count) Then
        Exit;

      Current := read_t;

      if (Depth = 0) and (Current = Token) Then
        Exit;

      if (Current in [_BRACKET1_OP, _BRACKET2_OP, _BRACKET3_OP]) Then
        Inc(Depth)
      Else
      if (Current in [_BRACKET1_CL, _BRACKET2_CL, _BRACKET3_CL]) Then
        Dec(Depth);
    Until (False);
  End;
  (* TParser.Can *)
  {
   Returns 'true' when the current position still lies inside the token list,
   i.e. at least one more token can be read.
  }
  Function TParser.Can: Boolean;
  Begin
    Exit(TokenList.Count > TokenPos);
  End;
  592. End.