PageRenderTime 56ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/IronPython_Main/Languages/Ruby/Ruby/Compiler/Parser/GPPG.cs

#
C# | 718 lines | 481 code | 136 blank | 101 comment | 113 complexity | 72ef55e42add05130ddc06a2c1633b2f MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * ironruby@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections.Generic;
  17. using System.Diagnostics;
  18. using System.IO;
  19. using Microsoft.Scripting.Utils;
  20. using System.Text;
  21. using System.Threading;
  22. using TValue = IronRuby.Compiler.TokenValue;
  23. using TLocation = Microsoft.Scripting.SourceSpan;
  24. namespace IronRuby.Compiler {
  25. #region State
  // One state of the parser automaton: an optional terminal -> action table,
  // an optional non-terminal -> goto table, and a default action.
  //
  // Encoding of action values:
  //   > 0    shift
  //   == 0   error
  //   < 0    reduce
  //   == -1  accept
  internal sealed class State {
      // State x Terminal -> action; may be null when the state has no action table.
      private readonly Dictionary<int, int> _actions;

      // State x NonTerminal -> State; may be null when the state has no goto table.
      private readonly Dictionary<int, int> _gotos;

      // Action the parser starts from before consulting the action table
      // (it is the only action available when the action table is null).
      private readonly int _defaultAction;

      // Stores the given tables and default action as-is (no copies are made).
      public State(Dictionary<int, int> actions, Dictionary<int, int> gotos, int defaultAction) {
          _defaultAction = defaultAction;
          _gotos = gotos;
          _actions = actions;
      }

      public Dictionary<int, int> Actions {
          get { return _actions; }
      }

      public Dictionary<int, int> GotoStates {
          get { return _gotos; }
      }

      public int DefaultAction {
          get { return _defaultAction; }
      }

  #if DEBUG
      // Index of this state in the state array; assigned by the table builder.
      public int Id { get; set; }

      // Debug display: the state index.
      public override string/*!*/ ToString() {
          return Id.ToString();
      }
  #endif
  }
  62. #endregion
  63. #region ParserTables
  // Plain container for the parser tables. The parser builds one instance,
  // stores it in a static field and copies States, Rules and the two token
  // ids into each parser instance.
  internal sealed class ParserTables {
      // Token ids of the special "error" and end-of-file terminals.
      public int ErrorToken;
      public int EofToken;

      // Automaton states, indexed by state id.
      public State[] States;

      // One packed integer per rule:
      //   upper word: LhsNonTerminal
      //   lower word: RhsLength
      public int[] Rules;

  #if DEBUG // Metadata
      // Display names, indexed by non-terminal id.
      internal string[] NonTerminalNames;

      // Concatenated symbols of all rule right-hand sides:
      //   symbol < 0  represents a non-terminal
      //   symbol >= 0 represents a terminal
      internal short[] RuleRhsSymbols;

      // rule index -> index of the rule's first symbol in RuleRhsSymbols (calculated).
      internal ushort[] RuleRhsSymbolIndexes;
  #endif
  }
  81. #endregion
  82. #region IParserLogger
  // Callbacks the parser invokes (DEBUG builds only, and only while a logger
  // is attached) to trace its progress.
  internal interface IParserLogger {
      // Called at the top of every iteration of the main parse loop.
      void StateEntered();

      // Called with the current lookahead token before its action is looked up.
      void NextToken(int tokenId);

      // Called before the parser shifts to state stateId on token tokenId;
      // isErrorShift is true when the shifted token is the error token.
      void BeforeShift(int stateId, int tokenId, bool isErrorShift);

      // Called before rule ruleId, whose right-hand side has rhsLength symbols, is reduced.
      void BeforeReduction(int ruleId, int rhsLength);

      // Called before the goto to state stateId that follows the reduction of rule ruleId.
      void BeforeGoto(int stateId, int ruleId);
  }
  90. #endregion
  91. #region ShiftReduceParser
  92. public partial class Parser {
  93. private static ParserTables _tables;
  94. private static readonly object _tablesLock = new object();
  95. private TValue yyval;
  96. private TLocation yyloc;
  97. // Experimental : last yylloc prior to call of yylex()
  98. private TLocation _lastTokenSpan;
  99. private int _nextToken;
  100. private State _currentState;
  101. private bool _recovering;
  102. private int _tokensSinceLastError;
  103. private ParserStack<State, TValue, TLocation>/*!*/ _stack;
  104. private int _errorToken;
  105. private int _eofToken;
  106. private State[] _states;
  107. private int[] _rules;
  108. #if DEBUG
  109. // test hooks:
  110. internal State CurrentState { get { return _currentState; } }
  111. internal ParserStack<State, TValue, TLocation>/*!*/ Stack { get { return _stack; } }
  112. internal State[] States { get { return _states; } }
  113. internal int[] Rules { get { return _rules; } }
  114. internal ParserTables Tables { get { return _tables; } }
  115. #endif
  116. // methods that must be implemented by the parser
  //private void InitializeGeneratedTables(ParserTables/*!*/ tables);
  118. //private TLocation MergeLocations(TLocation start, TLocation end);
  119. //private TValue GetTokenValue(); // lexical value: set by scanner
  120. //private TLocation GetTokenSpan(); // location value: set by scanner
  121. //private int GetNextToken();
  122. //private void ReportSyntaxError(string message);
  // Returns the right-hand-side length stored in the lower 16 bits of a packed rule definition.
  internal static int GetRuleRhsLength(int ruleDef) {
      const int RhsLengthMask = 0xffff;
      return ruleDef & RhsLengthMask;
  }
  // Returns the left-hand-side non-terminal stored in the upper 16 bits of a
  // packed rule definition (arithmetic shift, so the sign is preserved).
  internal static int GetRuleLhsNonterminal(int ruleDef) {
      const int LhsShift = 16;
      int lhs = ruleDef >> LhsShift;
      return lhs;
  }
  // Creates a fresh parser stack for this instance and makes sure the shared
  // static tables exist, then copies the table references and special token
  // ids into instance fields.
  private void InitializeTables() {
      _stack = new ParserStack<State, TValue, TLocation>();

      // The tables are built at most once: _tables is checked before taking
      // the lock and again while holding it.
      if (_tables == null) {
          lock (_tablesLock) {
              if (_tables == null) {
                  Debug.Assert(typeof(TLocation).IsValueType);

                  ParserTables built = new ParserTables();
                  InitializeGeneratedTables(built);
  #if DEBUG
                  InitializeMetadata(built);
                  InitializeRulesMetadata(built);
  #endif
                  // Barrier before the fully initialized instance is published.
                  Thread.MemoryBarrier();
                  _tables = built;
              }
          }
      }

      ParserTables shared = _tables;
      _eofToken = shared.EofToken;
      _errorToken = shared.ErrorToken;
      _rules = shared.Rules;
      _states = shared.States;
  }
  // TODO: possible optimization: build a single dictionary mapping all gotos and actions for all states.
  // This (custom) dictionary might be precomputed by the generator and allocated in a single array.
  // This would save reallocation of ~650kB of Dictionary.Entry[] since the array would be considered a large object.
  //
  // Deserializes the state array from its compact short[] encoding:
  //
  //   length,
  //   (
  //     (action_count: positive short, goto_count: positive short) | (action_count: negative short),
  //     (key: short, value: short){action_count} | (defaultAction: short),
  //     (key: short, value: short){goto_count}
  //   ){length}
  //
  // where action_count is
  //   > 0  ... the number of items in the action table
  //   == 0 ... there is no action table, a single default action id follows
  //   < 0  ... there is neither an action table nor a goto table; the value itself is the default action id
  // and goto_count is the number of items in the goto table (zero means there is no goto table).
  private State[]/*!*/ BuildStates(short[]/*!*/ data) {
      Debug.Assert(data != null && data.Length > 0);

      int pos = 0;
      int stateCount = data[pos++];
      State[] result = new State[stateCount];

      for (int stateIndex = 0; stateIndex < result.Length; stateIndex++) {
          int header = data[pos++];

          Dictionary<int, int> actionTable = null;
          Dictionary<int, int> gotoTable = null;
          int fallbackAction = 0;

          if (header < 0) {
              // Negative header: it is the default action and nothing else follows for this state.
              fallbackAction = header;
          } else {
              int gotoCount = data[pos++];
              Debug.Assert(gotoCount >= 0);

              if (header == 0) {
                  fallbackAction = data[pos++];
              } else {
                  actionTable = new Dictionary<int, int>(header);
                  for (int remaining = header; remaining > 0; remaining--) {
                      int terminal = data[pos++];
                      int action = data[pos++];
                      actionTable.Add(terminal, action);
                  }
              }

              if (gotoCount > 0) {
                  gotoTable = new Dictionary<int, int>(gotoCount);
                  for (int remaining = gotoCount; remaining > 0; remaining--) {
                      // Non-terminals are serialized negated; the table is keyed by the positive id.
                      Debug.Assert(data[pos] < 0);
                      int nonTerminal = -data[pos++];
                      int targetState = data[pos++];
                      gotoTable.Add(nonTerminal, targetState);
                  }
              }
          }

          State built = new State(actionTable, gotoTable, fallbackAction);
  #if DEBUG
          built.Id = stateIndex;
  #endif
          result[stateIndex] = built;
      }

      return result;
  }
  // Main shift-reduce loop. Returns true once the accept action (-1) has
  // been processed, false when error recovery fails.
  private bool Parse() {
      _nextToken = 0;
      _currentState = _states[0];
      _lastTokenSpan = GetTokenSpan();
      _stack.Push(_currentState, yyval, yyloc);

      for (;;) {
          LogStateEntered();

          int action = _currentState.DefaultAction;
          if (_currentState.Actions != null) {
              bool needsToken = _nextToken == 0;
              if (needsToken) {
                  // Remember the span of the previous token so that the location
                  // of productions whose right-hand side begins or ends with a
                  // nullable production comes out right.
                  _lastTokenSpan = GetTokenSpan();
                  _nextToken = GetNextToken();
              }

              LogNextToken(_nextToken);
              // A missing entry leaves action == 0, i.e. error.
              _currentState.Actions.TryGetValue(_nextToken, out action);
          }

          if (action == 0) {
              // error
              if (!ErrorRecovery()) {
                  return false;
              }
              continue;
          }

          if (action > 0) {
              LogBeforeShift(action, _nextToken, false);
              Shift(action);
              continue;
          }

          // action < 0: reduce by rule (-action - 1)
          Reduce(-action - 1);

          // accept
          if (action == -1) {
              return true;
          }
      }
  }
  // Enters state stateId, pushing it together with the current token value
  // and span, and then consumes the lookahead unless it is end-of-file.
  private void Shift(int stateId) {
      State target = _states[stateId];
      _currentState = target;
      _stack.Push(target, GetTokenValue(), GetTokenSpan());

      if (_recovering) {
          // Shifts of the error token itself do not count.
          if (_nextToken != _errorToken) {
              _tokensSinceLastError++;
          }
          // Recovery ends once more than 5 tokens have been counted since the last error.
          _recovering = _tokensSinceLastError <= 5;
      }

      // The end-of-file token is kept as the lookahead; anything else is consumed.
      if (_nextToken != _eofToken) {
          _nextToken = 0;
      }
  }
  // Reduces by rule ruleId: computes the location of the result, runs the
  // semantic action, pops the right-hand side and pushes the goto state.
  private void Reduce(int ruleId) {
      int packedRule = _rules[ruleId];
      int symbolCount = GetRuleRhsLength(packedRule);

      LogBeforeReduction(ruleId, symbolCount);

      switch (symbolCount) {
          case 0:
              // The location span for an empty production starts at the
              // beginning of the next lexeme and ends with the finish of the
              // previous lexeme. This gives the correct behaviour when this
              // nonsense value is used in later Merge operations.
              yyloc = MergeLocations(_lastTokenSpan, GetTokenSpan());
              break;

          case 1:
              yyloc = _stack.PeekLocation(1);
              break;

          default: {
              TLocation first = GetLocation(symbolCount);
              TLocation last = GetLocation(1);
              yyloc = MergeLocations(first, last);
              break;
          }
      }

      DoAction(ruleId);

      _stack.Pop(symbolCount);

      // State exposed on top of the stack after the pop; it is kept when its
      // goto table has no entry for the rule's left-hand side.
      var exposed = _stack.PeekState(1);
      State next = exposed;

      int targetIndex;
      if (exposed.GotoStates.TryGetValue(GetRuleLhsNonterminal(packedRule), out targetIndex)) {
          LogBeforeGoto(targetIndex, ruleId);
          next = _states[targetIndex];
      }

      _stack.Push(next, yyval, yyloc);
      _currentState = next;
  }
  // Semantic action applied when a rule does not specify one: the result
  // value is the value on top of the stack.
  private void DoDefaultAction() {
      TValue top = _stack.PeekValue(1);
      yyval = top;
  }
  // Handles a syntax error: reports it (unless a previous error is still
  // being recovered from), unwinds to a state that can shift the error token,
  // shifts it and discards input until a usable token is found.
  // Returns false when no recovery state exists or the input ran out.
  public bool ErrorRecovery() {
      bool firstReport = !_recovering;
      if (firstReport) {
          ReportSyntaxError(GetSyntaxErrorMessage());
      }

      if (!FindErrorRecoveryState()) {
          return false;
      }

      //
      // The interim fix for the "looping in error recovery" artifact
      // involved moving the setting of the recovering flag until after
      // invalid tokens have been discarded.
      //
      ShiftErrorToken();
      bool resumed = DiscardInvalidTokens();

      _tokensSinceLastError = 0;
      _recovering = true;
      return resumed;
  }
  // Builds the syntax error message for the current lookahead token:
  // "syntax error, unexpected X", followed by ", expecting A, B, or C" when
  // the current state has an action table with fewer than 7 terminals.
  //
  // States built without an action table expose Actions == null; such a state
  // yields the message without the "expecting" list instead of throwing a
  // NullReferenceException while the error is being reported.
  private string GetSyntaxErrorMessage() {
      StringBuilder errorMsg = new StringBuilder();
      errorMsg.AppendFormat("syntax error, unexpected {0}", Tokenizer.GetTokenDescription((Tokens)_nextToken)); // TODO: actual value?

      Dictionary<int, int> actions = _currentState.Actions;
      if (actions != null && actions.Count < 7) {
          int i = 0;
          int last = actions.Count - 1;
          foreach (int terminal in actions.Keys) {
              if (i == 0) {
                  errorMsg.Append(", expecting ");
              } else if (i == last) {
                  errorMsg.Append(", or ");
              } else {
                  errorMsg.Append(", ");
              }

              errorMsg.Append(Tokenizer.GetTokenDescription((Tokens)terminal));
              i++;
          }
      }

      return errorMsg.ToString();
  }
  // Shifts the error token from the current state, then restores the
  // lookahead that was pending before the call. The current state's action
  // table must contain an entry for the error token.
  public void ShiftErrorToken() {
      int pendingToken = _nextToken;

      // Shift() inspects _nextToken, so the error token must be the lookahead while it runs.
      _nextToken = _errorToken;
      int target = _currentState.Actions[_errorToken];
      LogBeforeShift(target, _errorToken, true);
      Shift(target);

      _nextToken = pendingToken;
  }
  // Pops states until the current state can shift the error token.
  // Returns false when the stack becomes empty first.
  public bool FindErrorRecoveryState() {
      for (;;) {
          Dictionary<int, int> table = _currentState.Actions;
          int action;
          bool shiftsError = table != null
              && table.TryGetValue(_errorToken, out action)
              && action > 0;
          if (shiftsError) {
              return true;
          }

          // LogState("Error, popping state", _stateStack.Peek(1));
          _stack.Pop();

          if (_stack.IsEmpty) {
              // Log("Aborting: didn't find a state that accepts error token");
              return false;
          }

          _currentState = _stack.PeekState(1);
      }
  }
  // Skips input until the current state has a non-zero action for the
  // lookahead token. Returns false when end-of-file is reached first.
  public bool DiscardInvalidTokens() {
      if (_currentState.Actions == null) {
          if (_recovering && _tokensSinceLastError == 0) {
              //
              // The recovering flag is not set until after the first error
              // token has been shifted. Thus if we get back here with
              // recovering set and no tokens read we are looping on the same
              // error recovery action. This happens if the current state has
              // no action table because the state has an LR(0) reduction, but
              // not all lookahead tokens are valid. This only occurs for
              // error productions that *end* on "error".
              //
              // This action discards tokens one at a time until the looping
              // stops. Another attack would be to always use the LALR(1)
              // table if a production ends on "error".
              //
              // LogToken("Error, panic discard of {0}", _nextToken);
              _nextToken = 0;
          }
          return true;
      }

      // Discard tokens until we find one that works ...
      for (;;) {
          if (_nextToken == 0) {
              _nextToken = GetNextToken();
          }

          LogNextToken(_nextToken);

          if (_nextToken == _eofToken) {
              return false;
          }

          int action;
          _currentState.Actions.TryGetValue(_nextToken, out action);
          if (action != 0) {
              return true;
          }

          // LogToken("Error, discarding token", _nextToken);
          _nextToken = 0;
      }
  }
  // Returns the semantic value stored on the parser stack at the given depth.
  private TValue GetValue(int depth) {
      TValue value = _stack.PeekValue(depth);
      return value;
  }
  // Returns the source location stored on the parser stack at the given depth.
  private TLocation GetLocation(int depth) {
      TLocation location = _stack.PeekLocation(depth);
      return location;
  }
  // Drops the current lookahead token (experimental in this version).
  // A lookahead of 0 makes the parse loop fetch a new token.
  private void ClearInput() {
      const int NoToken = 0;
      _nextToken = NoToken;
  }
  // Leaves error-recovery mode, so the next syntax error is reported again.
  private void StopErrorRecovery() {
      bool stillRecovering = false;
      _recovering = stillRecovering;
  }
  #region Debug Logging

  #if DEBUG
  // Receiver of the trace callbacks; null while logging is disabled.
  private IParserLogger _logger;
  #endif

  // Attaches a logger (DEBUG builds only; calls are removed otherwise).
  [Conditional("DEBUG")]
  internal void EnableLogging(IParserLogger/*!*/ logger) {
  #if DEBUG
      Assert.NotNull(logger);
      _logger = logger;
  #endif
  }

  // Detaches the current logger, if any.
  [Conditional("DEBUG")]
  internal void DisableLogging() {
  #if DEBUG
      _logger = null;
  #endif
  }

  // Each helper below forwards to the attached logger and does nothing when none is attached.

  [Conditional("DEBUG")]
  private void LogStateEntered() {
  #if DEBUG
      IParserLogger sink = _logger;
      if (sink != null) {
          sink.StateEntered();
      }
  #endif
  }

  [Conditional("DEBUG")]
  private void LogNextToken(int tokenId) {
  #if DEBUG
      IParserLogger sink = _logger;
      if (sink != null) {
          sink.NextToken(tokenId);
      }
  #endif
  }

  [Conditional("DEBUG")]
  private void LogBeforeReduction(int ruleId, int rhsLength) {
  #if DEBUG
      IParserLogger sink = _logger;
      if (sink != null) {
          sink.BeforeReduction(ruleId, rhsLength);
      }
  #endif
  }

  [Conditional("DEBUG")]
  private void LogBeforeShift(int stateId, int tokenId, bool isErrorShift) {
  #if DEBUG
      IParserLogger sink = _logger;
      if (sink != null) {
          sink.BeforeShift(stateId, tokenId, isErrorShift);
      }
  #endif
  }

  [Conditional("DEBUG")]
  private void LogBeforeGoto(int stateId, int ruleId) {
  #if DEBUG
      IParserLogger sink = _logger;
      if (sink != null) {
          sink.BeforeGoto(stateId, ruleId);
      }
  #endif
  }

  #endregion
  456. #region Parser Reflection
  457. #if DEBUG
  // Computes, for every rule, the index of its first right-hand-side symbol
  // in the concatenated RuleRhsSymbols array, and stores the result in
  // tables.RuleRhsSymbolIndexes. The running index is a ushort.
  private static void InitializeRulesMetadata(ParserTables/*!*/ tables) {
      int ruleCount = tables.Rules.Length;
      ushort[] firstSymbolIndexes = new ushort[ruleCount];

      ushort nextFree = 0;
      for (int rule = 0; rule < ruleCount; rule++) {
          firstSymbolIndexes[rule] = nextFree;
          // The lower word of the packed rule is its right-hand-side length.
          nextFree += (ushort)(tables.Rules[rule] & 0xffff);
      }

      tables.RuleRhsSymbolIndexes = firstSymbolIndexes;
  }
  // Formats a parse action for display:
  //   action > 0   -> "S(action)"   (shift)
  //   action == 0  -> ""            (error)
  //   action == -1 -> "ACCEPT"
  //   otherwise    -> "R(-action)"  (reduce)
  // NOTE(review): the number shown for a reduction is -action, which is one
  // greater than the rule index Parse() passes to Reduce (-action - 1).
  internal string ActionToString(int action) {
      if (action == 0) {
          return "";
      }
      if (action == -1) {
          return "ACCEPT";
      }
      return action > 0
          ? "S(" + action + ")"
          : "R(" + (-action) + ")";
  }
  // Looks up the display name of a non-terminal (given as a positive id)
  // in the shared tables' NonTerminalNames array.
  internal string GetNonTerminalName(int nonTerminal) {
      Debug.Assert(nonTerminal > 0);
      string[] names = _tables.NonTerminalNames;
      return names[nonTerminal];
  }
  // Returns the display name of a grammar symbol:
  //   symbol < 0  -> non-terminal with id -symbol
  //   symbol >= 0 -> terminal
  internal string GetSymbolName(int symbol) {
      if (symbol < 0) {
          return GetNonTerminalName(-symbol);
      }
      return Parser.GetTerminalName(symbol);
  }
  // Formats rule ruleIndex as "Lhs -> sym1 sym2 ... " (each symbol is
  // followed by a space), or "Lhs -> <empty>" for an empty right-hand side.
  internal string RuleToString(int ruleIndex) {
      Debug.Assert(ruleIndex >= 0);

      StringBuilder text = new StringBuilder();
      int packedRule = _tables.Rules[ruleIndex];

      text.Append(GetNonTerminalName(GetRuleLhsNonterminal(packedRule)));
      text.Append(" -> ");

      int rhsLength = GetRuleRhsLength(packedRule);
      if (rhsLength <= 0) {
          text.Append("<empty>");
          return text.ToString();
      }

      // Index of the first RHS symbol in the concatenated symbol array.
      int start = _tables.RuleRhsSymbolIndexes[ruleIndex];
      int end = start + rhsLength;
      for (int pos = start; pos < end; pos++) {
          text.Append(GetSymbolName(_tables.RuleRhsSymbols[pos]));
          text.Append(" ");
      }

      return text.ToString();
  }
  504. #endif
  // Writes the action and goto tables of all states to output as
  // comma-separated text (DEBUG builds only): a summary line, a header row
  // and one row per state. Only terminals and non-terminals that occur in at
  // least one state's table get a column.
  //
  // Fixes over the previous version:
  //  - the column loops now include the highest terminal / non-terminal id
  //    (they used to stop one short, so that column was never printed);
  //  - the header row is terminated with a newline, so the row of state 0 no
  //    longer continues on the header line.
  [Conditional("DEBUG")]
  public void DumpTables(TextWriter/*!*/ output) {
  #if DEBUG
      Dictionary<int, bool> terminals = new Dictionary<int, bool>();
      Dictionary<int, bool> nonterminals = new Dictionary<int, bool>();

      // Highest terminal / non-terminal id used by any state; -1 when there is none.
      int maxTerminal = -1;
      int maxNonTerminal = -1;

      for (int q = 0; q < _states.Length; q++) {
          State s = _states[q];
          if (s.Actions != null) {
              foreach (int t in s.Actions.Keys) {
                  if (t > maxTerminal) {
                      maxTerminal = t;
                  }
                  terminals[t] = true;
              }
          }

          if (s.GotoStates != null) {
              foreach (int t in s.GotoStates.Keys) {
                  if (t > maxNonTerminal) {
                      maxNonTerminal = t;
                  }
                  nonterminals[t] = true;
              }
          }
      }

      // The summary reports the highest ids seen, as before.
      output.WriteLine("States x (Terms + NonTerms) = {0} x ({1} + {2})", _states.Length, maxTerminal, maxNonTerminal);

      // Header row.
      output.Write("State,");
      output.Write("Default,");

      for (int t = 0; t <= maxTerminal; t++) {
          if (terminals.ContainsKey(t)) {
              output.Write(Parser.GetTerminalName(t));
              output.Write(",");
          }
      }

      for (int t = 0; t <= maxNonTerminal; t++) {
          if (nonterminals.ContainsKey(t)) {
              output.Write(t); // TODO
              output.Write(",");
          }
      }
      output.WriteLine();

      // One row per state.
      for (int q = 0; q < _states.Length; q++) {
          State s = _states[q];
          output.Write(q);
          output.Write(",");

          // The default action is only shown for states without an action table.
          if (s.Actions == null) {
              output.Write(ActionToString(s.DefaultAction));
          }
          output.Write(",");

          for (int t = 0; t <= maxTerminal; t++) {
              if (terminals.ContainsKey(t)) {
                  if (s.Actions != null) {
                      int action;
                      s.Actions.TryGetValue(t, out action);
                      output.Write(ActionToString(action));
                  }
                  output.Write(",");
              }
          }

          for (int t = 0; t <= maxNonTerminal; t++) {
              if (nonterminals.ContainsKey(t)) {
                  if (s.GotoStates != null) {
                      int state;
                      if (s.GotoStates.TryGetValue(t, out state)) {
                          output.Write(state);
                      }
                  }
                  output.Write(",");
              }
          }

          output.WriteLine();
      }
  #endif
  }
  579. #endregion
  580. }
  581. #endregion
  582. }