PageRenderTime 51ms CodeModel.GetById 15ms app.highlight 30ms RepoModel.GetById 1ms app.codeStats 0ms

/IronPython_Main/Languages/Ruby/Ruby/Compiler/Parser/GPPG.cs

#
C# | 718 lines | 481 code | 136 blank | 101 comment | 113 complexity | 72ef55e42add05130ddc06a2c1633b2f MD5 | raw file
  1/* ****************************************************************************
  2 *
  3 * Copyright (c) Microsoft Corporation. 
  4 *
  5 * This source code is subject to terms and conditions of the Apache License, Version 2.0. A 
  6 * copy of the license can be found in the License.html file at the root of this distribution. If 
  7 * you cannot locate the  Apache License, Version 2.0, please send an email to 
  8 * ironruby@microsoft.com. By using this source code in any fashion, you are agreeing to be bound 
  9 * by the terms of the Apache License, Version 2.0.
 10 *
 11 * You must not remove this notice, or any other, from this software.
 12 *
 13 *
 14 * ***************************************************************************/
 15
 16using System;
 17using System.Collections.Generic;
 18using System.Diagnostics;
 19using System.IO;
 20using Microsoft.Scripting.Utils;
 21using System.Text;
 22using System.Threading;
 23
 24using TValue = IronRuby.Compiler.TokenValue;
 25using TLocation = Microsoft.Scripting.SourceSpan;
 26
 27namespace IronRuby.Compiler {
 28
 29    #region State
 30
 31    internal sealed class State {
 32#if DEBUG
 33        private int _id;
 34        public int Id { get { return _id; } set { _id = value; } }
 35#endif
 36
 37        // State x Terminal -> ERROR + SHIFT(State) + REDUCE(State) + ACCEPT
 38        //
 39        // SHIFT > 0
 40        // ERROR == 0
 41        // REDUCE < 0
 42        // ACCEPT == -1
 43        private readonly Dictionary<int, int> _actions;
 44
 45        // State x NonTerminal -> State
 46        private readonly Dictionary<int, int> _gotos;   
 47
 48        // ParseAction - default action if terminal not in _actions dict
 49        private readonly int _defaultAction;		  
 50
 51        public int DefaultAction {
 52            get { return _defaultAction; }
 53        }
 54
 55        public Dictionary<int, int> GotoStates {
 56            get { return _gotos; }
 57        }
 58
 59        public Dictionary<int, int> Actions {
 60            get { return _actions; }
 61        }
 62
 63        public State(Dictionary<int, int> actions, Dictionary<int, int> gotos, int defaultAction) {
 64            _actions = actions;
 65            _gotos = gotos;
 66            _defaultAction = defaultAction;
 67        }
 68
 69#if DEBUG
 70        public override string/*!*/ ToString() {
 71            return _id.ToString();
 72        }
 73#endif
 74    }
 75
 76    #endregion
 77
 78    #region ParserTables
 79
 80    internal sealed class ParserTables {
 81        public State[] States;
 82        // upper word: LhsNonTerminal
 83        // lower word: RhsLength
 84        public int[] Rules;
 85        public int ErrorToken;
 86        public int EofToken;
 87
 88#if DEBUG // Metadata
 89        internal string[] NonTerminalNames;
 90
 91        // concatenated symbols of rule RHSs; 
 92        // symbol < 0 represents a non-terminal
 93        // symbol >= 0 represents a terminal
 94        internal short[] RuleRhsSymbols;
 95
 96        // rule index -> index in RuleRhsSymbols array (calculated):
 97        internal ushort[] RuleRhsSymbolIndexes;
 98#endif
 99    }
100
101    #endregion
102
103    #region IParserLogger
104
105    internal interface IParserLogger {
106        void BeforeReduction(int ruleId, int rhsLength);
107        void BeforeShift(int stateId, int tokenId, bool isErrorShift);
108        void BeforeGoto(int stateId, int ruleId);
109        void StateEntered();
110        void NextToken(int tokenId);
111    }
112
113    #endregion
114
115    #region ShiftReduceParser
116
117    public partial class Parser {
118        private static ParserTables _tables;
119        private static readonly object _tablesLock = new object();
120
121        private TValue yyval;
122        private TLocation yyloc;
123
124        // Experimental : last yylloc prior to call of yylex()
125        private TLocation _lastTokenSpan;
126
127        private int _nextToken;
128        private State _currentState;
129
130        private bool _recovering;
131        private int _tokensSinceLastError;
132
133        private ParserStack<State, TValue, TLocation>/*!*/ _stack;
134        private int _errorToken;
135        private int _eofToken;
136
137        private State[] _states;
138        private int[] _rules;
139
140#if DEBUG
141        // test hooks:
142        internal State CurrentState { get { return _currentState; } }
143        internal ParserStack<State, TValue, TLocation>/*!*/ Stack { get { return _stack; } }
144        internal State[] States { get { return _states; } }
145        internal int[] Rules { get { return _rules; } }
146        internal ParserTables Tables { get { return _tables; } } 
147#endif
148        // methods that must be implemented by the parser
149        //private void InitializeGenerated(ParserTables/*!*/ tables);
150        //private TLocation MergeLocations(TLocation start, TLocation end);
151        //private TValue GetTokenValue();     // lexical value: set by scanner
152        //private TLocation GetTokenSpan();   // location value: set by scanner
153        //private int GetNextToken();
154        //private void ReportSyntaxError(string message);
155
156        internal static int GetRuleRhsLength(int ruleDef) {
157            return ruleDef & 0xffff;
158        }
159
160        internal static int GetRuleLhsNonterminal(int ruleDef) {
161            return ruleDef >> 16;
162        }
163
164        private void InitializeTables() {
165            _stack = new ParserStack<State, TValue, TLocation>();
166
167            if (_tables == null) {
168                lock (_tablesLock) {
169                    if (_tables == null) {
170                        Debug.Assert(typeof(TLocation).IsValueType);
171
172                        ParserTables tables = new ParserTables();
173                        InitializeGeneratedTables(tables);
174#if DEBUG
175                        InitializeMetadata(tables);
176                        InitializeRulesMetadata(tables);
177#endif
178                        Thread.MemoryBarrier();
179                        _tables = tables;
180                    }
181                }
182            }
183
184            _states = _tables.States;
185            _rules = _tables.Rules;
186            _errorToken = _tables.ErrorToken;
187            _eofToken = _tables.EofToken;
188        }
189
190        // TODO: possible optimization: build a single dictionary mapping all goto and actions for all states.
191        // This (custom) dict might be precomputed by generator and allocated in a single array.
192        // This would safe rellocation of ~650kB of Dictionary.Entry[] since the array would be considered a large object.
193        private State[]/*!*/ BuildStates(short[]/*!*/ data) {
194            Debug.Assert(data != null && data.Length > 0);
195
196            // 
197            // serialized structure:
198            //
199            // length, 
200            // (
201            //   (action_count: positive short, goto_count: positive short) | (action_count: negative short), 
202            //   (key: short, value: short){action_count} | (defaultAction: short), 
203            //   (key: short, value: short){goto_count} 
204            // ){length}
205            //
206            // where action_count is 
207            //   > 0  ... a number of items in actions hashtable 
208            //   == 0 ... there is no action hashtable, but there is a single integer default action id
209            //   < 0  ... there is no action hashtable and no goto table, the value is default action id
210            // goto_count is a number of items in gotos hashtable,
211            //   zero means there is no goto hashtable
212            //
213
214            int offset = 0;
215            State[] states = new State[data[offset++]];
216
217            for (int i = 0; i < states.Length; i++) {
218                int actionCount = data[offset++];
219
220                Dictionary<int, int> actions = null;
221                Dictionary<int, int> gotos = null;
222                int defaultAction = 0;
223
224                if (actionCount >= 0) {
225                    int gotoCount = data[offset++];
226                    Debug.Assert(gotoCount >= 0);
227
228                    if (actionCount > 0) {
229                        actions = new Dictionary<int, int>(actionCount);
230                        for (int j = 0; j < actionCount; j++) {
231                            actions.Add(data[offset++], data[offset++]);
232                        }
233                    } else {
234                        defaultAction = data[offset++];
235                    }
236
237                    if (gotoCount > 0) {
238                        gotos = new Dictionary<int, int>(gotoCount);
239                        for (int j = 0; j < gotoCount; j++) {
240                            Debug.Assert(data[offset] < 0);
241                            gotos.Add(-data[offset++], data[offset++]);
242                        }
243                    }
244                } else {
245                    defaultAction = actionCount;
246                }
247
248                states[i] = new State(actions, gotos, defaultAction);
249#if DEBUG
250                states[i].Id = i;
251#endif
252            }
253
254            return states;
255        }
256
257        private bool Parse() {
258
259            _nextToken = 0;
260            _currentState = _states[0];
261            _lastTokenSpan = GetTokenSpan();
262
263            _stack.Push(_currentState, yyval, yyloc);
264
265            while (true) {
266
267                LogStateEntered();
268                
269                int action = _currentState.DefaultAction;
270
271                if (_currentState.Actions != null) {
272                    if (_nextToken == 0) {
273
274                        // We save the last token span, so that the location span
275                        // of production right hand sides that begin or end with a
276                        // nullable production will be correct.
277                        _lastTokenSpan = GetTokenSpan();
278                        _nextToken = GetNextToken();
279                    }
280
281                    LogNextToken(_nextToken);
282
283                    _currentState.Actions.TryGetValue(_nextToken, out action);
284                }
285
286                if (action > 0) {
287                    LogBeforeShift(action, _nextToken, false);
288                    Shift(action);
289                } else if (action < 0) {
290                    Reduce(-action - 1);
291
292                    // accept
293                    if (action == -1) {
294                        return true;
295                    }
296                } else if (action == 0) {
297                    // error
298                    if (!ErrorRecovery()) {
299                        return false;
300                    }
301                }
302            }
303        }
304
305        private void Shift(int stateId) {
306            _currentState = _states[stateId];
307
308            _stack.Push(_currentState, GetTokenValue(), GetTokenSpan());
309
310            if (_recovering) {
311                if (_nextToken != _errorToken) {
312                    _tokensSinceLastError++;
313                }
314
315                if (_tokensSinceLastError > 5) {
316                    _recovering = false;
317                }
318            }
319
320            if (_nextToken != _eofToken) {
321                _nextToken = 0;
322            }
323        }
324
325        private void Reduce(int ruleId) {
326            int ruleDef = _rules[ruleId];
327            int rhsLength = GetRuleRhsLength(ruleDef);
328
329            LogBeforeReduction(ruleId, rhsLength);
330
331            if (rhsLength == 0) {
332                // The location span for an empty production will start with the
333                // beginning of the next lexeme, and end with the finish of the
334                // previous lexeme.  This gives the correct behaviour when this
335                // nonsense value is used in later Merge operations.
336                yyloc = MergeLocations(_lastTokenSpan, GetTokenSpan());
337            } else if (rhsLength == 1) {
338                yyloc = _stack.PeekLocation(1);
339            } else {
340                TLocation at1 = GetLocation(rhsLength);
341                TLocation atN = GetLocation(1);
342                yyloc = MergeLocations(at1, atN);
343            }
344
345            DoAction(ruleId);
346
347            _stack.Pop(rhsLength);
348
349            var currentState = _stack.PeekState(1);
350
351            int gotoState;
352            if (currentState.GotoStates.TryGetValue(GetRuleLhsNonterminal(ruleDef), out gotoState)) {
353                LogBeforeGoto(gotoState, ruleId);
354                currentState = _states[gotoState];
355            }
356
357            _stack.Push(currentState, yyval, yyloc);
358
359            _currentState = currentState;
360        }
361
362        // Default semantic action used when no action is specified in the rule.
363        private void DoDefaultAction() {
364            yyval = _stack.PeekValue(1);
365        }
366
367        public bool ErrorRecovery() {
368            bool discard;
369
370            if (!_recovering) { // if not recovering from previous error
371                ReportSyntaxError(GetSyntaxErrorMessage());
372            }
373
374            if (!FindErrorRecoveryState())
375                return false;
376
377            //
378            //  The interim fix for the "looping in error recovery"
379            //  artifact involved moving the setting of the recovering 
380            //  bool until after invalid tokens have been discarded.
381            //
382            ShiftErrorToken();
383            discard = DiscardInvalidTokens();
384            _recovering = true;
385            _tokensSinceLastError = 0;
386            return discard;
387        }
388
389        private string GetSyntaxErrorMessage() {
390            StringBuilder errorMsg = new StringBuilder();
391            errorMsg.AppendFormat("syntax error, unexpected {0}", Tokenizer.GetTokenDescription((Tokens)_nextToken)); // TODO: actual value?
392
393            if (_currentState.Actions.Count < 7) {
394                int i = 0;
395                int last = _currentState.Actions.Keys.Count - 1;
396                foreach (int terminal in _currentState.Actions.Keys) {
397                    if (i == 0) {
398                        errorMsg.Append(", expecting ");
399                    } else if (i == last) {
400                        errorMsg.Append(", or ");
401                    } else {
402                        errorMsg.Append(", ");
403                    }
404
405                    errorMsg.Append(Tokenizer.GetTokenDescription((Tokens)terminal));
406                    i++;
407                }
408            }
409            return errorMsg.ToString();
410        }
411
412        public void ShiftErrorToken() {
413            int oldNext = _nextToken;
414            _nextToken = _errorToken;
415
416            int state = _currentState.Actions[_nextToken];
417            LogBeforeShift(state, _nextToken, true);
418            Shift(state);
419
420            _nextToken = oldNext;
421        }
422
423
424        public bool FindErrorRecoveryState() {
425            // pop states until one found that accepts error token
426            while (true) {
427
428                // shift
429                int action;
430                if (_currentState.Actions != null && _currentState.Actions.TryGetValue(_errorToken, out action) && action > 0) {
431                    return true;
432                }
433
434                // LogState("Error, popping state", _stateStack.Peek(1));
435
436                _stack.Pop();
437
438                if (_stack.IsEmpty) {
439                    // Log("Aborting: didn't find a state that accepts error token");
440                    return false;
441                } else {
442                    _currentState = _stack.PeekState(1);
443                }
444            }
445        }
446
447        public bool DiscardInvalidTokens() {
448
449            int action = _currentState.DefaultAction;
450
451            if (_currentState.Actions != null) {
452                
453                // Discard tokens until find one that works ...
454                while (true) {
455                    if (_nextToken == 0) {
456                        _nextToken = GetNextToken();
457                    }
458
459                    LogNextToken(_nextToken);
460
461                    if (_nextToken == _eofToken)
462                        return false;
463
464                    _currentState.Actions.TryGetValue(_nextToken, out action);
465
466                    if (action != 0) {
467                        return true;
468                    }
469
470                    // LogToken("Error, discarding token", _nextToken);
471                    _nextToken = 0;
472                }
473
474            } else if (_recovering && _tokensSinceLastError == 0) {
475                // 
476                //  Boolean recovering is not set until after the first
477                //  error token has been shifted.  Thus if we get back 
478                //  here with recovering set and no tokens read we are
479                //  looping on the same error recovery action.  This 
480                //  happens if current_state.parser_table is null because
481                //  the state has an LR(0) reduction, but not all
482                //  lookahead tokens are valid.  This only occurs for
483                //  error productions that *end* on "error".
484                //
485                //  This action discards tokens one at a time until
486                //  the looping stops.  Another attack would be to always
487                //  use the LALR(1) table if a production ends on "error"
488                //
489                // LogToken("Error, panic discard of {0}", _nextToken);
490                _nextToken = 0;
491                return true;
492            } else {
493                return true;
494            }
495        }
496
497        private TValue GetValue(int depth) {
498            return _stack.PeekValue(depth);
499        }
500
501        private TLocation GetLocation(int depth) {
502            return _stack.PeekLocation(depth);
503        }
504
505        private void ClearInput() {
506            // experimental in this version.
507            _nextToken = 0;
508        }
509
510        private void StopErrorRecovery() {
511            _recovering = false;
512        }
513
514        #region Debug Logging
515
#if DEBUG
        // Receiver of trace events; null while logging is disabled.
        private IParserLogger _logger;
#endif

        // Starts forwarding trace events to the given logger (DEBUG builds only).
        [Conditional("DEBUG")]
        internal void EnableLogging(IParserLogger/*!*/ logger) {
#if DEBUG
            Assert.NotNull(logger);
            _logger = logger;
#endif
        }

        // Stops forwarding trace events.
        [Conditional("DEBUG")]
        internal void DisableLogging() {
#if DEBUG
            _logger = null;
#endif
        }

        [Conditional("DEBUG")]
        private void LogStateEntered() {
#if DEBUG
            IParserLogger logger = _logger;
            if (logger != null) {
                logger.StateEntered();
            }
#endif
        }

        [Conditional("DEBUG")]
        private void LogNextToken(int tokenId) {
#if DEBUG
            IParserLogger logger = _logger;
            if (logger != null) {
                logger.NextToken(tokenId);
            }
#endif
        }

        [Conditional("DEBUG")]
        private void LogBeforeReduction(int ruleId, int rhsLength) {
#if DEBUG
            IParserLogger logger = _logger;
            if (logger != null) {
                logger.BeforeReduction(ruleId, rhsLength);
            }
#endif
        }

        [Conditional("DEBUG")]
        private void LogBeforeShift(int stateId, int tokenId, bool isErrorShift) {
#if DEBUG
            IParserLogger logger = _logger;
            if (logger != null) {
                logger.BeforeShift(stateId, tokenId, isErrorShift);
            }
#endif
        }

        [Conditional("DEBUG")]
        private void LogBeforeGoto(int stateId, int ruleId) {
#if DEBUG
            IParserLogger logger = _logger;
            if (logger != null) {
                logger.BeforeGoto(stateId, ruleId);
            }
#endif
        }
569
570        #endregion
571
572        #region Parser Reflection
573        
574#if DEBUG
575        
576        private static void InitializeRulesMetadata(ParserTables/*!*/ tables) {
577            ushort[] indexes = new ushort[tables.Rules.Length];
578            ushort index = 0;
579            for (int i = 0; i < indexes.Length; i++) {
580                indexes[i] = index;
581                index += (ushort)(tables.Rules[i] & 0xffff);
582            }
583            tables.RuleRhsSymbolIndexes = indexes;
584        }
585        
586        // SHIFT > 0
587        // ERROR == 0
588        // REDUCE < 0
589        // ACCEPT == -1
590        internal string ActionToString(int action) {
591            if (action > 0) return "S(" + action + ")";
592            if (action == 0) return "";
593            if (action == -1) return "ACCEPT";
594            return "R(" + (-action) + ")"; 
595        }
596
597        internal string GetNonTerminalName(int nonTerminal) {
598            Debug.Assert(nonTerminal > 0);
599            return _tables.NonTerminalNames[nonTerminal];
600        }
601
602        // < 0 -> non-terminal
603        // > 0 -> terminal
604        internal string GetSymbolName(int symbol) {
605            return (symbol < 0) ? GetNonTerminalName(-symbol) : Parser.GetTerminalName(symbol);
606        }
607
608        internal string RuleToString(int ruleIndex) {
609            Debug.Assert(ruleIndex >= 0);
610            StringBuilder sb = new StringBuilder();
611            sb.Append(GetNonTerminalName(GetRuleLhsNonterminal(_tables.Rules[ruleIndex])));
612            sb.Append(" -> ");
613
614            // index of the first RHS symbol:
615            int rhsLength = GetRuleRhsLength(_tables.Rules[ruleIndex]);
616            if (rhsLength > 0) {
617                int first = _tables.RuleRhsSymbolIndexes[ruleIndex];
618                for (int i = 0; i < rhsLength; i++) {
619                    sb.Append(GetSymbolName(_tables.RuleRhsSymbols[first + i]));
620                    sb.Append(" ");
621                }
622            } else {
623                sb.Append("<empty>");
624            }
625
626            return sb.ToString();
627        }
628#endif
629
630        [Conditional("DEBUG")]
631        public void DumpTables(TextWriter/*!*/ output) {
632#if DEBUG
633            Dictionary<int, bool> terminals = new Dictionary<int, bool>();
634            Dictionary<int, bool> nonterminals = new Dictionary<int, bool>();
635
636            int termCount = -1;
637            int ntermCount = -1;
638            for (int q = 0; q < _states.Length; q++) {
639                State s = _states[q];
640                if (s.Actions != null) {
641                    foreach (int t in s.Actions.Keys) {
642                        if (t > termCount) {
643                            termCount = t;
644                        }
645
646                        terminals[t] = true;
647                    }
648                }
649
650                if (s.GotoStates != null) {
651                    foreach (int t in s.GotoStates.Keys) {
652                        if (t > ntermCount) {
653                            ntermCount = t;
654                        }
655                        nonterminals[t] = true;
656                    }
657                }
658            }
659
660            output.WriteLine("States x (Terms + NonTerms) = {0} x ({1} + {2})", _states.Length, termCount, ntermCount);
661
662            output.Write("State,");
663            output.Write("Default,");
664            for (int t = 0; t < termCount; t++) {
665                if (terminals.ContainsKey(t)) {
666                    output.Write(Parser.GetTerminalName(t));
667                    output.Write(",");
668                }
669            }
670
671            for (int t = 0; t < ntermCount; t++) {
672                if (nonterminals.ContainsKey(t)) {
673                    output.Write(t); // TODO
674                    output.Write(",");
675                }
676            }
677
678            for (int q = 0; q < _states.Length; q++) {
679                State s = _states[q];
680                output.Write(q);
681                output.Write(",");
682                if (s.Actions == null) {
683                    output.Write(ActionToString(s.DefaultAction));
684                }
685                output.Write(",");
686
687                for (int t = 0; t < termCount; t++) {
688                    if (terminals.ContainsKey(t)) {
689                        int action;
690                        if (s.Actions != null) {
691                            s.Actions.TryGetValue(t, out action);
692                            output.Write(ActionToString(action));
693                        }
694                        output.Write(",");
695                    }
696                }
697
698                for (int t = 0; t < ntermCount; t++) {
699                    if (nonterminals.ContainsKey(t)) {
700                        if (s.GotoStates != null) {
701                            int state;
702                            if (s.GotoStates.TryGetValue(t, out state)) {
703                                output.Write(state);
704                            }
705                        }
706                        output.Write(",");
707                    }
708                }
709                output.WriteLine();
710            }
711#endif
712        }
713
714        #endregion
715    }
716
717    #endregion
718}