PageRenderTime 36ms CodeModel.GetById 19ms app.highlight 12ms RepoModel.GetById 2ms app.codeStats 0ms

/src/NUnit/UiException/CSharpParser/TokenClassifier.cs

#
C# | 433 lines | 316 code | 31 blank | 86 comment | 4 complexity | e3c1f10fe9495441401ec3cd6557eadf MD5 | raw file
  1// ****************************************************************
  2// This is free software licensed under the NUnit license. You may
  3// obtain a copy of the license at http://nunit.org
  4// ****************************************************************
  5
  6using System;
  7using System.Collections.Generic;
  8using System.Text;
  9
 10namespace NUnit.UiException.CodeFormatters
 11{
 12    /// <summary>
 13    /// Used at an internal stage to convert LexToken into ClassifiedToken. This class provides
 14    /// a very basic semantic analysis that sorts text into one of the categories below:
 15    ///     - regular code,
 16    ///     - developer comments,
 17    ///     - strings / characters.
 18    /// The output of this class is used by CSharpCodeFormatter to achieve the basic syntax coloring.
 19    /// </summary>
 20    public class TokenClassifier
 21    {
 22        #region SMSTATE code
 23
 24        // the list below contains constant values defining states for the finite
 25        // state machine that does all the work of converting LexToken into ClassifiedToken.
 26        // for instance, Lexer can send inputs like:
 27        //
 28        //   [Text][Separator][CommentC_Open][Text][CommentC_Close]
 29        //
 30        // This LexToken sequence can for instance be converted that way by TokenClassifier.
 31        //
 32        //   - [Text][Separator]                     => [Code]
 33        //   - [CommentC_Open][Text][CommentC_Close] => [Comment]
 34        // 
 35
 36        /// <summary>
 37        /// State code for the smState machine.
 38        /// State when reaching a code block.
 39        /// </summary>
 40        public const int SMSTATE_CODE = 0;
 41
 42        /// <summary>
 43        /// State code for the smState machine.
 44        /// State when reaching a C comment block.
 45        /// </summary>
 46        public const int SMSTATE_CCOMMENT = 1;
 47
 48        /// <summary>
 49        /// State code for the smState machine.
 50        /// State when reaching a C++ comment block.
 51        /// </summary>
 52        public const int SMSTATE_CPPCOMMENT = 2;
 53
 54        /// <summary>
 55        /// State code for the smState machine.
 56        /// State when reaching a char surrounded by single quotes.
 57        /// </summary>
 58        public const int SMSTATE_CHAR = 3;
 59
 60        /// <summary>
 61        /// State code for the smState machine.
 62        /// State when reaching a string surrounded by double quotes.
 63        /// </summary>
 64        public const int SMSTATE_STRING = 4;
 65
 66        #endregion
 67
 68        /// <summary>
 69        /// A finite smState machine where states are: SMSTATE values and
 70        /// transitions are LexToken.
 71        /// </summary>
 72        private StateMachine _sm;
 73
 74        /// <summary>
 75        /// The current StateMachine's SMSTATE code.
 76        /// </summary>
 77        private int _sm_output;
 78
 79        /// <summary>
 80        /// Makes a link between SMSTATE code and ClassificationTag.
 81        /// </summary>
 82        private Dictionary<int, ClassificationTag> _tags;
 83
 84        /// <summary>
 85        /// Contains the list of C# keywords.
 86        /// </summary>
 87        private Dictionary<string, bool> _keywords;
 88
 89        /// <summary>
 90        /// Indicates whether the classifier is in escaping mode.
 91        /// This flag is set to true when parsing "\\" and
 92        /// can influence how the following LexToken is classified.
 93        /// </summary>
 94        private bool _escaping;
 95
 96        /// <summary>
 97        /// Build a new instance of TokenClassifier.
 98        /// </summary>
 99        public TokenClassifier()
100        {
101            string[] words;
102
103            _sm = new StateMachine();
104
105            _tags = new Dictionary<int, ClassificationTag>();
106            _tags.Add(SMSTATE_CODE, ClassificationTag.Code);
107            _tags.Add(SMSTATE_CCOMMENT, ClassificationTag.Comment);
108            _tags.Add(SMSTATE_CPPCOMMENT, ClassificationTag.Comment);
109            _tags.Add(SMSTATE_CHAR, ClassificationTag.String);
110            _tags.Add(SMSTATE_STRING, ClassificationTag.String);
111
112            // build the list of predefined keywords.
113            // this is from the official msdn site. Curiously, some keywords
114            // were ommited from the official documentation.
115            //   For instance "get", "set", "region" and "endregion" were
116            // not part of the official list. Maybe it's a mistake or a misunderstanding
117            // whatever... I want them paint in blue as well!
118
119            words = new string[] {
120                "abstract", "event", "new", "struct", "as", "explicit", "null", "switch",
121                "base", "extern", "object", "this", "bool", "false", "operator", "throw",
122                "break", "finally", "out", "true", "byte", "fixed", "override", "try", "case",
123                "float", "params", "typeof", "catch", "for", "private", "uint", "char",
124                "foreach", "protected", "ulong", "checked", "goto", "public", "unchecked",
125                "class", "if", "readonly", "unsafe", "const", "implicit", "ref", "ushort",
126                "continue", "in", "return", "using", "decimal", "int", "sbyte", "virtual",
127                "default", "interface", "sealed", "volatile", "delegate", "internal",
128                "short", "void", "do", "is", "sizeof", "while", "double", "lock", "stackalloc",
129                "else", "long", "static", "enum", "namespace", "string", "partial", "get", "set",
130                "region", "endregion",
131            };
132
133            _keywords = new Dictionary<string, bool>();
134            foreach (string key in words)
135                _keywords.Add(key, true);
136
137            Reset();
138
139            return;
140        }
141
142        /// <summary>
143        /// Tells whether TokenClassifier is currently in escaping mode. When true,
144        /// this flag causes TokenClassifier to override the final classification
145        /// of a basic entity (such as: ") to be treated as normal text instead of
146        /// being interpreted as a string delimiter.
147        /// </summary>
148        public bool Escaping
149        {
150            get { return (_escaping); }
151        }
152
153        /// <summary>
154        /// Reset the StateMachine to default value. (code block).
155        /// </summary>
156        public void Reset()
157        {
158            _sm_output = SMSTATE_CODE;
159            _escaping = false;
160
161            return;
162        }
163
164        /// <summary>
165        /// Classify the given LexToken into a ClassificationTag.
166        /// </summary>
167        /// <param name="token">The token to be classified.</param>
168        /// <returns>The smState value.</returns>
169        public ClassificationTag Classify(LexToken token)
170        {
171            int classTag;
172
173            UiExceptionHelper.CheckNotNull(token, "token");
174
175            classTag = AcceptLexToken(token);
176
177            if (classTag == SMSTATE_CODE &&
178                _keywords.ContainsKey(token.Text))
179                return (ClassificationTag.Keyword);
180
181            // Parsing a token whoose Text value is set to '\'
182            // causes the classifier to set/reset is escaping mode.
183
184            if (token.Text == "\\" &&
185                _sm_output == SMSTATE_STRING &&
186                !_escaping)
187                _escaping = true;
188            else
189                _escaping = false;
190
191            return (_tags[classTag]);
192        }
193
194        /// <summary>
195        /// Classify the given token and get its corresponding SMSTATE value.
196        /// </summary>
197        /// <param name="token">The LexToken to be classified.</param>
198        /// <returns>An SMSTATE value.</returns>
199        protected int AcceptLexToken(LexToken token)
200        {
201            int smState;
202
203            if (_escaping)
204                return (SMSTATE_STRING);
205
206            smState = GetTokenSMSTATE(_sm_output, token.Tag);
207            _sm_output = GetSMSTATE(_sm_output, token.Tag);
208
209            return (smState);
210        }
211
212        /// <summary>
213        /// Gets the SMSTATE under the "transition" going from "smState".
214        /// </summary>
215        /// <param name="smState">The current smState.</param>
216        /// <param name="transition">The current LexerTag.</param>
217        /// <returns>The new smState.</returns>
218        protected int GetSMSTATE(int smState, LexerTag transition)
219        {
220            return (_sm.GetSMSTATE(smState, transition));
221        }
222
223        /// <summary>
224        /// Gets a token SMSTATE under the "transition" going from "smState".
225        /// </summary>
226        /// <param name="smState">The current smState machine.</param>
227        /// <param name="transition">The LexerTag to be classified.</param>
228        /// <returns>The LexerTag's classification.</returns>
229        protected int GetTokenSMSTATE(int smState, LexerTag transition)
230        {
231            return (_sm.GetTokenSMSTATE(smState, transition));
232        }
233
234        #region StateMachine
235
236        /// <summary>
237        /// Defines a transition (of a state machine).
238        /// </summary>
239        class TransitionData
240        {
241            /// <summary>
242            /// The current transition.
243            /// </summary>
244            public LexerTag Transition;
245
246            /// <summary>
247            /// The SMSTATE code reached when following that transition.
248            /// </summary>
249            public int SMSTATE;
250
251            /// <summary>
252            /// The TokenSMSTATE reached when following that transition.
253            /// </summary>
254            public int TokenSMSTATE;
255
256            public TransitionData(LexerTag transition, int smState)
257            {
258                Transition = transition;
259
260                SMSTATE = smState;
261                TokenSMSTATE = smState;
262
263                return;
264            }
265
266            public TransitionData(LexerTag transition, int smState, int tokenSmState) :
267                this(transition, smState)
268            {
269                TokenSMSTATE = tokenSmState;
270            }
271        }
272
273        /// <summary>
274        /// Defines a state (of a state machine) and its associated transitions.
275        /// </summary>
276        class State
277        {
278            public int InitialState;
279            public TransitionData[] Transitions;
280
281            public State(int initialState, TransitionData[] transitions)
282            {
283                int i;
284                int j;
285
286                UiExceptionHelper.CheckNotNull(transitions, "transitions");
287                UiExceptionHelper.CheckTrue(
288                    transitions.Length == 8,
289                    "expecting transitions.Length to be 8",
290                    "transitions");
291
292                for (i = 0; i < transitions.Length; ++i)
293                    for (j = 0; j < transitions.Length; ++j)
294                    {
295                        if (j == i)
296                            continue;
297
298                        if (transitions[j].Transition == transitions[i].Transition)
299                            UiExceptionHelper.CheckTrue(false,
300                                String.Format("transition '{0}' already present", transitions[j].Transition),
301                                "transitions");
302                    }
303
304
305                InitialState = initialState;
306                Transitions = transitions;
307
308                return;
309            }
310
311            public TransitionData this[LexerTag transition]
312            {
313                get
314                {
315                    foreach (TransitionData couple in Transitions)
316                        if (couple.Transition == transition)
317                            return (couple);
318                    return (null);
319                }
320            }
321        }
322
323        /// <summary>
324        /// A finite state machine. Where states are SMSTATE codes and
325        /// transitions are LexTokens.
326        /// </summary>
327        class StateMachine
328        {
329            private State[] _states;
330
331            public StateMachine()
332            {
333                _states = new State[5];
334
335                // defines transitions from SMSTATE_CODE
336                _states[0] = new State(
337                    SMSTATE_CODE,
338                    new TransitionData[] {
339                        new TransitionData(LexerTag.EndOfLine, SMSTATE_CODE),
340                        new TransitionData(LexerTag.Separator, SMSTATE_CODE),
341                        new TransitionData(LexerTag.Text, SMSTATE_CODE),
342                        new TransitionData(LexerTag.CommentC_Open, SMSTATE_CCOMMENT),
343                        new TransitionData(LexerTag.CommentC_Close, SMSTATE_CODE, SMSTATE_CCOMMENT),
344                        new TransitionData(LexerTag.CommentCpp, SMSTATE_CPPCOMMENT),
345                        new TransitionData(LexerTag.SingleQuote, SMSTATE_CHAR),
346                        new TransitionData(LexerTag.DoubleQuote, SMSTATE_STRING),
347                    });
348
349                // defines transitions from SMSTATE_CCOMMENT
350                _states[1] = new State(
351                    SMSTATE_CCOMMENT,
352                    new TransitionData[] {
353                        new TransitionData(LexerTag.EndOfLine, SMSTATE_CCOMMENT),
354                        new TransitionData(LexerTag.Separator, SMSTATE_CCOMMENT),
355                        new TransitionData(LexerTag.Text, SMSTATE_CCOMMENT),
356                        new TransitionData(LexerTag.CommentC_Open, SMSTATE_CCOMMENT),
357                        new TransitionData(LexerTag.CommentC_Close, SMSTATE_CODE, SMSTATE_CCOMMENT),
358                        new TransitionData(LexerTag.CommentCpp, SMSTATE_CCOMMENT),
359                        new TransitionData(LexerTag.SingleQuote, SMSTATE_CCOMMENT),
360                        new TransitionData(LexerTag.DoubleQuote, SMSTATE_CCOMMENT),
361                    });
362
363                // defines transitions from SMSTATE_CPPCOMMENT
364                _states[2] = new State(
365                    SMSTATE_CPPCOMMENT,
366                    new TransitionData[] {
367                        new TransitionData(LexerTag.EndOfLine, SMSTATE_CODE),
368                        new TransitionData(LexerTag.Separator, SMSTATE_CPPCOMMENT),
369                        new TransitionData(LexerTag.Text, SMSTATE_CPPCOMMENT),
370                        new TransitionData(LexerTag.CommentC_Open, SMSTATE_CPPCOMMENT),
371                        new TransitionData(LexerTag.CommentC_Close, SMSTATE_CPPCOMMENT),
372                        new TransitionData(LexerTag.CommentCpp, SMSTATE_CPPCOMMENT),
373                        new TransitionData(LexerTag.SingleQuote, SMSTATE_CPPCOMMENT),
374                        new TransitionData(LexerTag.DoubleQuote, SMSTATE_CPPCOMMENT),
375                    });
376
377                // defines transition from SMSTATE_CHAR
378                _states[3] = new State(
379                    SMSTATE_CHAR,
380                    new TransitionData[] {
381                        new TransitionData(LexerTag.EndOfLine, SMSTATE_CHAR),
382                        new TransitionData(LexerTag.Separator, SMSTATE_CHAR),
383                        new TransitionData(LexerTag.Text, SMSTATE_CHAR),
384                        new TransitionData(LexerTag.CommentC_Open, SMSTATE_CHAR),
385                        new TransitionData(LexerTag.CommentC_Close, SMSTATE_CHAR),
386                        new TransitionData(LexerTag.CommentCpp, SMSTATE_CHAR),
387                        new TransitionData(LexerTag.SingleQuote, SMSTATE_CODE, SMSTATE_CHAR),
388                        new TransitionData(LexerTag.DoubleQuote, SMSTATE_CHAR),
389                    });
390
391                // defines transition from SMSTATE_STRING
392                _states[4] = new State(
393                    SMSTATE_STRING,
394                    new TransitionData[] {
395                        new TransitionData(LexerTag.EndOfLine, SMSTATE_STRING),
396                        new TransitionData(LexerTag.Separator, SMSTATE_STRING),
397                        new TransitionData(LexerTag.Text, SMSTATE_STRING),
398                        new TransitionData(LexerTag.CommentC_Open, SMSTATE_STRING),
399                        new TransitionData(LexerTag.CommentC_Close, SMSTATE_STRING),
400                        new TransitionData(LexerTag.CommentCpp, SMSTATE_STRING),
401                        new TransitionData(LexerTag.SingleQuote, SMSTATE_STRING),
402                        new TransitionData(LexerTag.DoubleQuote, SMSTATE_CODE, SMSTATE_STRING),
403                    });
404
405                return;
406            }
407
408            /// <summary>
409            /// Follow "transition" going from "smState" and returns reached SMSTATE.
410            /// </summary>
411            public int GetSMSTATE(int smState, LexerTag transition)
412            {
413                foreach (State st in _states)
414                    if (st.InitialState == smState)
415                        return (st[transition].SMSTATE);
416                return (SMSTATE_CODE);
417            }
418
419            /// <summary>
420            /// Follow "transition" going from "smState" and returns reached TokenSMSTATE.
421            /// </summary>
422            public int GetTokenSMSTATE(int smState, LexerTag transition)
423            {
424                foreach (State st in _states)
425                    if (st.InitialState == smState)
426                        return (st[transition].TokenSMSTATE);
427                return (SMSTATE_CODE);
428            }
429        }
430
431        #endregion
432    }
433}