PageRenderTime 26ms CodeModel.GetById 13ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/src/NUnit/UiException/CSharpParser/TokenDictionary.cs

#
C# | 286 lines | 126 code | 39 blank | 121 comment | 15 complexity | ff66ece6a958c2a90c8aa771dbf09711 MD5 | raw file
  1// ****************************************************************
  2// This is free software licensed under the NUnit license. You may
  3// obtain a copy of the license at http://nunit.org
  4// ****************************************************************
  5
  6using System;
  7using System.Collections.Generic;
  8using System.Text;
  9using System.Collections;
 10
 11namespace NUnit.UiException.CodeFormatters
 12{
 13    /// <summary>
 14    /// TokenDictionary is responsible for defining and identifying a set of basic
 15    /// strings in a given text that have a particular meaning. For instance:
 16    ///  - Separator, (ex: "{" ";" "]" ...)
 17    ///  - comment markers, (ex: "//" "/*" "*/")
 18    ///  - string markers, (ex: '"' '\'')
 19    ///  - Other -> "Text" (all other strings but the ones aboves).
 20    /// To achieve this, TokenDictionary firstly defines methods to register and query which
 21    /// strings have been registered. Secondly it defines a convenient method: TryMatch()
 22    /// responsible for splitting a given string in one or two parts where the first one will
 23    /// fall in one of the above categories. When calling TryMatch() iteratively --see Lexer--,
 24    /// one can tag a text into a list of tokens that might server for a semantic analysis.
 25    ///
 26    /// TokenDictionary and Lexer are both responsible for dealing with the lexical analysis
 27    /// job that is the first step to make basic syntax coloring. 
 28    /// </summary>
 29    /// <see cref="Lexer">Front class for the lexical analysis.</see>
 30    public class TokenDictionary :
 31        IEnumerable
 32    {
 33        private List<InternalLexToken> _list;
 34        private List<LexToken> _working;
 35
 36        /// <summary>
 37        /// Build an empty instance of TokenDictionary.
 38        /// </summary>
 39        public TokenDictionary()
 40        {
 41            _list = new List<InternalLexToken>();
 42            _working = new List<LexToken>();
 43
 44            return;
 45        }
 46
 47        /// <summary>
 48        /// Gets the token count defined in this instance.
 49        /// </summary>
 50        public int Count
 51        {
 52            get { return (_list.Count); }
 53        }
 54
 55        /// <summary>
 56        /// Gets the token at the given index.
 57        /// </summary>
 58        /// <param name="index">Index of the token to be returned.</param>
 59        /// <returns>The token at the specified index.</returns>
 60        public LexToken this[int index]
 61        {
 62            get { return (_list[index]); }
 63        }
 64
 65        /// <summary>
 66        /// Build a new token and add it to the list of tokens known by TokenDictionary.
 67        /// Tokens must be added from the longest text value to the shortest otherwise
 68        /// an exception will be raised.
 69        /// </summary>
 70        /// <param name="value">
 71        /// The token's text value. It must not be null nor empty. It must not be already
 72        /// defined neither. If there are tokens already defined, value's length must not
 73        /// be longer than the previous added token.
 74        /// </param>
 75        /// <param name="tag">The token's tag value.</param>
 76        public void Add(string value, LexerTag tag)
 77        {
 78            InternalLexToken newToken;
 79
 80            UiExceptionHelper.CheckNotNull(value, "value");
 81            UiExceptionHelper.CheckFalse(value == "",
 82                "Token value must not be empty.", "value");
 83            UiExceptionHelper.CheckFalse(
 84                Contains(value),
 85                String.Format("Token '{0}' is already defined.", value),
 86                "value");
 87            if (Count > 0)
 88                UiExceptionHelper.CheckTrue(
 89                    _list[Count - 1].Text.Length >= value.Length,
 90                    "Tokens must be inserted from the longest to the shortest value.",
 91                    "value");
 92
 93            newToken = new InternalLexToken(value, tag);
 94
 95            // loop through each item to populate
 96            // newToken.StartingWith list.
 97
 98            foreach (InternalLexToken item in _list)
 99                if (item.Text.StartsWith(value))
100                    newToken.StartingWith.Add(item);
101
102            _list.Add(newToken);
103
104            return;
105        }
106
107        /// <summary>
108        /// Tests whether the given string matches a token known by this instance.
109        /// </summary>
110        /// <param name="value">
111        ///     A string to be identify with a token in this instance.
112        /// </param>
113        /// <returns>
114        ///     True if the string matches a token's text
115        ///     value in this instance, false otherwise.
116        /// </returns>
117        public bool Contains(string value)
118        {
119            foreach (LexToken item in _list)
120                if (item.Text == value)
121                    return (true);
122            return (false);
123        }
124
125        /// <summary>
126        /// Try to match in "text" + "prediction" a token previously defined with the Add() method.
127        /// Since TryMatch() may return null, it should be called from a loop that scans iteratively
128        /// all characters of an input text.
129        ///
130        /// TryMatch() can put the caller in the two following situations: 
131        /// 1) if parameters "text"+"prediction" don't hold any token, null will be returned. In this
132        ///    case caller is expected to append to "text" one character more and to shift "prediction"
133        ///    by one character ahead before calling TryMatch() again.
134        /// 2) if parameters "text"+"prediction" look like [data]TOKEN --where [data] is any other string
135        ///    but the ones in tokens-- TryMatch() will return an instance of LexToken which LexToken.Text
136        ///    and LexToken.Tag properties will be setup with identified data. In this case caller is
137        ///    expected to shift its reading position by the lenght of text put in LexToken.Text. Besides
138        ///    "text" parameter should reset its length to 1 again.
139        /// </summary>
140        /// <param name="text">
141        /// At the very beginning, text should be of size 1 and set up with the first character from the
142        /// input text. Each time TryMatch() return null, the following character from the input text
143        /// should be appended to "text". Once a token is returned, this parameter should reset its size
144        /// to 1 and be filled with the character coming just after the identified string.
145        /// This parameter cannot be null.
146        /// </param>
147        /// <param name="prediction">
148        /// This parameter represents a constant sized string that goes just before the data in "text".
149        /// If the caller reach the end of the text and there are not enough character to fill "prediction"
150        /// completely this parameter can be filled with remaining character and eventually becoming empty.
151        /// The size of this string should be equal to the lenght of the longest token defined in
152        /// this instance of TokenDictionary.
153        /// This parameter cannot be null.
154        /// </param>
155        /// <returns>
156        /// The first identifiable LexToken in "text"+"prediction". Returns may be null.
157        /// </returns>
158        /// <see cref="Lexer.Next()">
159        /// To have a look on the loop implementation..
160        /// </see>
161        public LexToken TryMatch(string text, string prediction)
162        {
163            UiExceptionHelper.CheckNotNull(text, "text");
164            UiExceptionHelper.CheckNotNull(prediction, "prediction");
165
166            foreach (InternalLexToken token in _list)
167            {
168                if (text.EndsWith(token.Text))
169                {
170                    // text may look like [data]TOKEN
171                    // where [data] is normal text possibly empty.
172
173                    if (text.Length > token.Text.Length)
174                    {
175                        // get only [data] part
176                        return (new LexToken(
177                            text.Substring(0, text.Length - token.Text.Length),
178                            LexerTag.Text, -1));
179                    }
180
181                    // text looks like TOKEN, however we can't return text at
182                    // this stage before testing content of prediction. Since
183                    // there is a possibility that a longer TOKEN be in the concatenated
184                    // string: text + prediction. (note: longer TOKENs have higher
185                    // priority over shorter ones)
186
187                    if (prediction != "")
188                    {
189                        string pattern;
190                        int i;
191
192                        _working.Clear();
193                        PopulateTokenStartingWith(token, _working);
194
195                        for (i = 1; i < _working.Count; ++i)
196                        {
197                            if (_working[i].Text.Length <= text.Length ||
198                                _working[i].Text.Length > text.Length + prediction.Length)
199                                continue;
200                            pattern = text + prediction.Substring(0,
201                                _working[i].Text.Length - text.Length);
202                            if (_working[i].Text == pattern)
203                                return (_working[i]);
204                        }
205                    }
206
207                    return (token);
208                }
209            }
210
211            // no match found, if prediction is empty
212            // this means we reach end of text and return
213            // text as a LexerToken.Text
214
215            if (prediction == "")
216                return (new LexToken(text, LexerTag.Text, -1));
217
218            return (null);
219        }
220
221        /// <summary>
222        /// Builds the list of all LexToken which text value starts with the one in starter.
223        /// </summary>
224        /// <param name="starter">The token that the reference text.</param>
225        /// <param name="output">The list of tokens which text starts with the one in starter.</param>
226        protected void PopulateTokenStartingWith(LexToken starter, List<LexToken> output)
227        {
228            InternalLexToken token;
229
230            UiExceptionHelper.CheckNotNull(starter, "starter");
231            UiExceptionHelper.CheckNotNull(output, "output");
232
233            output.Add(starter);
234
235            token = (InternalLexToken)starter;
236            foreach (LexToken item in token.StartingWith)
237                output.Add(item);
238
239            return;
240        }
241
242        #region InternalLexToken
243
244        /// <summary>
245        /// Inherits of LexToken and add a public array that holds the list of all other tokens
246        /// which text values start with the one in the current instance.
247        /// </summary>
248        class InternalLexToken :
249            LexToken
250        {
251            /// <summary>
252            /// Holds the list of all other tokens which text values start like the one
253            /// in this instance. This array is used to solve ambiguity when finding a
254            /// string that could possibly represents more than one token.
255            /// </summary>
256            public List<LexToken> StartingWith;
257
258            /// <summary>
259            /// Build a new instance of InternalLexToken with the given data.
260            /// </summary>
261            /// <param name="value">The token's text value.</param>
262            /// <param name="tag">The token's tag value.</param>
263            public InternalLexToken(string value, LexerTag tag)
264            {
265                _start = -1;
266                _text = value;
267                _tag = tag;
268
269                StartingWith = new List<LexToken>();
270
271                return;
272            }
273        }
274
275        #endregion
276
277        #region IEnumerable Membres
278
279        public IEnumerator GetEnumerator()
280        {
281            return (_list.GetEnumerator());
282        }
283
284        #endregion
285    }
286}