/src/NUnit/UiException/CSharpParser/TokenDictionary.cs
C# | 286 lines | 126 code | 39 blank | 121 comment | 15 complexity | ff66ece6a958c2a90c8aa771dbf09711 MD5 | raw file
Possible License(s): GPL-2.0
- // ****************************************************************
- // This is free software licensed under the NUnit license. You may
- // obtain a copy of the license at http://nunit.org
- // ****************************************************************
-
- using System;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
-
- namespace NUnit.UiException.CodeFormatters
- {
/// <summary>
/// TokenDictionary is responsible for defining and identifying a set of basic
/// strings in a given text that have a particular meaning. For instance:
///   - separators (ex: "{" ";" "]" ...),
///   - comment markers (ex: "//" "/*" "*/"),
///   - string markers (ex: '"' '\''),
///   - anything else, reported as plain "Text".
/// To achieve this, TokenDictionary first defines methods to register tokens and to
/// query which strings have been registered. Secondly it defines a convenient method,
/// TryMatch(), responsible for splitting a given string in one or two parts where the
/// first one falls in one of the above categories. When calling TryMatch() iteratively
/// --see Lexer--, one can tag a text into a list of tokens that might serve for a
/// semantic analysis.
///
/// TokenDictionary and Lexer are both responsible for dealing with the lexical analysis
/// job that is the first step to make basic syntax coloring.
///
/// All token comparisons are ordinal (character by character).
/// </summary>
/// <see cref="Lexer">Front class for the lexical analysis.</see>
public class TokenDictionary :
    IEnumerable
{
    /// <summary>
    /// Registered tokens, kept in insertion order. Add() enforces that this order
    /// goes from the longest text value to the shortest.
    /// </summary>
    private readonly List<InternalLexToken> _list;

    /// <summary>
    /// Build an empty instance of TokenDictionary.
    /// </summary>
    public TokenDictionary()
    {
        _list = new List<InternalLexToken>();
    }

    /// <summary>
    /// Gets the token count defined in this instance.
    /// </summary>
    public int Count
    {
        get { return (_list.Count); }
    }

    /// <summary>
    /// Gets the token at the given index.
    /// </summary>
    /// <param name="index">Index of the token to be returned.</param>
    /// <returns>The token at the specified index.</returns>
    public LexToken this[int index]
    {
        get { return (_list[index]); }
    }

    /// <summary>
    /// Build a new token and add it to the list of tokens known by TokenDictionary.
    /// Tokens must be added from the longest text value to the shortest; any other
    /// order is rejected by the argument checks.
    /// </summary>
    /// <param name="value">
    /// The token's text value. It must be neither null nor empty, and it must not be
    /// already defined. If there are tokens already defined, value's length must not
    /// be longer than the previously added token.
    /// </param>
    /// <param name="tag">The token's tag value.</param>
    public void Add(string value, LexerTag tag)
    {
        InternalLexToken newToken;

        UiExceptionHelper.CheckNotNull(value, "value");
        UiExceptionHelper.CheckFalse(value == "",
            "Token value must not be empty.", "value");
        UiExceptionHelper.CheckFalse(
            Contains(value),
            String.Format("Token '{0}' is already defined.", value),
            "value");
        if (Count > 0)
        {
            UiExceptionHelper.CheckTrue(
                _list[Count - 1].Text.Length >= value.Length,
                "Tokens must be inserted from the longest to the shortest value.",
                "value");
        }

        newToken = new InternalLexToken(value, tag);

        // Record every already registered (hence longer or equally long) token
        // whose text begins with the new value. TryMatch() uses this list to
        // prefer the longest token when several share the same prefix.
        // The comparison is ordinal so that it agrees with the ordinal "=="
        // used by Contains() and TryMatch(); a culture-sensitive StartsWith
        // may ignore some characters and report false prefixes.
        foreach (InternalLexToken item in _list)
        {
            if (item.Text.StartsWith(value, StringComparison.Ordinal))
            {
                newToken.StartingWith.Add(item);
            }
        }

        _list.Add(newToken);
    }

    /// <summary>
    /// Tests whether the given string matches a token known by this instance.
    /// </summary>
    /// <param name="value">
    /// A string to be identified with a token in this instance.
    /// </param>
    /// <returns>
    /// True if the string is exactly equal to the text value of a token
    /// in this instance, false otherwise.
    /// </returns>
    public bool Contains(string value)
    {
        foreach (LexToken item in _list)
        {
            if (item.Text == value)
            {
                return (true);
            }
        }

        return (false);
    }

    /// <summary>
    /// Try to match in "text" + "prediction" a token previously defined with the Add() method.
    /// Since TryMatch() may return null, it should be called from a loop that scans iteratively
    /// all characters of an input text.
    ///
    /// TryMatch() can put the caller in one of the following situations:
    ///  1) "text" does not end with any token and "prediction" is not empty: null is returned.
    ///     In this case the caller is expected to append one more character to "text" and to
    ///     shift "prediction" one character ahead before calling TryMatch() again.
    ///  2) "text" looks like [data]TOKEN with a non empty [data] --where [data] is any other
    ///     string but the ones in tokens--: a new LexToken tagged LexerTag.Text and holding
    ///     only [data] is returned. The TOKEN part is not consumed by this call.
    ///  3) "text" is exactly a TOKEN: the registered token is returned, unless a longer
    ///     registered token that starts with "text" can be read from "text" + "prediction",
    ///     in which case that longer token is returned.
    ///  4) "text" does not end with any token and "prediction" is empty (end of input):
    ///     a new LexToken tagged LexerTag.Text and holding the whole "text" is returned.
    /// Whenever a token is returned, the caller is expected to shift its reading position
    /// by the length of LexToken.Text and to reset "text" to a length of 1.
    /// </summary>
    /// <param name="text">
    /// At the very beginning, text should be of size 1 and set up with the first character from the
    /// input text. Each time TryMatch() returns null, the following character from the input text
    /// should be appended to "text". Once a token is returned, this parameter should reset its size
    /// to 1 and be filled with the character coming just after the identified string.
    /// This parameter cannot be null.
    /// </param>
    /// <param name="prediction">
    /// Look-ahead string: the characters of the input that come just after the data in "text".
    /// Its size should be equal to the length of the longest token defined in this instance
    /// of TokenDictionary. When the caller reaches the end of the input and there are not
    /// enough characters to fill "prediction" completely, it holds the remaining characters
    /// and eventually becomes empty.
    /// This parameter cannot be null.
    /// </param>
    /// <returns>
    /// The first identifiable LexToken in "text"+"prediction". The returned value may be null.
    /// </returns>
    /// <see cref="Lexer.Next()">
    /// To have a look at the loop implementation.
    /// </see>
    public LexToken TryMatch(string text, string prediction)
    {
        UiExceptionHelper.CheckNotNull(text, "text");
        UiExceptionHelper.CheckNotNull(prediction, "prediction");

        foreach (InternalLexToken token in _list)
        {
            // Ordinal comparison is required here: the Substring() arithmetic
            // below assumes that the matched suffix is exactly token.Text,
            // character for character. A culture-sensitive EndsWith() may
            // report a match between strings of different lengths.
            if (!text.EndsWith(token.Text, StringComparison.Ordinal))
            {
                continue;
            }

            // text may look like [data]TOKEN
            // where [data] is normal text possibly empty.

            if (text.Length > token.Text.Length)
            {
                // get only [data] part
                return (new LexToken(
                    text.Substring(0, text.Length - token.Text.Length),
                    LexerTag.Text, -1));
            }

            // text looks like TOKEN, however we can't return it before
            // testing the content of prediction: a longer TOKEN may be
            // present in the concatenated string text + prediction.
            // (note: longer TOKENs have higher priority over shorter ones)

            if (prediction != "")
            {
                // StartingWith preserves registration order, that is from
                // the longest candidate to the shortest one.
                foreach (LexToken candidate in token.StartingWith)
                {
                    // number of characters to borrow from prediction
                    int missing = candidate.Text.Length - text.Length;

                    if (missing <= 0 || missing > prediction.Length)
                    {
                        continue;
                    }

                    if (candidate.Text == text + prediction.Substring(0, missing))
                    {
                        return (candidate);
                    }
                }
            }

            return (token);
        }

        // no match found, if prediction is empty
        // this means we reached the end of text and return
        // text as a LexToken.Text

        if (prediction == "")
        {
            return (new LexToken(text, LexerTag.Text, -1));
        }

        return (null);
    }

    /// <summary>
    /// Builds the list of all LexToken which text value starts with the one in starter.
    /// The starter itself is appended first, followed by the registered tokens
    /// that start with its text.
    /// </summary>
    /// <param name="starter">
    /// The token used as the reference text. It must be a token held by this
    /// dictionary (for instance one returned by the indexer); any other LexToken
    /// makes the cast below fail.
    /// </param>
    /// <param name="output">The list receiving the tokens which text starts with the one in starter.</param>
    protected void PopulateTokenStartingWith(LexToken starter, List<LexToken> output)
    {
        InternalLexToken token;

        UiExceptionHelper.CheckNotNull(starter, "starter");
        UiExceptionHelper.CheckNotNull(output, "output");

        // Cast before touching output so that a failing cast
        // does not leave a partially filled list behind.
        token = (InternalLexToken)starter;

        output.Add(starter);
        output.AddRange(token.StartingWith);
    }

    #region InternalLexToken

    /// <summary>
    /// Inherits from LexToken and adds a public list that holds all other tokens
    /// which text values start with the one in the current instance.
    /// </summary>
    private sealed class InternalLexToken :
        LexToken
    {
        /// <summary>
        /// Holds the list of all other tokens which text values start like the one
        /// in this instance. This list is used to solve ambiguity when finding a
        /// string that could possibly represent more than one token.
        /// </summary>
        public readonly List<LexToken> StartingWith;

        /// <summary>
        /// Build a new instance of InternalLexToken with the given data.
        /// The start position is set to -1.
        /// </summary>
        /// <param name="value">The token's text value.</param>
        /// <param name="tag">The token's tag value.</param>
        public InternalLexToken(string value, LexerTag tag)
        {
            _start = -1;
            _text = value;
            _tag = tag;

            StartingWith = new List<LexToken>();
        }
    }

    #endregion

    #region IEnumerable Members

    /// <summary>
    /// Gets an enumerator over the registered tokens, in registration order.
    /// </summary>
    /// <returns>An enumerator over the tokens of this instance.</returns>
    public IEnumerator GetEnumerator()
    {
        return (_list.GetEnumerator());
    }

    #endregion
}
- }