PageRenderTime 25ms CodeModel.GetById 3ms RepoModel.GetById 0ms app.codeStats 0ms

/src/NUnit/UiException/CSharpParser/TokenClassifier.cs

#
C# | 433 lines | 316 code | 31 blank | 86 comment | 4 complexity | e3c1f10fe9495441401ec3cd6557eadf MD5 | raw file
Possible License(s): GPL-2.0
  1. // ****************************************************************
  2. // This is free software licensed under the NUnit license. You may
  3. // obtain a copy of the license at http://nunit.org
  4. // ****************************************************************
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Text;
  8. namespace NUnit.UiException.CodeFormatters
  9. {
  /// <summary>
  /// Used at an internal stage to convert LexToken into ClassifiedToken. This class provides
  /// a very basic semantic analysis that sorts text into one of the categories below:
  /// - regular code,
  /// - developer comments,
  /// - strings / characters.
  /// The output of this class is used by CSharpCodeFormatter to achieve basic syntax coloring.
  /// </summary>
  18. public class TokenClassifier
  19. {
  #region SMSTATE code
  // The constants below define the states of the finite state machine
  // that does all the work of converting LexToken into ClassifiedToken.
  // For instance, Lexer can send inputs like:
  //
  // [Text][Separator][CommentC_Open][Text][CommentC_Close]
  //
  // This LexToken sequence can, for instance, be converted this way by TokenClassifier:
  //
  // - [Text][Separator] => [Code]
  // - [CommentC_Open][Text][CommentC_Close] => [Comment]
  //
  /// <summary>
  /// State code for the state machine.
  /// State when reaching a code block.
  /// </summary>
  public const int SMSTATE_CODE = 0;
  /// <summary>
  /// State code for the state machine.
  /// State when reaching a C comment block.
  /// </summary>
  public const int SMSTATE_CCOMMENT = 1;
  /// <summary>
  /// State code for the state machine.
  /// State when reaching a C++ comment block.
  /// </summary>
  public const int SMSTATE_CPPCOMMENT = 2;
  /// <summary>
  /// State code for the state machine.
  /// State when reaching a char surrounded by single quotes.
  /// </summary>
  public const int SMSTATE_CHAR = 3;
  /// <summary>
  /// State code for the state machine.
  /// State when reaching a string surrounded by double quotes.
  /// </summary>
  public const int SMSTATE_STRING = 4;
  #endregion
  /// <summary>
  /// A finite state machine where states are SMSTATE values and
  /// transitions are LexerTag values.
  /// </summary>
  private StateMachine _sm;
  /// <summary>
  /// The current SMSTATE code of the state machine.
  /// </summary>
  private int _sm_output;
  /// <summary>
  /// Makes the link between an SMSTATE code and its ClassificationTag.
  /// </summary>
  private Dictionary<int, ClassificationTag> _tags;
  /// <summary>
  /// Contains the list of C# keywords. Only the keys are meaningful;
  /// the dictionary is used for membership lookups.
  /// </summary>
  private Dictionary<string, bool> _keywords;
  /// <summary>
  /// Indicates whether the classifier is in escaping mode.
  /// This flag is set to true when parsing "\\" and
  /// can influence the classification of the following token.
  /// </summary>
  private bool _escaping;
  /// <summary>
  /// Creates a TokenClassifier: builds the state machine, the table that maps
  /// each SMSTATE code to a ClassificationTag and the keyword table, then
  /// places the classifier in its default state by calling Reset().
  /// </summary>
  public TokenClassifier()
  {
      int index;

      // Predefined keywords, taken from the official msdn list.
      // "get", "set", "region" and "endregion" are not part of that
      // list; they are added here so they get highlighted like keywords.
      string[] keywordList = new string[] {
          "abstract", "event", "new", "struct", "as", "explicit", "null", "switch",
          "base", "extern", "object", "this", "bool", "false", "operator", "throw",
          "break", "finally", "out", "true", "byte", "fixed", "override", "try", "case",
          "float", "params", "typeof", "catch", "for", "private", "uint", "char",
          "foreach", "protected", "ulong", "checked", "goto", "public", "unchecked",
          "class", "if", "readonly", "unsafe", "const", "implicit", "ref", "ushort",
          "continue", "in", "return", "using", "decimal", "int", "sbyte", "virtual",
          "default", "interface", "sealed", "volatile", "delegate", "internal",
          "short", "void", "do", "is", "sizeof", "while", "double", "lock", "stackalloc",
          "else", "long", "static", "enum", "namespace", "string", "partial", "get", "set",
          "region", "endregion",
      };

      _sm = new StateMachine();

      // Both comment states map to Comment; char and string both map to String.
      _tags = new Dictionary<int, ClassificationTag>();
      _tags.Add(SMSTATE_CODE, ClassificationTag.Code);
      _tags.Add(SMSTATE_CCOMMENT, ClassificationTag.Comment);
      _tags.Add(SMSTATE_CPPCOMMENT, ClassificationTag.Comment);
      _tags.Add(SMSTATE_CHAR, ClassificationTag.String);
      _tags.Add(SMSTATE_STRING, ClassificationTag.String);

      _keywords = new Dictionary<string, bool>();
      for (index = 0; index < keywordList.Length; ++index)
          _keywords.Add(keywordList[index], true);

      Reset();
  }
  /// <summary>
  /// Gets whether the classifier is currently in escaping mode. While this
  /// flag is true, the next token is classified as string content rather
  /// than being run through the state machine, so an entity such as "
  /// is treated as normal text instead of a string delimiter.
  /// </summary>
  public bool Escaping
  {
      get
      {
          return _escaping;
      }
  }
  /// <summary>
  /// Puts the classifier back in its default state: escaping mode is
  /// switched off and the state machine returns to SMSTATE_CODE.
  /// </summary>
  public void Reset()
  {
      _escaping = false;
      _sm_output = SMSTATE_CODE;
  }
  /// <summary>
  /// Classifies the given LexToken into a ClassificationTag and updates
  /// the classifier's internal state (state machine and escaping mode).
  /// </summary>
  /// <param name="token">The token to be classified. It is validated
  /// with UiExceptionHelper.CheckNotNull.</param>
  /// <returns>ClassificationTag.Keyword when the token is classified as code
  /// and its text is a known keyword; otherwise the ClassificationTag
  /// associated with the token's SMSTATE value.</returns>
  public ClassificationTag Classify(LexToken token)
  {
      int classTag;
      bool insideLiteral;

      UiExceptionHelper.CheckNotNull(token, "token");

      classTag = AcceptLexToken(token);

      // Keywords only exist in code; the same word inside a comment
      // or a literal keeps the classification of its context.
      if (classTag == SMSTATE_CODE &&
          _keywords.ContainsKey(token.Text))
          return (ClassificationTag.Keyword);

      // Parsing a token whose Text value is set to '\' toggles the
      // escaping mode. A backslash escapes the following token inside
      // string literals and inside char literals: without the char case,
      // '\'' would be closed by the escaped quote and its trailing quote
      // would open a new literal. A backslash that is itself escaped
      // ("\\\\" in the source text) ends escaping mode instead.
      insideLiteral = (_sm_output == SMSTATE_STRING || _sm_output == SMSTATE_CHAR);
      _escaping = (token.Text == "\\" && insideLiteral && !_escaping);

      return (_tags[classTag]);
  }
  /// <summary>
  /// Feeds the given token to the state machine and reports the SMSTATE
  /// value under which the token itself must be classified.
  /// </summary>
  /// <param name="token">The LexToken to be classified.</param>
  /// <returns>SMSTATE_STRING while in escaping mode; otherwise the token
  /// SMSTATE obtained from the current state and the token's tag.</returns>
  protected int AcceptLexToken(LexToken token)
  {
      int previousState;
      int tokenState;

      // An escaped token is reported as string content and does not
      // move the state machine.
      if (_escaping)
          return (SMSTATE_STRING);

      // Both lookups start from the state held before this token.
      previousState = _sm_output;
      tokenState = GetTokenSMSTATE(previousState, token.Tag);
      _sm_output = GetSMSTATE(previousState, token.Tag);

      return (tokenState);
  }
  /// <summary>
  /// Asks the state machine which SMSTATE is reached when following
  /// "transition" from "smState".
  /// </summary>
  /// <param name="smState">The state to start from.</param>
  /// <param name="transition">The LexerTag used as transition.</param>
  /// <returns>The SMSTATE reported by the state machine.</returns>
  protected int GetSMSTATE(int smState, LexerTag transition)
  {
      int reachedState = _sm.GetSMSTATE(smState, transition);

      return reachedState;
  }
  /// <summary>
  /// Asks the state machine which token SMSTATE is associated with
  /// following "transition" from "smState".
  /// </summary>
  /// <param name="smState">The state to start from.</param>
  /// <param name="transition">The LexerTag to be classified.</param>
  /// <returns>The token SMSTATE reported by the state machine.</returns>
  protected int GetTokenSMSTATE(int smState, LexerTag transition)
  {
      int tokenState = _sm.GetTokenSMSTATE(smState, transition);

      return tokenState;
  }
  195. #region StateMachine
  /// <summary>
  /// Describes one transition of the state machine: the LexerTag that
  /// triggers it, the state reached, and the state under which the
  /// triggering token itself is classified.
  /// </summary>
  class TransitionData
  {
      /// <summary>
      /// The LexerTag that triggers this transition.
      /// </summary>
      public LexerTag Transition;

      /// <summary>
      /// The SMSTATE code reached when following this transition.
      /// </summary>
      public int SMSTATE;

      /// <summary>
      /// The SMSTATE code assigned to the token that triggers this transition.
      /// </summary>
      public int TokenSMSTATE;

      /// <summary>
      /// Builds a transition whose token SMSTATE is the same as the
      /// reached SMSTATE.
      /// </summary>
      public TransitionData(LexerTag transition, int smState) :
          this(transition, smState, smState)
      {
      }

      /// <summary>
      /// Builds a transition with distinct values for the reached SMSTATE
      /// and the token SMSTATE.
      /// </summary>
      public TransitionData(LexerTag transition, int smState, int tokenSmState)
      {
          Transition = transition;
          SMSTATE = smState;
          TokenSMSTATE = tokenSmState;
      }
  }
  /// <summary>
  /// A state of the state machine together with the transitions
  /// that leave it.
  /// </summary>
  class State
  {
      public int InitialState;
      public TransitionData[] Transitions;

      /// <summary>
      /// Builds a state from its SMSTATE code and its transitions.
      /// The transition array is checked to be non-null, to hold exactly
      /// 8 entries, and to contain no LexerTag more than once.
      /// </summary>
      public State(int initialState, TransitionData[] transitions)
      {
          UiExceptionHelper.CheckNotNull(transitions, "transitions");
          UiExceptionHelper.CheckTrue(
              transitions.Length == 8,
              "expecting transitions.Length to be 8",
              "transitions");

          // Compare every entry against every other entry.
          for (int left = 0; left < transitions.Length; ++left)
          {
              for (int right = 0; right < transitions.Length; ++right)
              {
                  if (right != left &&
                      transitions[right].Transition == transitions[left].Transition)
                  {
                      UiExceptionHelper.CheckTrue(false,
                          String.Format("transition '{0}' already present", transitions[right].Transition),
                          "transitions");
                  }
              }
          }

          InitialState = initialState;
          Transitions = transitions;
      }

      /// <summary>
      /// Gets the transition data registered for the given LexerTag,
      /// or null when this state has no such transition.
      /// </summary>
      public TransitionData this[LexerTag transition]
      {
          get
          {
              for (int index = 0; index < Transitions.Length; ++index)
              {
                  TransitionData candidate = Transitions[index];

                  if (candidate.Transition == transition)
                      return candidate;
              }

              return null;
          }
      }
  }
  /// <summary>
  /// A finite state machine whose states are SMSTATE codes and whose
  /// transitions are LexerTag values.
  /// </summary>
  class StateMachine
  {
      private State[] _states;

      /// <summary>
      /// Builds the machine with one State per SMSTATE code.
      /// </summary>
      public StateMachine()
      {
          _states = new State[] {
              BuildCodeState(),
              BuildCCommentState(),
              BuildCppCommentState(),
              BuildCharState(),
              BuildStringState(),
          };
      }

      /// <summary>
      /// Transitions leaving SMSTATE_CODE.
      /// </summary>
      private static State BuildCodeState()
      {
          TransitionData[] table = new TransitionData[] {
              new TransitionData(LexerTag.EndOfLine, SMSTATE_CODE),
              new TransitionData(LexerTag.Separator, SMSTATE_CODE),
              new TransitionData(LexerTag.Text, SMSTATE_CODE),
              new TransitionData(LexerTag.CommentC_Open, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.CommentC_Close, SMSTATE_CODE, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.CommentCpp, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.SingleQuote, SMSTATE_CHAR),
              new TransitionData(LexerTag.DoubleQuote, SMSTATE_STRING),
          };

          return new State(SMSTATE_CODE, table);
      }

      /// <summary>
      /// Transitions leaving SMSTATE_CCOMMENT. The closing token returns to
      /// code but is itself classified as part of the comment.
      /// </summary>
      private static State BuildCCommentState()
      {
          TransitionData[] table = new TransitionData[] {
              new TransitionData(LexerTag.EndOfLine, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.Separator, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.Text, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.CommentC_Open, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.CommentC_Close, SMSTATE_CODE, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.CommentCpp, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.SingleQuote, SMSTATE_CCOMMENT),
              new TransitionData(LexerTag.DoubleQuote, SMSTATE_CCOMMENT),
          };

          return new State(SMSTATE_CCOMMENT, table);
      }

      /// <summary>
      /// Transitions leaving SMSTATE_CPPCOMMENT. Only EndOfLine leaves the comment.
      /// </summary>
      private static State BuildCppCommentState()
      {
          TransitionData[] table = new TransitionData[] {
              new TransitionData(LexerTag.EndOfLine, SMSTATE_CODE),
              new TransitionData(LexerTag.Separator, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.Text, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.CommentC_Open, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.CommentC_Close, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.CommentCpp, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.SingleQuote, SMSTATE_CPPCOMMENT),
              new TransitionData(LexerTag.DoubleQuote, SMSTATE_CPPCOMMENT),
          };

          return new State(SMSTATE_CPPCOMMENT, table);
      }

      /// <summary>
      /// Transitions leaving SMSTATE_CHAR. The closing single quote returns
      /// to code but is itself classified as part of the char.
      /// </summary>
      private static State BuildCharState()
      {
          TransitionData[] table = new TransitionData[] {
              new TransitionData(LexerTag.EndOfLine, SMSTATE_CHAR),
              new TransitionData(LexerTag.Separator, SMSTATE_CHAR),
              new TransitionData(LexerTag.Text, SMSTATE_CHAR),
              new TransitionData(LexerTag.CommentC_Open, SMSTATE_CHAR),
              new TransitionData(LexerTag.CommentC_Close, SMSTATE_CHAR),
              new TransitionData(LexerTag.CommentCpp, SMSTATE_CHAR),
              new TransitionData(LexerTag.SingleQuote, SMSTATE_CODE, SMSTATE_CHAR),
              new TransitionData(LexerTag.DoubleQuote, SMSTATE_CHAR),
          };

          return new State(SMSTATE_CHAR, table);
      }

      /// <summary>
      /// Transitions leaving SMSTATE_STRING. The closing double quote returns
      /// to code but is itself classified as part of the string.
      /// </summary>
      private static State BuildStringState()
      {
          TransitionData[] table = new TransitionData[] {
              new TransitionData(LexerTag.EndOfLine, SMSTATE_STRING),
              new TransitionData(LexerTag.Separator, SMSTATE_STRING),
              new TransitionData(LexerTag.Text, SMSTATE_STRING),
              new TransitionData(LexerTag.CommentC_Open, SMSTATE_STRING),
              new TransitionData(LexerTag.CommentC_Close, SMSTATE_STRING),
              new TransitionData(LexerTag.CommentCpp, SMSTATE_STRING),
              new TransitionData(LexerTag.SingleQuote, SMSTATE_STRING),
              new TransitionData(LexerTag.DoubleQuote, SMSTATE_CODE, SMSTATE_STRING),
          };

          return new State(SMSTATE_STRING, table);
      }

      /// <summary>
      /// Returns the first State whose InitialState equals "smState",
      /// or null when there is none.
      /// </summary>
      private State FindState(int smState)
      {
          for (int index = 0; index < _states.Length; ++index)
          {
              if (_states[index].InitialState == smState)
                  return _states[index];
          }

          return null;
      }

      /// <summary>
      /// Follows "transition" going from "smState" and returns the reached
      /// SMSTATE. Returns SMSTATE_CODE when "smState" is not a known state.
      /// </summary>
      public int GetSMSTATE(int smState, LexerTag transition)
      {
          State origin = FindState(smState);

          if (origin == null)
              return SMSTATE_CODE;

          return origin[transition].SMSTATE;
      }

      /// <summary>
      /// Follows "transition" going from "smState" and returns the reached
      /// TokenSMSTATE. Returns SMSTATE_CODE when "smState" is not a known state.
      /// </summary>
      public int GetTokenSMSTATE(int smState, LexerTag transition)
      {
          State origin = FindState(smState);

          if (origin == null)
              return SMSTATE_CODE;

          return origin[transition].TokenSMSTATE;
      }
  }
  365. #endregion
  366. }
  367. }