/main/contrib/NRefactory/Project/Src/Lexer/AbstractLexer.cs
C# | 374 lines | 284 code | 39 blank | 51 comment | 61 complexity | 7fc93dd0e8da2297e6bd4a8f24e34a26 MD5 | raw file
1// <file>
2// <copyright see="prj:///doc/copyright.txt"/>
3// <license see="prj:///doc/license.txt"/>
4// <owner name="Mike Krüger" email="mike@icsharpcode.net"/>
5// <version>$Revision: 4482 $</version>
6// </file>
7
8using System;
9using System.Collections;
10using System.Collections.Generic;
11using System.IO;
12using System.Text;
13
14namespace ICSharpCode.OldNRefactory.Parser
15{
16 /// <summary>
17 /// This is the base class for the C# and VB.NET lexer
18 /// </summary>
19 [System.Diagnostics.CodeAnalysis.SuppressMessage("Microsoft.Naming", "CA1708:IdentifiersShouldDifferByMoreThanCase")]
20 public abstract class AbstractLexer : ILexer
21 {
// Character source handed to the constructor; closed and cleared by Dispose().
private TextReader reader;
// Current column and line, both starting at 1; maintained by ReaderRead().
private int col = 1;
private int line = 1;

// Error sink, exposed through the Errors property.
[CLSCompliant(false)]
protected Errors errors = new Errors();

// Token chain cursors: lastToken backs Token, curToken backs LookAhead,
// peekToken is the cursor advanced by Peek().
protected Token lastToken;
protected Token curToken;
protected Token peekToken;

// Tags assigned through SpecialCommentTags, plus the lookup table the setter builds from them.
private string[] specialCommentTags;
protected Hashtable specialCommentHash;
// Backing list for the TagComments property.
private List<TagComment> tagComments = new List<TagComment>();
// Shared scratch buffer; ReadToEndOfLine() accumulates into it.
protected StringBuilder sb = new StringBuilder();
// Backing instance for the SpecialTracker property; told about every token kind that is read.
[CLSCompliant(false)]
protected SpecialTracker specialTracker = new SpecialTracker();

// Holds the original text of string literals, escape sequences included.
protected StringBuilder originalValue = new StringBuilder();
42
/// <summary>
/// Gets or sets the skip-all-comments flag. This base class only stores the value.
/// </summary>
public bool SkipAllComments { get; set; }

/// <summary>
/// Gets or sets the evaluate-conditional-compilation flag. This base class only stores the value.
/// </summary>
public bool EvaluateConditionalCompilation { get; set; }

/// <summary>
/// Conditional compilation symbols. The base implementation has none to offer and
/// always throws; lexers that support symbols override this property.
/// </summary>
/// <exception cref="NotSupportedException">Always, unless overridden.</exception>
public virtual IDictionary<string, object> ConditionalCompilationSymbols {
    get {
        throw new NotSupportedException();
    }
}
48
/// <summary>
/// Splits a symbol list on ';', ' ' and tab and lazily yields every non-empty,
/// trimmed entry in input order.
/// </summary>
/// <param name="symbols">The raw symbol list; null or empty yields nothing.</param>
/// <returns>The individual symbols.</returns>
protected static IEnumerable<string> GetSymbols(string symbols)
{
    if (string.IsNullOrEmpty(symbols))
        yield break;

    char[] separators = { ';', ' ', '\t' };
    foreach (string rawPart in symbols.Split(separators)) {
        string trimmed = rawPart.Trim();
        if (trimmed.Length > 0)
            yield return trimmed;
    }
}
60
/// <summary>
/// Sets the conditional compilation symbols. The base implementation does not
/// support this and always throws; lexers that support symbols override it.
/// </summary>
/// <param name="symbols">The symbol list (ignored by the base implementation).</param>
/// <exception cref="NotSupportedException">Always, unless overridden.</exception>
public virtual void SetConditionalCompilationSymbols(string symbols)
{
    throw new NotSupportedException();
}
65
/// <summary>The current line number (starts at 1).</summary>
protected int Line {
    get { return line; }
}

/// <summary>The current column number (starts at 1).</summary>
protected int Col {
    get { return col; }
}
76
// While true, ReaderRead() appends every character it reads to recordedText.
protected bool recordRead;
protected StringBuilder recordedText = new StringBuilder();
79
/// <summary>
/// Reads one character from the underlying reader and keeps the line/column
/// bookkeeping up to date. A lone '\r' and the pair "\r\n" are both reported
/// as a single '\n'.
/// </summary>
/// <returns>The character read, or -1 at the end of the input.</returns>
protected int ReaderRead()
{
    ++col;
    int val = reader.Read();

    // Do not record the end-of-input marker: (char)-1 would end up as '\uffff' in the recording.
    if (recordRead && val != -1)
        recordedText.Append((char)val);

    if (val == '\r') {
        if (reader.Peek() == '\n') {
            // "\r\n": the break is two characters wide; swallow the '\n' as well.
            lineBreakPosition = new Location(col + 2, line);
            reader.Read();
        } else {
            lineBreakPosition = new Location(col + 1, line);
        }
        ++line;
        col = 1;
        LineBreak();
        return '\n'; // normalize to a single line-feed for the caller
    }

    if (val == '\n') {
        lineBreakPosition = new Location(col + 1, line);
        ++line;
        col = 1;
        LineBreak();
    }
    return val;
}
/// <summary>
/// Looks at the next character of the underlying reader without consuming it.
/// The value is passed through unchanged (no line-end normalization).
/// </summary>
/// <returns>Whatever <c>reader.Peek()</c> returns.</returns>
protected int ReaderPeek()
{
    int upcoming = reader.Peek();
    return upcoming;
}
110
/// <summary>
/// The error collection this lexer reports into.
/// </summary>
public Errors Errors {
    get { return errors; }
}
116
/// <summary>
/// The comments read so far that contain one of the special tag keywords.
/// </summary>
public List<TagComment> TagComments {
    get { return tagComments; }
}
125
/// <summary>
/// The tracker instance that is informed about the kind of every token read.
/// </summary>
public SpecialTracker SpecialTracker {
    get { return specialTracker; }
}
131
/// <summary>
/// Special comment tags are tags like TODO, HACK or UNDONE which are read by the lexer
/// and stored in <see cref="TagComments"/>.
/// </summary>
/// <remarks>
/// Assigning the property rebuilds <c>specialCommentHash</c>: it is null for a null or
/// empty array, otherwise a table whose keys are the tags (values are null).
/// Duplicate tags are stored once and null entries are skipped instead of throwing.
/// </remarks>
public string[] SpecialCommentTags {
    get {
        return specialCommentTags;
    }
    set {
        specialCommentTags = value;
        specialCommentHash = null;
        if (value == null || value.Length == 0)
            return;

        Hashtable lookup = new Hashtable();
        foreach (string tag in value) {
            // The indexer overwrites silently, whereas Add() throws on a repeated key;
            // a null key is rejected by Hashtable either way, so skip it.
            if (tag != null)
                lookup[tag] = null;
        }
        specialCommentHash = lookup;
    }
}
150
/// <summary>
/// The current token, i.e. the token that was the look-ahead before the most recent
/// <see cref="NextToken"/> call. <seealso cref="ICSharpCode.OldNRefactory.Parser.Token"/>
/// </summary>
public Token Token {
    get { return lastToken; }
}
160
/// <summary>
/// The look-ahead token: the token most recently returned by <see cref="NextToken"/>,
/// which becomes <see cref="Token"/> on the following <see cref="NextToken"/> call.
/// <seealso cref="ICSharpCode.OldNRefactory.Parser.Token"/>
/// </summary>
public Token LookAhead {
    get { return curToken; }
}
170
/// <summary>
/// Initializes the lexer base class with the reader the source text is read from.
/// </summary>
/// <param name="reader">The character source; it is closed by <see cref="Dispose"/>.</param>
protected AbstractLexer(TextReader reader)
{
    this.reader = reader;
}
178
#region System.IDisposable interface implementation
/// <summary>
/// Closes the underlying reader and drops the references to the lexer state.
/// Calling it more than once is harmless: the reader is only closed the first time.
/// </summary>
public virtual void Dispose()
{
    // Guard against repeated disposal; reader is null after the first call.
    if (reader != null) {
        reader.Close();
        reader = null;
    }
    errors = null;
    lastToken = curToken = peekToken = null;
    specialCommentHash = null;
    tagComments = null;
    sb = originalValue = null;
}
#endregion
191
/// <summary>
/// Must be called before a peek operation: resets the peek cursor to the
/// current look-ahead token so that <see cref="Peek"/> continues from there.
/// </summary>
public void StartPeek()
{
    this.peekToken = this.curToken;
}
199
/// <summary>
/// Gives back the next token. A second call to Peek() gives the next token after
/// the last call for Peek() and so on. <see cref="StartPeek"/> must be called first.
/// </summary>
/// <returns>
/// A <see cref="Token"/> object: the successor of the peek cursor, read via
/// <see cref="Next"/> if it is not in the token chain yet.
/// </returns>
public Token Peek()
{
    if (peekToken.next == null) {
        peekToken.next = Next();
        // Same guard as in NextToken(): only report a token that was actually produced.
        if (peekToken.next != null) {
            specialTracker.InformToken(peekToken.next.kind);
        }
    }
    peekToken = peekToken.next;
    return peekToken;
}
214
/// <summary>
/// Reads the next token and gives it back. Afterwards <see cref="LookAhead"/> is the
/// returned token and <see cref="Token"/> is the previous look-ahead.
/// </summary>
/// <returns>A <see cref="Token"/> object.</returns>
public virtual Token NextToken()
{
    if (curToken == null) {
        // No look-ahead to shift (e.g. the very first call): just read a token.
        curToken = Next();
        // Guarded like the branch below so both paths treat a missing token the same way.
        if (curToken != null) {
            specialTracker.InformToken(curToken.kind);
        }
        return curToken;
    }

    lastToken = curToken;

    // The successor may already be in the chain because Peek() read ahead.
    if (curToken.next == null) {
        curToken.next = Next();
        if (curToken.next != null) {
            specialTracker.InformToken(curToken.next.kind);
        }
    }

    curToken = curToken.next;
    return curToken;
}
241
/// <summary>
/// Produces the next token. Called by <see cref="Peek"/> and <see cref="NextToken"/>
/// whenever the token chain has no successor yet.
/// </summary>
/// <returns>The token that was read.</returns>
protected abstract Token Next();
243
/// <summary>
/// Tells whether a character value can appear inside an identifier:
/// an underscore or any Unicode letter or digit.
/// </summary>
/// <param name="ch">The character value, or -1 for end of input.</param>
/// <returns>true for '_' and letters/digits; false otherwise, including -1.</returns>
protected static bool IsIdentifierPart(int ch)
{
    switch (ch) {
        case '_':
            return true;
        case -1: // end of input
            return false;
        default:
            return char.IsLetterOrDigit((char)ch); // accepts Unicode letters too
    }
}
250
/// <summary>
/// Tells whether a character is a hexadecimal digit (0-9, A-F, a-f).
/// </summary>
/// <param name="digit">The character to test.</param>
/// <returns>true for the 22 ASCII hex digits, false for everything else.</returns>
protected static bool IsHex(char digit)
{
    // Explicit ASCII range: Char.IsDigit would also accept non-ASCII decimal
    // digits (e.g. U+0663), which are not hexadecimal digits.
    return ('0' <= digit && digit <= '9')
        || ('A' <= digit && digit <= 'F')
        || ('a' <= digit && digit <= 'f');
}
255
/// <summary>
/// Converts a hexadecimal digit character to its numeric value.
/// </summary>
/// <param name="digit">The character to convert (0-9, A-F or a-f).</param>
/// <returns>
/// The value 0..15. For any other character an error is reported at the current
/// line/column and 0 is returned.
/// </returns>
protected int GetHexNumber(char digit)
{
    // Explicit ASCII range: Char.IsDigit also accepts non-ASCII decimal digits,
    // for which digit - '0' would be far outside 0..9.
    if ('0' <= digit && digit <= '9') {
        return digit - '0';
    }
    if ('A' <= digit && digit <= 'F') {
        return digit - 'A' + 0xA;
    }
    if ('a' <= digit && digit <= 'f') {
        return digit - 'a' + 0xA;
    }
    // The message is built by concatenation only; running it through String.Format
    // would throw a FormatException when the offending character is '{' or '}'.
    errors.Error(line, col, "Invalid hex number '" + digit + "'");
    return 0;
}
// Set by the read helpers whenever a line end is consumed (computed from the column and line at that moment).
protected Location lineBreakPosition = new Location(1, 1);
// Maintained by LineBreak(): curLineEnd is the newest recorded line end, lastLineEnd the one before it.
protected Location lastLineEnd = new Location(1, 1);
protected Location curLineEnd = new Location(1, 1);
/// <summary>
/// Records a line break: the previous <c>curLineEnd</c> is moved to
/// <c>lastLineEnd</c> and <c>curLineEnd</c> becomes <c>Location(col - 1, line)</c>
/// for the column and line at the time of the call.
/// </summary>
protected void LineBreak()
{
    this.lastLineEnd = this.curLineEnd;
    this.curLineEnd = new Location(this.col - 1, this.line);
}
/// <summary>
/// Handles a line-end character (MS-DOS "\r\n", MacOS '\r' or plain '\n').
/// </summary>
/// <param name="ch">The character that was just read.</param>
/// <returns>true if <paramref name="ch"/> was a line end, false otherwise.</returns>
protected bool HandleLineEnd(char ch)
{
    switch (ch) {
        case '\r':
            if (reader.Peek() == '\n') {
                // MS-DOS line end: consume the '\n'; ReaderRead() calls LineBreak() itself.
                ReaderRead();
            } else {
                // Assume a MacOS line end, which is a lone '\r'.
                LineBreak();
            }
            return true;

        case '\n':
            LineBreak();
            return true;

        default:
            return false;
    }
}
296
/// <summary>
/// Discards characters up to and including the next line end ('\r', "\r\n" or '\n'),
/// or up to the end of the input if no line end follows.
/// </summary>
/// <remarks>
/// Reads straight from the reader: nothing is appended to <c>recordedText</c>,
/// <c>LineBreak()</c> is not called and the column is not advanced while skipping,
/// so <c>lineBreakPosition</c> is derived from the column at entry. When a line end
/// is found, the line counter is incremented and the column reset to 1.
/// </remarks>
protected void SkipToEndOfLine()
{
    for (int c = reader.Read(); c != -1; c = reader.Read()) {
        bool carriageReturn = c == '\r';
        if (!carriageReturn && c != '\n')
            continue;

        if (carriageReturn && reader.Peek() == '\n') {
            // "\r\n": two characters wide, swallow the '\n' too.
            lineBreakPosition = new Location(col + 2, line);
            reader.Read();
        } else {
            lineBreakPosition = new Location(col + 1, line);
        }
        ++line;
        col = 1;
        return;
    }
}
324
/// <summary>
/// Reads the rest of the current line into a freshly cleared buffer; shorthand
/// for <c>ReadToEndOfLine(true)</c>.
/// </summary>
/// <returns>The text up to, but not including, the line end.</returns>
protected string ReadToEndOfLine()
{
    const bool resetBuilder = true;
    return ReadToEndOfLine(resetBuilder);
}
/// <summary>
/// Appends the rest of the current line to the shared buffer <c>sb</c> and returns
/// the buffer's content. The line end ('\r', "\r\n" or '\n') is consumed but not stored.
/// </summary>
/// <param name="resetBuilder">
/// true to clear <c>sb</c> first; false to keep what it already holds, in which case
/// the returned string starts with that earlier content.
/// </param>
/// <returns>The content of <c>sb</c> once the line end or the end of input is reached.</returns>
/// <remarks>
/// Reads straight from the reader: nothing is appended to <c>recordedText</c> and
/// <c>LineBreak()</c> is not called. On a line end the line counter is incremented and
/// the column reset to 1; at end of input the column is advanced by the length of the
/// returned string.
/// </remarks>
protected string ReadToEndOfLine(bool resetBuilder)
{
    if (resetBuilder)
        sb.Length = 0;

    while (true) {
        int c = reader.Read();

        if (c == -1) {
            // Got EOF before EOL.
            string remainder = sb.ToString();
            col += remainder.Length;
            return remainder;
        }

        if (c == '\r' || c == '\n') {
            if (c == '\r' && reader.Peek() == '\n') {
                // "\r\n": two characters wide, swallow the '\n' too.
                lineBreakPosition = new Location(col + 2, line);
                reader.Read();
            } else {
                lineBreakPosition = new Location(col + 1, line);
            }
            ++line;
            col = 1;
            return sb.ToString();
        }

        sb.Append((char)c);
    }
}
365
/// <summary>
/// Skips to the end of the current code block.
/// Precondition: the lexer has already read the token AFTER the one that opens the
/// block, so that Lexer.Token (not Lexer.LookAhead) is the block-opening token.
/// Postcondition: Lexer.LookAhead is the block-closing token.
/// </summary>
/// <param name="targetToken">
/// Token kind supplied by the caller; its exact meaning is defined by the concrete
/// lexer (NOTE(review): presumably the kind of the closing token - confirm in the implementations).
/// </param>
public abstract void SkipCurrentBlock(int targetToken);
373 }
374}