/Scanner.cs
C# | 488 lines | 421 code | 50 blank | 17 comment | 83 complexity | 0f49322992a25512a4e38ed2eda315bb MD5 | raw file
- using System;
- using System.Collections.Generic;
- using System.Collections;
- using System.Linq;
- using System.Text;
- using System.IO;
- namespace Compiler_
- {
- class Scanner
- {
// Location of the source file handed to the constructor.
private string m_fileLoc;
// Characters of the source text being scanned.
private char[] m_input;
// Index into m_input of the next character to examine.
private int m_charPtr;
// Line number of the current position; starts at 1.
private int m_lineNum;
// Column number of the current position; starts at 1.
private int m_colNum;
// NOTE(review): not referenced by any member visible in this class.
private Token m_prev_Token;
// Stack of symbol tables consulted when classifying identifiers.
Stack<Dictionary<string, Record>> SYMBOL_TABLE_STACK;

// Creates a scanner over the given characters.
//   file  - location of the source file (only stored).
//   input - characters to tokenise.
//   Symb  - symbol table stack used to look up identifier types.
public Scanner(string file, char[] input, Stack<Dictionary<string, Record>> Symb)
{
    // Caller-supplied state.
    m_fileLoc = file;
    m_input = input;
    SYMBOL_TABLE_STACK = Symb;

    // Scanning begins at the first character, reported as line 1, column 1.
    m_charPtr = 0;
    m_lineNum = 1;
    m_colNum = 1;
}
// Bootstrap / entry point: asks the given scanner to read its next token.
// The token is published through the scanner's TOKEN property.
public static void Scan(Scanner scan)
{
    scan.GetToken();
}
// Reads the next token with the given scanner and returns it, i.e. the
// value of the scanner's TOKEN property after the read.
public static Token ScanAndRet(Scanner scan)
{
    scan.GetToken();
    Token scanned = scan.TOKEN;
    return scanned;
}
-
// Scans the next token from m_input and publishes it through TOKEN.
//
// The character at m_charPtr selects a Process* routine. Whitespace and
// comments are skipped: their routines throw RestartException, which makes
// this method scan again. Reading past the end of m_input surfaces as an
// IndexOutOfRangeException and is reported as a TK_EOF token.
private void GetToken()
{
    Token tk = new Token();
    bool scanAgain = true;

    while (scanAgain)
    {
        scanAgain = false;
        try
        {
            if (IsLetter())
            {
                // Keyword or identifier
                ProcessKeyword(ref tk);
            }
            else if (IsNumeric())
            {
                // Integer or real literal
                ProcessNumerics(ref tk);
            }
            else if (IsWhiteSpaceOrSemiColon())
            {
                // Whitespace / control characters
                ProcessWhitespace(ref tk);
            }
            else if (IsCharOperator(m_input[m_charPtr]))
            {
                // Operators (and comments, which restart the scan)
                ProcessOperators(ref tk);
            }
            else if (IsString(m_input[m_charPtr]))
            {
                // Quoted string literal
                ProcessString(ref tk);
            }
            else
            {
                // Unrecognised character: step over it before scanning again.
                // Restarting without advancing would examine the same
                // character forever.
                m_charPtr++;
                m_colNum++;
                scanAgain = true;
            }
        }
        catch (IndexOutOfRangeException)
        {
            // Ran off the end of the input.
            tk.TOKEN_TYPE = TOKEN_TYPES.TK_EOF;
        }
        catch (TruncationCharErrorException)
        {
            // Over-long identifier: emit what was collected so far. The
            // remaining characters stay in the input for the next call.
            tk.TOKEN_VALUE = tk.TOKEN_CURRENT;
            tk.TOKEN_TYPE = GetKeywordOrIdentType(tk);
            tk.TOKEN_LINE = m_lineNum;
        }
        catch (BadIdentifierFormatException)
        {
            // Malformed numeric literal. The value assignment is kept for
            // existing consumers; the type is now marked as an error as well
            // so the token is no longer reported as a valid literal.
            tk.TOKEN_VALUE = TOKEN_TYPES.TK_ERROR;
            tk.TOKEN_TYPE = TOKEN_TYPES.TK_ERROR;
            tk.TOKEN_LINE = m_lineNum;
        }
        catch (RestartException)
        {
            // Whitespace or a comment was consumed; look for a real token.
            scanAgain = true;
        }
    }

    TOKEN = tk;
}
// Reads a run of letters/digits starting at m_charPtr into tk and classifies
// it as a keyword or identifier.
//
// Throws TruncationCharErrorException as soon as the collected text is
// longer than 31 characters; the text gathered so far remains in
// tk.TOKEN_CURRENT.
private void ProcessKeyword(ref Token tk)
{
    tk.TOKEN_COL = m_colNum;

    // The bounds test lets a word that ends exactly at the end of the input
    // be returned as a token instead of escaping as an
    // IndexOutOfRangeException (which the caller reports as end of file).
    while (m_charPtr < m_input.Length && char.IsLetterOrDigit(m_input[m_charPtr]))
    {
        tk.TOKEN_CURRENT = tk.TOKEN_CURRENT + m_input[m_charPtr++];
        m_colNum++;
        if (tk.TOKEN_CURRENT.Length > 31)
        {
            throw new TruncationCharErrorException();
        }
    }

    tk.TOKEN_VALUE = tk.TOKEN_CURRENT;
    tk.TOKEN_TYPE = GetKeywordOrIdentType(tk);
    tk.TOKEN_LINE = m_lineNum;
}
// Reads an integer or real literal starting at m_charPtr into tk.
//
// A digit run yields TK_INTLIT. A single '.' followed by further digits
// extends it to TK_REALLIT. A ".." is left untouched so it is scanned as the
// range operator. Throws BadIdentifierFormatException when the literal is
// directly followed by something that is neither a whitespace/control
// character nor an operator character.
private void ProcessNumerics(ref Token tk)
{
    int end = m_input.Length;
    tk.TOKEN_COL = m_colNum;

    // Integer part. All reads are bounds-checked so a number that ends the
    // input is still returned instead of being reported as end of file.
    while (m_charPtr < end && char.IsDigit(m_input[m_charPtr]))
    {
        tk.TOKEN_CURRENT = tk.TOKEN_CURRENT + m_input[m_charPtr++];
        m_colNum++;
    }
    tk.TOKEN_TYPE = TOKEN_TYPES.TK_INTLIT;

    bool atDot = m_charPtr < end && m_input[m_charPtr] == '.';
    bool isRange = atDot && m_charPtr + 1 < end && m_input[m_charPtr + 1] == '.';

    // Real: a single dot (not the start of "..") followed by the fraction.
    if (atDot && !isRange)
    {
        tk.TOKEN_CURRENT += m_input[m_charPtr++];
        // The decimal point occupies a column too.
        m_colNum++;
        while (m_charPtr < end && char.IsDigit(m_input[m_charPtr]))
        {
            tk.TOKEN_CURRENT = tk.TOKEN_CURRENT + m_input[m_charPtr++];
            m_colNum++;
        }
        tk.TOKEN_TYPE = TOKEN_TYPES.TK_REALLIT;
    }

    // Bad format: only checked when a following character actually exists.
    if (m_charPtr < end &&
        !IsWhiteSpaceOrSemiColon() &&
        !IsCharOperator(m_input[m_charPtr]))
    {
        throw new BadIdentifierFormatException();
    }

    tk.TOKEN_VALUE = tk.TOKEN_CURRENT;
    tk.TOKEN_LINE = m_lineNum;
}
// Consumes one whitespace/control character and updates the line and column
// counters: a line feed starts a new line at column 1, anything else moves
// one column to the right. Always throws RestartException so the caller
// scans again.
private void ProcessWhitespace(ref Token tk)
{
    bool isLineFeed = m_input[m_charPtr] == '\n';
    m_charPtr++;

    if (isLineFeed)
    {
        m_lineNum++;
        m_colNum = 1;
    }
    else
    {
        m_colNum++;
    }

    throw new RestartException();
}
// Reads an operator starting at m_charPtr into tk.
//
// When the operator opens a comment ("{" or "(*") the comment is skipped up
// to and including its terminator and RestartException is thrown so the
// caller scans again. An unterminated comment runs off the end of m_input
// and surfaces as IndexOutOfRangeException.
private void ProcessOperators(ref Token tk)
{
    tk.TOKEN_COL = m_colNum;
    tk.TOKEN_VALUE = GetCharOperator(m_input[m_charPtr]);
    tk.TOKEN_TYPE = GetOperatorType(tk);
    tk.TOKEN_LINE = m_lineNum;

    // Comment of the form { ... }
    if (tk.TOKEN_TYPE == TOKEN_TYPES.TK_LBRACKETOP)
    {
        // Test the current character before advancing so the character
        // directly after the opener is examined too.
        while (m_input[m_charPtr] != '}')
        {
            SkipCommentChar();
        }
        // Step over the closing brace.
        m_charPtr++;
        m_colNum++;
        throw new RestartException();
    }

    // Comment of the form (* ... *)
    if (tk.TOKEN_TYPE == TOKEN_TYPES.TK_LCOLASTOP)
    {
        // Test before advancing so an empty comment "(**)" is recognised.
        while (!(m_input[m_charPtr] == '*' && m_input[m_charPtr + 1] == ')'))
        {
            SkipCommentChar();
        }
        // Step over the closing "*)".
        m_charPtr += 2;
        m_colNum += 2;
        throw new RestartException();
    }
}

// Advances past one character inside a comment, keeping the line and column
// counters in step (a line feed starts a new line at column 1).
private void SkipCommentChar()
{
    if (m_input[m_charPtr] == '\n')
    {
        m_lineNum++;
        m_colNum = 1;
    }
    else
    {
        m_colNum++;
    }
    m_charPtr++;
}
// Reads a single-quoted string literal starting at the opening quote at
// m_charPtr and stores its contents (without the delimiters) in tk as a
// TK_STRING token.
//
// Two consecutive quotes inside the literal stand for one quote character.
// An unterminated literal runs off the end of m_input and surfaces as
// IndexOutOfRangeException.
private void ProcessString(ref Token tk)
{
    tk.TOKEN_COL = m_colNum;

    // Step over the opening quote.
    m_colNum++;
    m_charPtr++;

    while (true)
    {
        char current = m_input[m_charPtr];

        if (current == '\'')
        {
            // Bounds-checked look-ahead: a closing quote may be the very last
            // character of the input.
            bool doubled = m_charPtr + 1 < m_input.Length
                           && m_input[m_charPtr + 1] == '\'';
            if (!doubled)
            {
                break; // closing quote
            }

            // Escaped quote: keep one, consume both.
            tk.TOKEN_CURRENT += '\'';
            m_charPtr += 2;
            m_colNum += 2;
            continue;
        }

        tk.TOKEN_CURRENT += current;
        m_charPtr++;
        m_colNum++;
    }

    // An empty literal ('') appends nothing; report it as "" rather than
    // whatever initial value TOKEN_CURRENT had.
    tk.TOKEN_VALUE = tk.TOKEN_CURRENT ?? string.Empty;
    tk.TOKEN_TYPE = TOKEN_TYPES.TK_STRING;
    tk.TOKEN_LINE = m_lineNum;

    // Step over the closing quote.
    m_colNum++;
    m_charPtr++;
}
// Classifies the text held in tk.TOKEN_VALUE (cast to string).
// Returns the matching keyword type for the exact, lower-case spellings
// listed below; any other text is resolved through GetTokenIfNotKeyword.
private TOKEN_TYPES GetKeywordOrIdentType(Token tk)
{
    string word = (string)tk.TOKEN_VALUE;

    switch (word)
    {
        case "and": return TOKEN_TYPES.TK_AND;
        case "absolute": return TOKEN_TYPES.TK_ABSOLUTE;
        case "array": return TOKEN_TYPES.TK_ARRAY;
        case "asm": return TOKEN_TYPES.TK_ASM;
        case "begin": return TOKEN_TYPES.TK_BEGIN;
        case "boolean": return TOKEN_TYPES.TK_BOOLEAN;
        case "case": return TOKEN_TYPES.TK_CASE;
        case "const": return TOKEN_TYPES.TK_CONST;
        case "constructor": return TOKEN_TYPES.TK_CONSTRUCTOR;
        case "destructor": return TOKEN_TYPES.TK_DESTRUCTOR;
        case "div": return TOKEN_TYPES.TK_DIV;
        case "do": return TOKEN_TYPES.TK_DO;
        case "downto": return TOKEN_TYPES.TK_DOWNTO;
        case "else": return TOKEN_TYPES.TK_ELSE;
        case "end": return TOKEN_TYPES.TK_END;
        case "false": return TOKEN_TYPES.TK_FALSE;
        case "file": return TOKEN_TYPES.TK_FILE;
        case "for": return TOKEN_TYPES.TK_FOR;
        case "function": return TOKEN_TYPES.TK_FUNCTION;
        case "goto": return TOKEN_TYPES.TK_GOTO;
        case "if": return TOKEN_TYPES.TK_IF;
        case "implementation": return TOKEN_TYPES.TK_IMPLEMENTATION;
        case "in": return TOKEN_TYPES.TK_IN;
        case "inherited": return TOKEN_TYPES.TK_INHERITED;
        case "inline": return TOKEN_TYPES.TK_INLINE;
        case "interface": return TOKEN_TYPES.TK_INTERFACE;
        case "integer": return TOKEN_TYPES.TK_INTEGER;
        case "label": return TOKEN_TYPES.TK_LABEL;
        case "mod": return TOKEN_TYPES.TK_MOD;
        case "nil": return TOKEN_TYPES.TK_NIL;
        case "not": return TOKEN_TYPES.TK_NOT;
        case "object": return TOKEN_TYPES.TK_OBJECT;
        case "of": return TOKEN_TYPES.TK_OF;
        case "on": return TOKEN_TYPES.TK_ON;
        case "operator": return TOKEN_TYPES.TK_OPERATOR;
        case "or": return TOKEN_TYPES.TK_OR;
        case "packed": return TOKEN_TYPES.TK_PACKED;
        case "procedure": return TOKEN_TYPES.TK_PROCEDURE;
        case "program": return TOKEN_TYPES.TK_PROGRAM;
        case "record": return TOKEN_TYPES.TK_RECORD;
        case "reintroduce": return TOKEN_TYPES.TK_REINTRODUCE;
        case "repeat": return TOKEN_TYPES.TK_REPEAT;
        case "self": return TOKEN_TYPES.TK_SELF;
        case "set": return TOKEN_TYPES.TK_SET;
        case "shl": return TOKEN_TYPES.TK_SHL;
        case "shr": return TOKEN_TYPES.TK_SHR;
        case "string": return TOKEN_TYPES.TK_STRING;
        case "then": return TOKEN_TYPES.TK_THEN;
        case "true": return TOKEN_TYPES.TK_TRUE;
        case "to": return TOKEN_TYPES.TK_TO;
        case "type": return TOKEN_TYPES.TK_TYPE;
        case "unit": return TOKEN_TYPES.TK_UNIT;
        case "until": return TOKEN_TYPES.TK_UNTIL;
        case "uses": return TOKEN_TYPES.TK_USES;
        case "var": return TOKEN_TYPES.TK_VAR;
        case "while": return TOKEN_TYPES.TK_WHILE;
        case "with": return TOKEN_TYPES.TK_WITH;
        case "xor": return TOKEN_TYPES.TK_XOR;
        default:
            // Not a keyword: consult the symbol tables.
            return GetTokenIfNotKeyword(word);
    }
}
// Returns the recorded type (Record.TYPE_LT) of the identifier `key` if any
// symbol table on SYMBOL_TABLE_STACK contains it, otherwise TK_ID.
//
// Tables are searched from the top of the stack downwards and the first
// match wins, so a declaration in an inner scope shadows outer ones. The
// stack is only enumerated, never popped, so it is left untouched.
private TOKEN_TYPES GetTokenIfNotKeyword(string key)
{
    // Dictionary lookups reject a null key; an absent name is a plain identifier.
    if (key == null)
    {
        return TOKEN_TYPES.TK_ID;
    }

    // Stack<T> enumerates in pop order (most recently pushed table first).
    foreach (Dictionary<string, Record> symbolTable in SYMBOL_TABLE_STACK)
    {
        Record rec;
        if (symbolTable.TryGetValue(key, out rec))
        {
            return rec.TYPE_LT;
        }
    }

    // Empty stack or unknown name.
    return TOKEN_TYPES.TK_ID;
}
// Maps the operator text held in tk.TOKEN_VALUE (cast to string) to its
// token type. Text that is not listed yields the default TOKEN_TYPES value.
private TOKEN_TYPES GetOperatorType(Token tk)
{
    string symbol = (string)tk.TOKEN_VALUE;

    switch (symbol)
    {
        // Single-character operators
        case "+": return TOKEN_TYPES.TK_PLUSOP;
        case "-": return TOKEN_TYPES.TK_MINUSOP;
        case "*": return TOKEN_TYPES.TK_MULTOP;
        case "/": return TOKEN_TYPES.TK_DIVOP;
        case "=": return TOKEN_TYPES.TK_EQUALOP;
        case "<": return TOKEN_TYPES.TK_LTOP;
        case ">": return TOKEN_TYPES.TK_GTOP;
        case ".": return TOKEN_TYPES.TK_DOTOP;
        case ",": return TOKEN_TYPES.TK_COMMAOP;
        case "(": return TOKEN_TYPES.TK_LPARENOP;
        case ")": return TOKEN_TYPES.TK_RPARENOP;
        case ":": return TOKEN_TYPES.TK_COLONOP;
        case ";": return TOKEN_TYPES.TK_SEMICOLON;
        case "^": return TOKEN_TYPES.TK_POWEROP;
        case "@": return TOKEN_TYPES.TK_ATOP;
        case "{": return TOKEN_TYPES.TK_LBRACKETOP;
        case "}": return TOKEN_TYPES.TK_RBRACKETOP;
        case "$": return TOKEN_TYPES.TK_DOLLAROP;
        case "#": return TOKEN_TYPES.TK_POUNDOP;
        case "&": return TOKEN_TYPES.TK_AMPERSANDOP;
        case "%": return TOKEN_TYPES.TK_PERCENTOP;
        case "[": return TOKEN_TYPES.TK_LARRAYBRACK;
        case "]": return TOKEN_TYPES.TK_RARRAYBRACK;

        // Two-character operators
        case "<<": return TOKEN_TYPES.TK_SHIFTLOP;
        case ">>": return TOKEN_TYPES.TK_SHIFTROP;
        case "**": return TOKEN_TYPES.TK_DOUBASTOP;
        case "<>": return TOKEN_TYPES.TK_NOTEQOP;
        case "><": return TOKEN_TYPES.TK_NOTEQ2OP;
        case "<=": return TOKEN_TYPES.TK_LTEQOP;
        case ">=": return TOKEN_TYPES.TK_GTEQOP;
        case ":=": return TOKEN_TYPES.TK_COLEQOP;
        case "+=": return TOKEN_TYPES.TK_PLUSEQOP;
        case "-=": return TOKEN_TYPES.TK_MINUSEQOP;
        case "*=": return TOKEN_TYPES.TK_MULTEQOP;
        case "/=": return TOKEN_TYPES.TK_DIVEQOP;
        case "(*": return TOKEN_TYPES.TK_LCOLASTOP;
        case "..": return TOKEN_TYPES.TK_ARRAYRNGOP;

        default:
            // No mapping for this text.
            return new TOKEN_TYPES();
    }
}
// The most recently scanned token, kept in Global.CURRENT_TOKEN.
// Assigning a new token first moves the current one to Global.PREV_TOKEN.
public Token TOKEN
{
    get
    {
        return Global.CURRENT_TOKEN;
    }
    set
    {
        Token outgoing = Global.CURRENT_TOKEN;
        Global.PREV_TOKEN = outgoing;
        Global.CURRENT_TOKEN = value;
    }
}
// The token held in Global.PREV_TOKEN, i.e. the one that was current before
// the latest assignment to TOKEN. Read-only.
public Token PREVTOKEN
{
    get
    {
        return Global.PREV_TOKEN;
    }
}
// True when the character at the current scan position is a letter
// (per char.IsLetter). Indexing past the end of m_input throws
// IndexOutOfRangeException.
public bool IsLetter()
{
    char current = m_input[m_charPtr];
    return char.IsLetter(current);
}
// True when the character at the current scan position is a decimal digit
// (per char.IsDigit). Indexing past the end of m_input throws
// IndexOutOfRangeException.
public bool IsNumeric()
{
    char current = m_input[m_charPtr];
    return char.IsDigit(current);
}
// True when x is the single-quote character that delimits a string literal.
public bool IsString(char x)
{
    const char quote = '\'';
    return x == quote;
}
// True when the character at the current scan position has a code of 0x20
// (space) or lower, i.e. a space or control character such as tab, CR or LF.
// Despite the name, ';' (0x3B) is not matched by this test.
// Indexing past the end of m_input throws IndexOutOfRangeException.
public bool IsWhiteSpaceOrSemiColon()
{
    char current = m_input[m_charPtr];
    return current <= ' ';
}
// True when x is one of the characters that can start an operator token.
public bool IsCharOperator(char x)
{
    // Every character accepted as the first character of an operator.
    const string operatorChars = "+-*/=<>[].,():^@{}$#&%;";
    return operatorChars.IndexOf(x) >= 0;
}
// Consumes the operator that starts with x at the current scan position and
// returns its text. x itself is always consumed; when the next character
// forms a known two-character operator with x, it is consumed and appended
// as well. m_charPtr and m_colNum advance once per consumed character.
public string GetCharOperator(char x)
{
    string op = x.ToString();
    m_charPtr++;
    m_colNum++;

    // x was the last character of the input: nothing to pair it with.
    if (m_charPtr == m_input.Length)
    {
        return op;
    }

    // Characters that may follow x to form a two-character operator.
    string followers;
    switch (x)
    {
        case '<':
        case '>':
            followers = "><=";
            break;
        case '*':
            followers = "*";
            break;
        case ':':
        case '+':
        case '-':
        case '/':
            followers = "=";
            break;
        case '(':
            followers = "*.";
            break;
        case '.':
            followers = ").";
            break;
        default:
            // x never starts a two-character operator.
            return op;
    }

    char next = m_input[m_charPtr];
    if (followers.IndexOf(next) >= 0)
    {
        op += next;
        m_charPtr++;
        m_colNum++;
    }
    return op;
}
- }
- }