/tests/output/d/40007-Lexer.d
D | 2413 lines | 2111 code | 135 blank | 167 comment | 279 complexity | 8e17c2a2568f7a38960f0145c06ff5c3 MD5 | raw file
Possible License(s): GPL-2.0
Large files are truncated, but you can click here to view the full file
- /+
- * Copyright (c) 1999-2006 by Digital Mars
- * All Rights Reserved
- * written by Walter Bright www.digitalmars.com
- * License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.
- * See the included readme.txt for details.
- * D Language conversion by: J Duncan
- +/
-
- /**
- * d language lexer
- */
-
- module dparser.Lexer;
-
- import dparser.Root;
-
- import dparser.Tokens;
- import dparser.Token;
- import dparser.Keyword;
-
- import dparser.Types;
-
- import dparser.Module;
- import dparser.Identifier;
- import dparser.unialpha;
-
- import dparser.OutBuffer;
-
- //private import std.ctype;
- //private import std.string;
- //import dwf.core.debugapi;
-
- int errno = 0; // module-level errno variable defined by this module, initialised to 0
-
- //#if _WIN32 && __DMC__
- // from \dm\src\include\setlocal.h
- //extern "C" char * __cdecl __locale_decpoint;
- char* __locale_decpoint; // plain module-level pointer declared in place of the C extern that is commented out above
- //#endif
- //const uint LS = 0x2028; // UTF line separator
- //const uint PS = 0x2029; // UTF paragraph separator
-
- //extern int isUniAlpha(unsigned u);
- //extern int HtmlNamedEntity(unsigned char *p, int length);
-
- /**
- * Lexer object
- */
-
- class Lexer
- {
- static Identifier[char[]] stringtable; // identifier text -> Identifier; static, so shared by every Lexer
- static OutBuffer stringbuffer; // static scratch buffer for literal text; allocated by the first constructor call
- static Token * freelist; // NOTE(review): its only uses visible in this section (in nextToken) are commented out
-
- Token token; // current token
- Module mod; // current module
- Loc loc; // for error messages
- ubyte *base; // pointer to start of buffer
- ubyte *end; // past end of buffer
- ubyte *p; // current character
- int doDocComment; // collect doc comment information
- int anyToken; // !=0 means seen at least one token
- int commentToken; // !=0 means comments are TOKcomment's
-
-
- /**
-  * Create a lexer over part of a source buffer.
-  *
-  * Params:
-  *  mod          = module being lexed (also used to build the initial Loc)
-  *  base         = start of the source buffer
-  *  begoffset    = offset from base at which lexing starts
-  *  endoffset    = offset from base of the position just past the text
-  *  doDocComment = non-zero to collect documentation comments
-  *  commentToken = non-zero to return comments as TOKcomment tokens
-  *
-  * The shared static string buffer is allocated on first use. If the text
-  * begins with "#!", that whole first line is skipped and the line counter
-  * starts at 2.
-  */
- this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
- {
-     if (stringbuffer is null)
-         stringbuffer = new OutBuffer;
-
-     this.mod = mod;
-     this.base = base;
-     this.p = base + begoffset;
-     this.end = base + endoffset;
-     this.doDocComment = doDocComment;
-     this.commentToken = commentToken;
-     loc = Loc(mod, 1);
-
-     // Nothing more to do unless the first line is a "#!" line.
-     if (p[0] != '#' || p[1] != '!')
-         return;
-
-     p += 2;
-     for (;;) {
-         ubyte ch = *p;
-
-         if (ch == 0 || ch == 0x1A)
-             break;                      // end of input: stay on the terminator
-
-         if (ch == '\n') {
-             p++;
-             break;
-         }
-
-         if (ch == '\r') {
-             p++;
-             if (*p == '\n')
-                 p++;                    // CR LF counts as a single line end
-             break;
-         }
-
-         if (ch & 0x80) {
-             uint u = decodeUTF();
-             if (u == PS || u == LS)
-                 break;                  // Unicode line/paragraph separator ends the line
-         }
-         p++;
-     }
-
-     loc.linnum = 2;
- }
-
-
-
- /**
-  * Return the unique Identifier for a string.
-  *
-  * The first time a given string is seen, a new Identifier tagged
-  * TOK.TOKidentifier is created and stored in the static stringtable;
-  * later calls with the same text return that stored entry.
-  */
- static Identifier idPool(in char[] str) {
-     Identifier* slot = str in stringtable;
-     if (slot !is null) {
-         return(*slot);
-     }
-
-     Identifier fresh = new Identifier(str, TOK.TOKidentifier);
-     stringtable[str] = fresh;
-     return(fresh);
- }
-
- static void initKeywords() { // static set-up: runs the three initialisation steps below, in this order
- // build character map
- cmtable_init();
-
- // create keyword tokens & identifiers
- dparser.Keyword.initKeywords();
-
- // create standard lexer tokens
- dparser.Token.createLexerTokens();
- }
-
- /**
-  * Combine two documentation comments into one.
-  *
-  * Returns c2 when c1 is empty, c1 when c2 is empty, and otherwise
-  * c1 and c2 joined by a single newline.
-  */
- static char[] combineComments(char[] c1, char[] c2) {
-     if (!c1.length) {
-         return(c2);
-     }
-     if (!c2.length) {
-         return(c1);
-     }
-     return(c1 ~ "\n" ~ c2);
- }
-
- /**
-  * Decode the UTF-8 sequence that starts at p.
-  *
-  * Precondition: *p has its high bit set (i.e. it is not plain ASCII).
-  *
-  * On success the decoded code point is returned and p is left on the LAST
-  * byte of the sequence, so callers advance with their usual p++.
-  *
-  * On a malformed sequence an error is reported through error(), U+FFFD
-  * (the Unicode replacement character) is returned, and p is left on the
-  * last byte that was accepted, so the caller's p++ resumes at the first
-  * byte that did not belong to the sequence.
-  *
-  * This replaces a placeholder that returned 'X' for every input and never
-  * moved p.
-  */
- uint decodeUTF() {
-     ubyte* s = p;
-     ubyte c = *s;
-
-     assert(c & 0x80);
-     if (!(c & 0x80)) {
-         return(c);
-     }
-
-     uint len;       // total number of bytes announced by the lead byte
-     uint u;         // code point being assembled
-     uint least;     // smallest value that legitimately needs `len` bytes
-
-     if ((c & 0xE0) == 0xC0) {
-         len = 2; u = c & 0x1F; least = 0x80;
-     }
-     else if ((c & 0xF0) == 0xE0) {
-         len = 3; u = c & 0x0F; least = 0x800;
-     }
-     else if ((c & 0xF8) == 0xF0) {
-         len = 4; u = c & 0x07; least = 0x10000;
-     }
-     else {
-         // a stray continuation byte (10xxxxxx) or a byte >= 0xF8
-         error("invalid UTF-8 lead byte 0x%02x", c);
-         return(0xFFFD);
-     }
-
-     for (uint i = 1; i < len; i++) {
-         // never look at or beyond `end`, and require the 10xxxxxx pattern
-         if (s + i >= end || (s[i] & 0xC0) != 0x80) {
-             error("truncated or malformed UTF-8 sequence");
-             p = s + i - 1;
-             return(0xFFFD);
-         }
-         u = (u << 6) | (s[i] & 0x3F);
-     }
-     p = s + len - 1;
-
-     // reject overlong encodings, UTF-16 surrogates and values past U+10FFFF
-     if (u < least || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF)) {
-         error("invalid UTF character \\U%08x", u);
-         return(0xFFFD);
-     }
-     return(u);
- }
-
- /**
-  * Report an error at the lexer's current location (loc).
-  *
-  * The variadic arguments are a format string followed by its values; they
-  * are handed to formatLoc() together with loc. The message is printed only
-  * when a module is attached and global.gag is off. global.errors is
-  * incremented on every call that returns normally.
-  *
-  * Throws: Exception("too many errors") once global.errors has reached
-  * global.max_errors, to stop a blizzard of cascading messages.
-  */
- void error(...) {
-     if ((mod !is null) && !global.gag) {
-         // Print through an explicit "%s": the formatted text can contain
-         // '%' (for example when it quotes a source character) and must not
-         // be interpreted as a format string a second time.
-         writefln("%s", formatLoc(loc, _arguments, _argptr));
-
-         if (global.errors >= global.max_errors) { // moderate blizzard of cascading messages
-             throw new Exception("too many errors");
-         }
-     }
-
-     global.errors++;
- }
-
- /**
-  * Report an error at an explicitly supplied location.
-  *
-  * Same as error(), except that the message is attributed to the `loc`
-  * argument (which shadows the member of the same name) instead of the
-  * lexer's current position.
-  *
-  * Throws: Exception("too many errors") once global.errors has reached
-  * global.max_errors. The limit used to be a hard-coded 20 here; it now
-  * follows the same configurable limit as error().
-  */
- void errorLoc(Loc loc, ...) {
-     if ((mod !is null) && !global.gag) {
-         // Explicit "%s" so that a '%' inside the already formatted message
-         // is printed literally instead of being parsed as a format spec.
-         writefln("%s", formatLoc(loc, _arguments, _argptr));
-
-         if (global.errors >= global.max_errors) { // moderate blizzard of cascading messages
-             throw new Exception("too many errors");
-         }
-     }
-
-     global.errors++;
- }
-
-
- /**
-  * Advance to the next token and return its kind.
-  *
-  * If a token has already been scanned ahead (token.next is set), it is
-  * copied over the current token; otherwise a new token is scanned from the
-  * buffer into the current token.
-  */
- TOK nextToken() {
-     Token* queued = token.next;
-
-     if (queued is null) {
-         scan(&token);
-     }
-     else {
-         memcpy(&token, queued, Token.sizeof);
-     }
-     return(token.value);
- }
-
- /**
-  * Return the token that follows ct without consuming it.
-  *
-  * When ct has no successor yet, one is allocated, scanned from the buffer
-  * and linked in as ct.next, so repeated calls return the same token.
-  */
- Token* peek(inout Token ct) {
-     if (ct.next is null) {
-         Token* ahead = new Token;
-         scan(ahead);
-         ahead.next = null;
-         ct.next = ahead;
-     }
-     return(ct.next);
- }
-
- /*
- * Turn the next token in the buffer into a token: scan from p and fill in *t.
- * White space and line ends are skipped (line ends advance loc.linnum).
- * Comments are skipped as well, unless commentToken is set, in which case a
- * comment is returned as TOKcomment. When doDocComment is set, documentation
- * comments are handed to getDocComment(). End of input (a 0 or 0x1A byte)
- * yields TOKeof. t.ptr is set to the first character of the token.
- */
-
- void scan(Token* t) {
- // debug writefln("scan token");
- uint lastLine = loc.linnum; // line number when this call started; compared with a comment's starting line for getDocComment()
- uint linnum; // line on which the comment currently being skipped began
-
- t.blockComment = null;
- t.lineComment = null;
- while (true) {
- t.ptr = p; // start of the token candidate
- // debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p );
- switch (*p) {
- case 0:
- case 0x1a:
- t.value = TOK.TOKeof; // end of file
- // debug writefln( " EOF" );
- return;
-
- case ' ':
- case '\t':
- case '\v':
- case '\f':
- p++;
- // debug writefln( " whitespace" );
- continue; // skip white space
-
- case '\r':
- // debug writefln( " cr" );
- p++;
- if (*p != '\n') { // if CR stands by itself
- loc.linnum++;
- }
- continue; // skip white space
-
- case '\n':
- // debug writefln( " nl" );
- p++;
- loc.linnum++;
- continue; // skip white space
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- t.value = number(t);
- return;
-
- /*
- * #if CSTRINGS
- * case '\'':
- * t.value = charConstant(t, 0);
- * return;
- *
- * case '"':
- * t.value = stringConstant(t,0);
- * return;
- *
- * case 'l':
- * case 'L':
- * if( p[1] == '\'')
- * {
- * p++;
- * t.value = charConstant(t, 1);
- * return;
- * }
- * else if( p[1] == '"')
- * {
- * p++;
- * t.value = stringConstant(t, 1);
- * return;
- * }
- * #else
- */
- case '\'':
- // debug writefln( " char" );
- t.value = charConstant(t, 0);
- return;
-
- case 'r':
- // debug writefln( " wysiwyg" );
- if (p[1] != '"') {
- goto case_ident;
- }
- p++; // step over the 'r'; control then falls through into the backquote case with p on the opening quote
-
- case '`':
- t.value = wysiwygStringConstant(t, *p);
- return;
-
- case 'x':
- // debug writefln( " hex string" );
- if (p[1] != '"') {
- goto case_ident;
- }
- p++; // step over the 'x' so that p is on the opening quote
- t.value = hexStringConstant(t);
- return;
-
-
- case '"':
- // debug writefln( " string" );
- t.value = escapeStringConstant(t, 0);
- // debug writefln( t.ustring );
- return;
-
- case '\\': // escaped string literal
- // debug writefln( " escaped string literal" );
- uint c;
- stringbuffer.offset = 0;
- do {
- p++;
- c = escapeSequence();
- stringbuffer.write(c);
- } while (*p == '\\');
- // t.len = stringbuffer.offset;
- // stringbuffer.write(cast(byte)0);
- t.ustring = stringbuffer.toString;
- // memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset );
- t.postfix = 0;
- t.value = TOK.TOKstring;
- return;
-
- case 'l':
- case 'L':
- // #endif
-
- case 'a':
- case 'b':
- case 'c':
- case 'd':
- case 'e':
- case 'f':
- case 'g':
- case 'h':
- case 'i':
- case 'j':
- case 'k':
- case 'm':
- case 'n':
- case 'o':
- case 'p':
- case 'q': /*case 'r':*/
- case 's':
- case 't':
- case 'u':
- case 'v':
- case 'w': /*case 'x':*/
- case 'y':
- case 'z':
- case 'A':
- case 'B':
- case 'C':
- case 'D':
- case 'E':
- case 'F':
- case 'G':
- case 'H':
- case 'I':
- case 'J':
- case 'K':
- case 'M':
- case 'N':
- case 'O':
- case 'P':
- case 'Q':
- case 'R':
- case 'S':
- case 'T':
- case 'U':
- case 'V':
- case 'W':
- case 'X':
- case 'Y':
- case 'Z':
- case '_':
- case_ident:
- {
- // debug writefln( " identifier" );
- ubyte c;
- do {
- c = *++p;
- } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF()))); // keep going over identifier characters and non-ASCII letters
-
- // sv = stringtable.update((char *)t.ptr, p - t.ptr);
- char[] tmp;
- tmp.length = p - t.ptr;
- memcpy(tmp.ptr, t.ptr, p - t.ptr);
- Identifier id;
- Identifier * pid = tmp in stringtable;
- if (pid) {
- id = *pid;
- }
-
- if (id is null) {
- id = new Identifier(tmp, TOK.TOKidentifier);
- stringtable[tmp] = id;
- }
-
- t.ident = id;
- t.value = cast(TOK)id.value;
- anyToken = 1;
-
- // if special identifier token
- if (*t.ptr == '_') {
- static char date[11 + 1];
- static char time[8 + 1];
- static char timestamp[24 + 1];
-
- if (!date[0]) { // lazy evaluation
- //!!
- /+
- * time_t t;
- * char *p;
- * .time(&t);
- * p = ctime(&t);
- * assert(p);
- * sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
- * sprintf(time.ptr, "%.8s", p + 11);
- * sprintf(timestamp.ptr, "%.24s", p);
- +/
- }
-
- if (mod && id is Id.FILE) {
- t.value = TOK.TOKstring;
- if (loc.filename.length) {
- t.ustring = loc.filename;
- }
- else {
- t.ustring = mod.ident.toChars();
- }
- goto Llen;
- }
- else if (mod && id == Id.LINE) {
- t.value = TOK.TOKint64v;
- t.uns64value = loc.linnum;
- }
- else if (id == Id.DATE) {
- t.value = TOK.TOKstring;
- //! t.ustring = date;
- goto Llen;
- }
- else if (id == Id.TIME) {
- t.value = TOK.TOKstring;
- //! t.ustring = time;
- goto Llen;
- }
- else if (id == Id.TIMESTAMP) {
- t.value = TOK.TOKstring;
- //! t.ustring = timestamp;
- Llen:
- t.postfix = 0;
- // t.len = strlen((char *)t.ustring);
- }
- }
- //printf("t.value = %d\n",t.value);
- return;
- }
-
- // comments
- case '/':
- p++;
- switch (*p) {
- case '=':
- p++;
- t.value = TOK.TOKdivass;
- return;
-
- case '*': // '/*'
- p++;
- linnum = loc.linnum;
- while (true) {
- while (true) {
- ubyte c = *p;
- switch (c) {
- case '/':
- break;
-
- case '\n':
- loc.linnum++;
- p++;
- continue;
-
- case '\r':
- p++;
- if (*p != '\n') {
- loc.linnum++;
- }
- continue;
-
- case 0:
- case 0x1A:
- error("unterminated /* */ comment");
- p = end;
- t.value = TOK.TOKeof;
- return;
-
- default:
- if (c & 0x80) {
- uint u = decodeUTF();
- if (u == PS || u == LS) {
- loc.linnum++;
- }
- }
- p++;
- continue;
- }
- break;
- }
- p++;
- if (p[-2] == '*' && p - 3 != t.ptr) { // the '/' follows a '*', and that '*' is not the one that opened the comment
- break;
- }
- }
-
- if (commentToken) {
- t.value = TOK.TOKcomment;
- return;
- }
- // if /** but not /**/
- else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) {
- getDocComment(t, lastLine == linnum); //! ?
- }
- continue;
-
- case '/': // do // style comments
- linnum = loc.linnum;
- while (1) {
- ubyte c = *++p;
- switch (c) {
- case '\n':
- break;
-
- case '\r':
- if (p[1] == '\n') {
- p++;
- }
- break;
-
- case 0:
- case 0x1a:
- if (commentToken) {
- p = end;
- t.value = TOK.TOKcomment;
- return;
- }
- if (doDocComment && t.ptr[2] == '/') {
- getDocComment(t, lastLine == linnum);
- }
- p = end;
- t.value = TOK.TOKeof;
- return;
-
- default:
- if (c & 0x80) {
- uint u = decodeUTF();
- if (u == PS || u == LS) {
- break;
- }
- }
- continue;
- }
- break;
- }
-
- if (commentToken) {
- p++;
- loc.linnum++;
- t.value = TOK.TOKcomment;
- return;
- }
- if (doDocComment && t.ptr[2] == '/') {
- getDocComment(t, lastLine == linnum);
- }
-
- p++;
- loc.linnum++;
- continue;
-
- case '+':
- {
- int nest;
- linnum = loc.linnum;
- p++;
- nest = 1;
- while (1) {
- ubyte c = *p;
- switch (c) {
- case '/':
- p++;
- if (*p == '+') {
- p++;
- nest++;
- }
- continue;
-
- case '+':
- p++;
- if (*p == '/') {
- p++;
- if (--nest == 0) {
- break;
- }
- }
- continue;
-
- case '\r':
- p++;
- if (*p != '\n') {
- loc.linnum++;
- }
- continue;
-
- case '\n':
- loc.linnum++;
- p++;
- continue;
-
- case 0:
- case 0x1A:
- error("unterminated /+ +/ comment");
- p = end;
- t.value = TOK.TOKeof;
- return;
-
- default:
- if (c & 0x80) {
- uint u = decodeUTF();
- if (u == PS || u == LS) {
- loc.linnum++;
- }
- }
- p++;
- continue;
- }
- break;
- }
- if (commentToken) {
- t.value = TOK.TOKcomment;
- return;
- }
- if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) {
- // if /++ but not /++/
- getDocComment(t, lastLine == linnum);
- }
- continue;
- }
-
- default:
- break;
- }
- t.value = TOK.TOKdiv;
- return;
-
- case '.':
- p++;
- if (isdigit(*p)) {
- p--; // back onto the '.' before handing the literal to inreal()
- t.value = inreal(t);
- }
- else if (p[0] == '.') {
- if (p[1] == '.') {
- p += 2;
- t.value = TOK.TOKdotdotdot;
- }
- else {
- p++;
- t.value = TOK.TOKslice;
- }
- }
- else {
- t.value = TOK.TOKdot;
- }
- return;
-
- case '&':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKandass;
- }
- else if (*p == '&') {
- p++;
- t.value = TOK.TOKandand;
- }
- else {
- t.value = TOK.TOKand;
- }
- return;
-
- // |, ||, |=
- case '|':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKorass;
- }
- else if (*p == '|') {
- p++;
- t.value = TOK.TOKoror;
- }
- else {
- t.value = TOK.TOKor;
- }
- return;
-
- case '-':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKminass;
- }
- else if (*p == '-') {
- p++;
- t.value = TOK.TOKminusminus;
- }
- else {
- t.value = TOK.TOKmin;
- }
- return;
-
- // +, +=, ++
- case '+':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKaddass; // +=
- }
- else if (*p == '+') {
- p++;
- t.value = TOK.TOKplusplus; // ++
- }
- else {
- t.value = TOK.TOKadd; // +
- }
- return;
-
- // <, <=, <<=, <<, <>=, <>
- case '<':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKle; // <=
- }
- else if (*p == '<') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKshlass; // <<=
- }
- else {
- t.value = TOK.TOKshl; // <<
- }
- }
- else if (*p == '>') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKleg; // <>=
- }
- else {
- t.value = TOK.TOKlg; // <>
- }
- }
- else {
- t.value = TOK.TOKlt; // <
- }
- return;
-
- // >, >>, >>>, >=, >>=, >>>=
- case '>':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKge; // >=
- }
- else if (*p == '>') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKshrass; // >>=
- }
- else if (*p == '>') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKushrass; // >>>=
- }
- else {
- t.value = TOK.TOKushr; // >>>
- }
- }
- else {
- t.value = TOK.TOKshr; // >>
- }
- }
- else {
- t.value = TOK.TOKgt; // >
- }
- return;
-
- case '!':
- p++;
- if (*p == '=') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKnotidentity; // !==
- }
- else {
- t.value = TOK.TOKnotequal; // !=
- }
- }
- else if (*p == '<') {
- p++;
- if (*p == '>') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKunord; // !<>=
- }
- else {
- t.value = TOK.TOKue; // !<>
- }
- }
- else if (*p == '=') {
- p++;
- t.value = TOK.TOKug; // !<=
- }
- else {
- t.value = TOK.TOKuge; // !<
- }
- }
- else if (*p == '>') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKul; // !>=
- }
- else {
- t.value = TOK.TOKule; // !>
- }
- }
- else {
- t.value = TOK.TOKnot; // !
- }
- return;
-
- case '=':
- p++;
- if (*p == '=') {
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKidentity; // ===
- }
- else {
- t.value = TOK.TOKequal; // ==
- }
- }
- else {
- t.value = TOK.TOKassign; // =
- }
- return;
-
- case '~':
- p++;
- if (*p == '=') {
- p++;
- t.value = TOK.TOKcatass; // ~=
- }
- else {
- t.value = TOK.TOKtilde; // ~
- }
- return;
-
- // SINGLE
- case '(': p++; t.value = TOK.TOKlparen; return;
-
- case ')': p++; t.value = TOK.TOKrparen; return;
-
- case '[': p++; t.value = TOK.TOKlbracket; return;
-
- case ']': p++; t.value = TOK.TOKrbracket; return;
-
- case '{': p++; t.value = TOK.TOKlcurly; return;
-
- case '}': p++; t.value = TOK.TOKrcurly; return;
-
- case '?': p++; t.value = TOK.TOKquestion; return;
-
- case ',': p++; t.value = TOK.TOKcomma; return;
-
- case ';': p++; t.value = TOK.TOKsemicolon; return;
-
- case ':': p++; t.value = TOK.TOKcolon; return;
-
- case '$': p++; t.value = TOK.TOKdollar; return;
-
- // DOUBLE
- case '*': p++; if (*p == '=') {
- p++; t.value = TOK.TOKmulass;
- }
- else {
- t.value = TOK.TOKmul;
- } return;
-
- case '%': p++; if (*p == '=') {
- p++; t.value = TOK.TOKmodass;
- }
- else {
- t.value = TOK.TOKmod;
- } return;
-
- case '^': p++; if (*p == '=') {
- p++; t.value = TOK.TOKxorass;
- }
- else {
- t.value = TOK.TOKxor;
- } return;
-
- // removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return;
-
-
- case '#':
- p++;
- Pragma(); // the '#' line is handled by Pragma(); scanning then resumes
- continue;
-
- default:
- {
- debug writefln(" default char");
- ubyte c = *p;
- if (c & 0x80) {
- uint u = decodeUTF();
- // Check for start of unicode identifier
- if (isUniAlpha(u)) {
- goto case_ident;
- }
-
- if (u == PS || u == LS) {
- loc.linnum++;
- p++;
- continue;
- }
- }
- if (isprint(c)) {
- error("unsupported char '%s'", cast(char)c);
- }
- else {
- error("unsupported char 0x%02x", cast(ubyte)c);
- }
- p++;
- continue;
- }
- }
- }
- }
-
-
-
- /**
-  * Parse one escape sequence.
-  *
-  * On entry p points at the character that follows the backslash. On return
-  * p has been moved past the sequence and the value of the escape is
-  * returned:
-  *   - \' \" \? \\            the character itself
-  *   - \a \b \f \n \r \t \v   the matching control code
-  *   - \xHH \uHHHH \UHHHHHHHH the value of the 2/4/8 hex digits (an error is
-  *                            reported when fewer digits are present)
-  *   - \ooo                   up to three octal digits
-  *   - end of input           a backslash, p is not moved
-  * Anything else is reported as an undefined escape sequence.
-  *
-  * NOTE(review): named character entities (\&name;) are recognised
-  * syntactically only. The lookup (HtmlNamedEntity) is still commented out,
-  * so '&' is returned for every entity.
-  */
- uint escapeSequence() {
-     uint c;
-     int n;
-     int ndigits;
-
-     c = *p;
-     switch (c) {
-     case '\'':
-     case '"':
-     case '?':
-     case '\\':
-     Lconsume:
-         p++;
-         break;
-
-     case 'a': c = 7; goto Lconsume;
-
-     case 'b': c = 8; goto Lconsume;
-
-     case 'f': c = 12; goto Lconsume;
-
-     case 'n': c = 10; goto Lconsume;
-
-     case 'r': c = 13; goto Lconsume;
-
-     case 't': c = 9; goto Lconsume;
-
-     case 'v': c = 11; goto Lconsume;
-
-     case 'u':
-         ndigits = 4;
-         goto Lhex;
-
-     case 'U':
-         ndigits = 8;
-         goto Lhex;
-
-     case 'x':
-         ndigits = 2;
-     Lhex:
-         p++;
-         c = *p;
-         if (ishex(c)) {
-             uint v;
-             n = 0;
-             v = 0;
-             while (1) {
-                 if (isdigit(c)) {
-                     c -= '0';
-                 }
-                 else if (islower(c)) {
-                     c -= 'a' - 10;
-                 }
-                 else {
-                     c -= 'A' - 10;
-                 }
-                 v = v * 16 + c;
-                 c = *++p;
-                 if (++n == ndigits) {
-                     break;
-                 }
-                 if (!ishex(c)) {
-                     error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
-                     break;
-                 }
-             }
-             //! if( ndigits != 2 && !utf_isValidDchar(v))
-             //! error("invalid UTF character \\U%08x", v);
-             c = v;
-         }
-         else {
-             // c is a uint: cast it so that "%s" shows the offending
-             // character rather than its decimal code
-             error("undefined escape hex sequence \\%s\n", cast(char)c);
-         }
-         break;
-
-     case '&': // named character entity
-         for (ubyte *idstart = ++p; 1; p++) {
-             switch (*p) {
-             case ';':
-                 //!!!
-                 /+
-                 * c = HtmlNamedEntity(idstart, p - idstart);
-                 * if( c == ~0 )
-                 * {
-                 * error("unnamed character entity &%.*s;", p - idstart, idstart);
-                 * c = ' ';
-                 * }
-                 +/
-                 // The ';' belongs to the entity: consume it so that the
-                 // caller does not see it as the next source character.
-                 p++;
-                 break;
-
-             default:
-                 if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) {
-                     continue;
-                 }
-                 error("unterminated named entity");
-                 break;
-             }
-             break;
-         }
-         break;
-
-     case 0:
-     case 0x1a: // end of file
-         c = '\\';
-         break;
-
-     default:
-         if (isoctal(c)) {
-             ubyte v;
-             n = 0;
-             do {
-                 v = v * 8 + (c - '0');
-                 c = *++p;
-             } while (++n < 3 && isoctal(c));
-             c = v;
-         }
-         else {
-             // same cast as above: print the character, not its number
-             error("undefined escape sequence \\%s\n", cast(char)c);
-         }
-         break;
-     }
-     return(c);
- }
-
- /**
-  * Lex a "what you see is what you get" string (r"..." or `...`).
-  *
-  * Params:
-  *  t  = token that receives the string in t.ustring and its postfix
-  *  tc = the terminating quote character ('"' or '`')
-  *
-  * On entry p is on the opening quote. No escape processing is done. Line
-  * ends are normalised to '\n' (CR LF and a lone CR both become one '\n')
-  * and counted in loc.linnum. Non-ASCII text is copied through byte for byte.
-  *
-  * Changes from the earlier version, all to match escapeStringConstant():
-  *  - every write to the shared buffer is a single char (a uint was passed
-  *    before);
-  *  - no NUL byte is appended to the literal;
-  *  - the result is .dup'ed, because stringbuffer is static and is reused by
-  *    the next literal;
-  *  - on end of input p is moved back onto the terminator instead of being
-  *    left one past it.
-  *
-  * Returns: TOK.TOKstring (with an empty string after an "unterminated
-  * string constant" error).
-  */
- TOK wysiwygStringConstant(Token *t, int tc) {
-     uint c;
-     Loc start = loc;
-
-     p++;
-     stringbuffer.offset = 0;
-     while (1) {
-         c = *p++;
-         switch (c) {
-         case '\n':
-             loc.linnum++;
-             break;
-
-         case '\r':
-             if (*p == '\n') {
-                 continue; // ignore
-             }
-             c = '\n'; // treat EndOfLine as \n character
-             loc.linnum++;
-             break;
-
-         case 0:
-         case 0x1a:
-             p--; // stay on the end-of-input byte so the next scan sees it
-             error("unterminated string constant starting at %s", start.toChars());
-             t.ustring = "";
-             t.postfix = 0;
-             return(TOK.TOKstring);
-
-         case '"':
-         case '`':
-             if (c == tc) {
-                 t.ustring = stringbuffer.toString().dup;
-                 stringPostfix(t);
-                 return(TOK.TOKstring);
-             }
-             break;
-
-         default:
-             if (c & 0x80) {
-                 ubyte* seq = p - 1; // first byte of the multi-byte sequence
-                 p--;
-                 uint u = decodeUTF();
-                 p++;
-                 if (u == PS || u == LS) {
-                     loc.linnum++;
-                 }
-                 // copy the bytes that were consumed, unchanged
-                 for (; seq < p; seq++) {
-                     stringbuffer.write(cast(char)*seq);
-                 }
-                 continue;
-             }
-             break;
-         }
-         stringbuffer.write(cast(char)c);
-     }
- }
-
- /**
-  * Lex hex strings:
-  *   x"0A ae 34FE BD"
-  *
-  * On entry p is on the opening quote. Each pair of hex digits produces one
-  * byte of t.ustring; white space and line ends between digits are skipped
-  * (line ends are counted in loc.linnum).
-  *
-  * Changes from the earlier version:
-  *  - bytes are written to the shared buffer as single chars (a uint was
-  *    passed before);
-  *  - a character that is not a hex digit is reported and then skipped; it is
-  *    no longer folded into the value as if it were a digit, and a Unicode
-  *    line/paragraph separator only bumps the line count;
-  *  - the "non-hex character" message prints the character rather than its
-  *    numeric code;
-  *  - the result is .dup'ed, because stringbuffer is static and is reused by
-  *    the next literal;
-  *  - on end of input p is moved back onto the terminator instead of being
-  *    left one past it.
-  *
-  * Returns: TOK.TOKstring (with an empty string after an "unterminated
-  * string constant" error).
-  */
- TOK hexStringConstant(Token *t) {
-     uint c;
-     Loc start = loc;
-     uint n = 0;     // number of hex digits seen so far
-     uint v;         // byte being assembled from two digits
-
-     p++;
-     stringbuffer.offset = 0;
-     while (1) {
-         c = *p++;
-         switch (c) {
-         case ' ':
-         case '\t':
-         case '\v':
-         case '\f':
-             continue; // skip white space
-
-         case '\r':
-             if (*p == '\n') {
-                 continue; // ignore
-             }
-
-         // Treat isolated '\r' as if it were a '\n'
-         case '\n':
-             loc.linnum++;
-             continue;
-
-         case 0:
-         case 0x1a:
-             p--; // stay on the end-of-input byte so the next scan sees it
-             error("unterminated string constant starting at %s", start.toChars());
-             t.ustring = "";
-             t.postfix = 0;
-             return(TOK.TOKstring);
-
-         case '"':
-             if (n & 1) {
-                 error("odd number (%d) of hex characters in hex string", n);
-                 stringbuffer.write(cast(char)v);
-             }
-             t.ustring = stringbuffer.toString().dup;
-             stringPostfix(t);
-             return(TOK.TOKstring);
-
-         default:
-             if (c >= '0' && c <= '9') {
-                 c -= '0';
-             }
-             else if (c >= 'a' && c <= 'f') {
-                 c -= 'a' - 10;
-             }
-             else if (c >= 'A' && c <= 'F') {
-                 c -= 'A' - 10;
-             }
-             else if (c & 0x80) {
-                 p--;
-                 uint u = decodeUTF();
-                 p++;
-                 if (u == PS || u == LS) {
-                     loc.linnum++;
-                 }
-                 else {
-                     error("non-hex character \\u%x", u);
-                 }
-                 continue; // not a digit: contributes nothing to the data
-             }
-             else {
-                 error("non-hex character '%s'", cast(char)c);
-                 continue; // not a digit: contributes nothing to the data
-             }
-
-             if (n & 1) {
-                 v = (v << 4) | c;
-                 stringbuffer.write(cast(char)v);
-             }
-             else {
-                 v = c;
-             }
-             n++;
-             break;
-         }
-     }
- }
-
- /**
-  * Lex a double-quoted string literal with escape processing.
-  *
-  * Params:
-  *  t    = token that receives the string in t.ustring and its postfix
-  *  wide = not used by this function
-  *
-  * On entry p is on the opening quote. Line ends are normalised to '\n'
-  * (CR LF, a lone CR and the Unicode line/paragraph separators all become
-  * one '\n') and counted in loc.linnum.
-  *
-  * Changes from the earlier version:
-  *  - the value of a \u, \U or \& escape is appended as UTF-8 (the raw uint
-  *    was passed to the buffer before);
-  *  - non-ASCII source text is copied through byte for byte (the decoded
-  *    code point was truncated to a single char before).
-  *
-  * Returns: TOK.TOKstring (with an empty string after an "unterminated
-  * string constant" error).
-  */
- TOK escapeStringConstant(Token *t, int wide) {
-     uint c;
-     Loc start = loc;
-
-     // Append code point u to stringbuffer as UTF-8.
-     void putUTF8(uint u) {
-         if (u > 0x10FFFF) {
-             error("invalid UTF character \\U%08x", u);
-             u = 0xFFFD; // Unicode replacement character
-         }
-         if (u < 0x80) {
-             stringbuffer.write(cast(char)u);
-         }
-         else if (u < 0x800) {
-             stringbuffer.write(cast(char)(0xC0 | (u >> 6)));
-             stringbuffer.write(cast(char)(0x80 | (u & 0x3F)));
-         }
-         else if (u < 0x10000) {
-             stringbuffer.write(cast(char)(0xE0 | (u >> 12)));
-             stringbuffer.write(cast(char)(0x80 | ((u >> 6) & 0x3F)));
-             stringbuffer.write(cast(char)(0x80 | (u & 0x3F)));
-         }
-         else {
-             stringbuffer.write(cast(char)(0xF0 | (u >> 18)));
-             stringbuffer.write(cast(char)(0x80 | ((u >> 12) & 0x3F)));
-             stringbuffer.write(cast(char)(0x80 | ((u >> 6) & 0x3F)));
-             stringbuffer.write(cast(char)(0x80 | (u & 0x3F)));
-         }
-     }
-
-     p++;
-     stringbuffer.offset = 0;
-     while (1) {
-         c = *p++;
-         switch (c) {
-         case '\\':
-             switch (*p) {
-             case 'u':
-             case 'U':
-             case '&':
-                 // these can denote a code point above 0x7F
-                 c = escapeSequence();
-                 putUTF8(c);
-                 continue;
-
-             default:
-                 c = escapeSequence();
-                 break;
-             }
-             break;
-
-         case '\n':
-             loc.linnum++;
-             break;
-
-         case '\r':
-             if (*p == '\n') {
-                 continue; // ignore
-             }
-             c = '\n'; // treat EndOfLine as \n character
-             loc.linnum++;
-             break;
-
-         case '"':
-             // copy: stringbuffer is static and is reused by the next literal
-             t.ustring = stringbuffer.toString().dup;
-             stringPostfix(t);
-
-             return(TOK.TOKstring);
-
-         case 0:
-         case 0x1a:
-             p--;
-             error("unterminated string constant starting at %s", start.toChars());
-             t.ustring = "";
-             t.postfix = 0;
-             return(TOK.TOKstring);
-
-         default:
-             if (c & 0x80) {
-                 ubyte* seq = p - 1; // first byte of the multi-byte sequence
-                 p--;
-                 c = decodeUTF();
-                 p++;
-                 if (c == LS || c == PS) {
-                     loc.linnum++;
-                     stringbuffer.write(cast(char)'\n');
-                 }
-                 else {
-                     // copy the bytes that were consumed, unchanged
-                     for (; seq < p; seq++) {
-                         stringbuffer.write(cast(char)*seq);
-                     }
-                 }
-                 continue;
-             }
-             break;
-         }
-         stringbuffer.write(cast(char)c);
-     }
- }
-
- /*
- * Lex a character literal. On entry p is on the opening quote.
- * The value is stored in t.uns64value and the token kind is returned:
- * TOKcharv by default, TOKwcharv for a \u escape, TOKdcharv for a \U or \&
- * escape; for a non-ASCII source character the kind depends on the decoded
- * value (see below). A line end, end of input, an empty literal or a missing
- * closing quote is reported as "unterminated character constant".
- * The wide parameter is not used in this body.
- */
- TOK charConstant(Token *t, int wide) {
- uint c;
- TOK tk = TOK.TOKcharv;
-
- //printf("Lexer.charConstant\n");
- p++; // step over the opening quote
- c = *p++;
- switch (c) {
- case '\\':
- switch (*p) {
- case 'u':
- t.uns64value = escapeSequence();
- tk = TOK.TOKwcharv;
- break;
-
- case 'U':
- case '&':
- t.uns64value = escapeSequence();
- tk = TOK.TOKdcharv;
- break;
-
- default:
- t.uns64value = escapeSequence();
- break;
- }
- break;
-
- case '\n':
- L1:
- loc.linnum++; // count the line, then fall through to the error below
-
- case '\r':
- case 0:
- case 0x1a:
- case '\'':
- error("unterminated character constant");
- return(tk);
-
- default:
- if (c & 0x80) {
- p--;
- c = decodeUTF();
- p++;
- if (c == LS || c == PS) {
- goto L1;
- }
- if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) { // values in these ranges give TOKwcharv, all others TOKdcharv
- tk = TOK.TOKwcharv;
- }
- else {
- tk = TOK.TOKdcharv;
- }
- }
- t.uns64value = c;
- break;
- }
-
- if (*p != '\'') {
- error("unterminated character constant");
- return(tk);
- }
- p++;
- return(tk);
- }
-
- /**
-  * Read the optional postfix of a string literal.
-  *
-  * If the character at p is 'c', 'w' or 'd' it is stored in t.postfix and
-  * consumed; otherwise t.postfix is set to 0 and p is left where it is.
-  */
- void stringPostfix(Token *t) {
-     ubyte suffix = *p;
-
-     if (suffix == 'c' || suffix == 'w' || suffix == 'd') {
-         t.postfix = suffix;
-         p++;
-     }
-     else {
-         t.postfix = 0;
-     }
- }
-
- /***************************************
- * Read \u or \U unicode sequence
- * Input:
- * u 'u' or 'U'
- */
- /*
- * uint Wchar(uint u)
- * {
- * uint value;
- * uint n;
- * ubyte c;
- * uint nchars;
- *
- * nchars = (u == 'U') ? 8 : 4;
- * value = 0;
- * for (n = 0; 1; n++)
- * {
- * ++p;
- * if( n == nchars)
- * break;
- * c = *p;
- * if( !ishex(c))
- * {
- * error("\\%s sequence must be followed by %d hex characters", u, nchars);
- * break;
- * }
- * if( isdigit(c))
- * c -= '0';
- * else if( islower(c))
- * c -= 'a' - 10;
- * else
- * c -= 'A' - 10;
- * value <<= 4;
- * value |= c;
- * }
- * return value;
- * }
- */
-
- /**************************************
- * Read in a number.
- * If it's an integer, store it in tok.TKutok.Vlong.
- * integers can be decimal, octal or hex
- * Handle the suffixes U, UL, LU, L, etc.
- * If it's double, store it in tok.TKutok.Vdouble.
- * Returns:
- * TKnum
- * TKdouble,...
- */
-
- TOK number(Token *t) {
- //debug writefln("Lexer.number()");
- // We use a state machine to collect numbers
- enum STATE {
- STATE_initial,
- STATE_0,
- STATE_decimal,
- STATE_octal,
- STATE_octale,
- STATE_hex,
- STATE_binary,
- STATE_hex0,
- STATE_binary0,
- STATE_hexh,
- STATE_error
- }
-
- enum FLAGS {
- FLAGS_decimal = 1, // decimal
- FLAGS_unsigned = 2, // u or U suffix
- FLAGS_long = 4, // l or L suffix
- }
- FLAGS flags = FLAGS.FLAGS_decimal;
-
- int i;
- TOK result;
- int base;
-
- stringbuffer.offset = 0;
- // stringbuffer.data = null;
- STATE state = STATE.STATE_initial;
- ubyte * start = p;
-
- TOK _isreal() {
- p = start;
- return(inreal(t));
- }
-
- while (true) {
- char c = cast(char)*p;
- switch (state) {
- case STATE.STATE_initial: // opening state
- if (c == '0') {
- state = STATE.STATE_0;
- }
- else {
- state = STATE.STATE_decimal;
- }
- break;
-
- case STATE.STATE_0:
- flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal);
- switch (c) {
- // #if ZEROH
- // case 'H': // 0h
- // case 'h':
- // goto hexh;
- // #endif
- case 'X':
- case 'x':
- state = STATE.STATE_hex0;
- break;
-
- case '.':
- if (p[1] == '.') { // .. is a separate token
- goto done;
- }
-
- case 'i':
- case 'f':
- case 'F':
- goto _Real;
-
- // #if ZEROH
- // case 'E':
- // case 'e':
- // goto case_hex;
- // #endif
- case 'B':
- case 'b':
- state = STATE.STATE_binary0;
- break;
-
- case '0':
- …
Large files are truncated, but you can click here to view the full file