40007-Lexer.d - This D code appears to be part of a compile…

/tests/output/d/40007-Lexer.d

http://github.com/bengardner/uncrustify · D · 2413 lines · 1731 code · 275 blank · 407 comment · 565 complexity · 8e17c2a2568f7a38960f0145c06ff5c3 MD5 · raw file
Large files are truncated click here to view the full file

/+

 *      Copyright (c) 1999-2006 by Digital Mars

 *      All Rights Reserved

 *      written by Walter Bright www.digitalmars.com

 *      License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.

 *      See the included readme.txt for details.

 *      D Language conversion by: J Duncan

 +/



/**

 *      d language lexer

 */



module dparser.Lexer;



import dparser.Root;



import dparser.Tokens;

import dparser.Token;

import dparser.Keyword;



import dparser.Types;



import dparser.Module;

import dparser.Identifier;

import dparser.unialpha;



import dparser.OutBuffer;



//private import std.ctype;

//private import std.string;

//import dwf.core.debugapi;



// Module-level error number, initialised to 0. Nothing in the visible part of
// this file reads or writes it.
// NOTE(review): presumably a stand-in for the C runtime's errno used by the
// numeric-literal code further down - confirm against the rest of the file.
int errno = 0;



//#if _WIN32 && __DMC__

// from \dm\src\include\setlocal.h

//extern "C" char * __cdecl __locale_decpoint;

// D replacement for the C declaration quoted above: a plain module-level
// pointer with no initialiser (so it starts out null).
char* __locale_decpoint;

//#endif

//const uint LS = 0x2028;	// UTF line separator

//const uint PS = 0x2029;	// UTF paragraph separator



//extern int isUniAlpha(unsigned u);

//extern int HtmlNamedEntity(unsigned char *p, int length);



/**

 *      Lexer object

 */



class Lexer

{

    // Table shared by every Lexer: maps identifier text to its Identifier
    // object. Filled by idPool() and by scan() when it meets an identifier.
    static           Identifier[char[]]       stringtable;

    // Scratch buffer shared by every Lexer; allocated by the first constructor
    // call and reused by scan() to assemble escaped string literals.
    static OutBuffer stringbuffer;

    // Only mentioned in commented-out code in nextToken() within this part of
    // the file. NOTE(review): looks like an unused token free list - confirm.
    static Token     * freelist;



    Token            token;        // current token

    Module           mod;          // current module

    Loc              loc;          // for error messages

    ubyte            *base;        // pointer to start of buffer

    ubyte            *end;         // past end of buffer

    ubyte            *p;           // current character

    int              doDocComment; // collect doc comment information

    int              anyToken;     // !=0 means seen at least one token

    int              commentToken; // !=0 means comments are TOKcomment's





    /**
     *      Set up a lexer over part of a source buffer.
     *
     *      mod           module being lexed (also used for the initial location)
     *      base          start of the source buffer
     *      begoffset     offset from base at which lexing starts
     *      endoffset     offset from base of the end of the text
     *      doDocComment  stored as-is; non-zero means collect doc comments
     *      commentToken  stored as-is; non-zero means comments become tokens
     *
     *      If the text starts with "#!", that whole first line is skipped and
     *      the line counter starts at 2.
     */
    this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
    {
        // stringbuffer is static: the first lexer constructed allocates it.
        if (stringbuffer is null)
            stringbuffer = new OutBuffer;

        loc = Loc(mod, 1);

        this.mod          = mod;
        this.base         = base;
        this.p            = base + begoffset;
        this.end          = base + endoffset;
        this.doDocComment = doDocComment;
        this.commentToken = commentToken;

        // Nothing more to do unless the text begins with a "#!" line.
        if (p[0] != '#' || p[1] != '!')
            return;

        p += 2;
        for (;;)
        {
            ubyte ch = *p;

            if (ch == '\n')
            {
                p++;
                break;
            }
            if (ch == '\r')
            {
                // treat CR LF as a single line ending
                p++;
                if (*p == '\n')
                    p++;
                break;
            }
            if (ch == 0 || ch == 0x1A)
                break;                  // end of input: leave p on the terminator
            if ((ch & 0x80) != 0)
            {
                uint u = decodeUTF();
                if (u == PS || u == LS)
                    break;              // Unicode paragraph/line separator ends the line
            }
            p++;
        }

        loc.linnum = 2;
    }







    /**
     *      Return the unique Identifier for a piece of identifier text.
     *
     *      Looks str up in the shared stringtable. If it is not there yet, a new
     *      Identifier with token value TOK.TOKidentifier is created, stored
     *      under str and returned; otherwise the stored Identifier is returned.
     *
     *      The pointer produced by the 'in' expression is reused so the table is
     *      searched once on a hit (the same pattern scan() uses), instead of
     *      once for the test, once for the store and once for the return.
     */
    static Identifier idPool(in char[] str) {
        Identifier* slot = str in stringtable;

        if (slot !is null) {
            return(*slot);
        }

        Identifier ident = new Identifier(str, TOK.TOKidentifier);
        stringtable[str] = ident;
        return(ident);
    }



    /**
     *      One-time lexer setup. Runs three initialisers in a fixed order:
     *      the character map, then the keyword tokens and identifiers, then
     *      the standard lexer tokens.
     */
    static void initKeywords()
    {
        cmtable_init();                         // character classification map
        dparser.Keyword.initKeywords();         // keyword tokens & identifiers
        dparser.Token.createLexerTokens();      // standard lexer tokens
    }



    /**
     *      Merge two document comments into one.
     *
     *      Returns c2 when c1 is empty, c1 when c2 is empty, and otherwise a
     *      new array holding c1, a single "\n", then c2.
     */
    static char[] combineComments(char[] c1, char[] c2) {
        if (c1.length == 0) {
            return(c2);
        }
        if (c2.length == 0) {
            return(c1);
        }
        return(c1 ~ "\n" ~ c2);
    }



    /**
     *      Decode the UTF-8 sequence that starts at p.
     *
     *      Returns the decoded code point and leaves p on the LAST byte of the
     *      sequence, so the caller's usual p++ steps onto the next character.
     *
     *      Invalid input (stray continuation byte, unknown lead byte, truncated
     *      sequence, overlong form, surrogate, or value above U+10FFFF) is
     *      reported through error() and yields U+FFFD. In that case p is left
     *      on the last byte that was accepted as part of the sequence, so the
     *      caller still makes progress.
     *
     *      Must only be called when *p has its top bit set.
     *
     *      This replaces a placeholder that returned 'X' for every non-ASCII
     *      byte and never moved p, which made the PS/LS checks and the
     *      isUniAlpha() tests in the callers meaningless.
     *
     *      NOTE(review): continuation bytes are read without comparing against
     *      'end'. This relies on the buffer being terminated by 0 or 0x1A, as
     *      scan() already assumes; neither value is a continuation byte, so the
     *      loop stops there. Confirm against the code that loads the buffer.
     */
    uint decodeUTF() {
        ubyte* s    = p;
        ubyte  lead = *s;

        assert(lead & 0x80);
        if (!(lead & 0x80)) {
            return(lead);
        }

        uint extra;             // continuation bytes the lead byte announces
        uint u;                 // code point being assembled
        uint smallest;          // smallest value that really needs this length

        if ((lead & 0xE0) == 0xC0) {
            extra    = 1;
            u        = lead & 0x1F;
            smallest = 0x80;
        }
        else if ((lead & 0xF0) == 0xE0) {
            extra    = 2;
            u        = lead & 0x0F;
            smallest = 0x800;
        }
        else if ((lead & 0xF8) == 0xF0) {
            extra    = 3;
            u        = lead & 0x07;
            smallest = 0x10000;
        }
        else {
            // 10xxxxxx (stray continuation) or 11111xxx (not UTF-8):
            // consume just this byte.
            error("invalid UTF-8 sequence");
            return(0xFFFD);
        }

        for (uint i = 1; i <= extra; i++) {
            ubyte b = s[i];
            if ((b & 0xC0) != 0x80) {
                // Sequence cut short; stay on the last byte that belonged to it.
                p = s + (i - 1);
                error("invalid UTF-8 sequence");
                return(0xFFFD);
            }
            u = (u << 6) | (b & 0x3F);
        }

        p = s + extra;

        if (u < smallest || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF)) {
            error("invalid UTF-8 sequence");
            return(0xFFFD);
        }
        return(u);
    }



    /**
     *      Report an error at the lexer's current location (loc).
     *
     *      The variadic arguments are handed to formatLoc() together with loc.
     *      The message is printed only when a module is attached and
     *      global.gag is not set. If global.errors has already reached
     *      global.max_errors at that point, an Exception("too many errors") is
     *      thrown and the error count is not incremented; otherwise
     *      global.errors is incremented, whether or not anything was printed.
     *
     *      The formatted message is printed through "%s" so that a '%' inside
     *      it (for example copied from the source text) is written literally
     *      rather than being treated as a format specifier by writefln.
     */
    void error(...) {
        if ((mod !is null) && !global.gag) {
            writefln("%s", formatLoc(loc, _arguments, _argptr));

            if (global.errors >= global.max_errors) {                   // moderate blizzard of cascading messages
                throw new Exception("too many errors");
            }
        }

        global.errors++;
    }



    /**
     *      Report an error at an explicit location.
     *
     *      Same behaviour as error(), except that the location is the 'loc'
     *      argument (which shadows the member of the same name). The message is
     *      printed only when a module is attached and global.gag is not set;
     *      global.errors is incremented unless the "too many errors" Exception
     *      is thrown.
     *
     *      Two fixes relative to the previous version:
     *       - the limit is global.max_errors, as in error(), instead of a
     *         hard-coded 20;
     *       - the formatted message is printed through "%s" so a '%' inside it
     *         is not re-interpreted by writefln.
     */
    void errorLoc(Loc loc, ...) {
        if ((mod !is null) && !global.gag) {
            writefln("%s", formatLoc(loc, _arguments, _argptr));

            if (global.errors >= global.max_errors) {                   // moderate blizzard of cascading messages
                throw new Exception("too many errors");
            }
        }

        global.errors++;
    }





    /**
     *      Advance to the next token and return its TOK value.
     *
     *      If a token is already chained behind the current one (token.next,
     *      as set up by peek()), it is copied over 'token'; otherwise a fresh
     *      token is scanned straight into 'token'.
     */
    TOK nextToken() {
        Token* queued = token.next;

        if (queued is null) {
            scan(&token);
        }
        else {
            // Overwrite the current token with the queued one; the copy
            // carries the queued token's own 'next' link along.
            memcpy(&token, queued, Token.sizeof);
        }
        return(token.value);
    }



    /**
     *      Return the token that follows ct without consuming it.
     *
     *      If ct already has a successor it is returned as is. Otherwise a new
     *      Token is allocated, scanned, linked in as ct.next and returned.
     */
    Token* peek(inout Token ct) {
        if (ct.next) {
            return(ct.next);
        }

        Token* ahead = new Token;
        scan(ahead);
        ahead.next = null;
        ct.next    = ahead;
        return(ahead);
    }



    // Turn next token in buffer into a token.
    //
    // Starting at p, skips white space and line ends (counting lines in
    // loc.linnum) and fills in *t with the next token: t.ptr is set to the
    // first byte of the token, t.value to its TOK code, and for some token
    // kinds t.ident / t.ustring / t.postfix / t.uns64value as well.
    //
    // Comments are skipped, unless commentToken is non-zero, in which case
    // each comment is returned as a TOK.TOKcomment token. When doDocComment is
    // non-zero, comments opening with "/**", "///" or "/++" are handed to
    // getDocComment().
    //
    // End of input is a 0 or 0x1A byte; the 'end' pointer is not consulted
    // except to park p there after an unterminated comment.
    //
    // The switch relies on implicit fall-through between case labels and on
    // 'goto case_ident' / 'goto Llen' jumps, so statement order matters.



    void scan(Token* t) {

//		debug writefln("scan token");

        // line on which this scan started; compared with the line a comment
        // starts on when calling getDocComment()
        uint lastLine = loc.linnum;

        // line on which the comment currently being skipped started
        uint linnum;



        t.blockComment = null;

        t.lineComment  = null;

        while (true) {

            t.ptr = p;

//			debug writefln( "    p = %d, *p = ", cast(uint)p, cast(char)*p );

            switch (*p) {

            case 0:

            case 0x1a:

                t.value = TOK.TOKeof;                                           // end of file

//					debug writefln( "    EOF" );

                return;



            case ' ':

            case '\t':

            case '\v':

            case '\f':

                p++;

//					debug writefln( "    whitespace" );

                continue;                                                               // skip white space



            case '\r':

//					debug writefln( "    cr" );

                p++;

                if (*p != '\n') {                                               // if CR stands by itself

                    loc.linnum++;

                }

                continue;                                                               // skip white space



            case '\n':

//					debug writefln( "    nl" );

                p++;

                loc.linnum++;

                continue;                                                               // skip white space



            case '0':

            case '1':

            case '2':

            case '3':

            case '4':

            case '5':

            case '6':

            case '7':

            case '8':

            case '9':

                t.value = number(t);

                return;



/*

 * #if CSTRINGS

 *                          case '\'':

 *                              t.value = charConstant(t, 0);

 *                              return;

 *

 *                          case '"':

 *                              t.value = stringConstant(t,0);

 *                              return;

 *

 *                          case 'l':

 *                          case 'L':

 *                              if( p[1] == '\'')

 *                              {

 *                                  p++;

 *                                  t.value = charConstant(t, 1);

 *                                  return;

 *                              }

 *                              else if( p[1] == '"')

 *                              {

 *                                  p++;

 *                                  t.value = stringConstant(t, 1);

 *                                  return;

 *                              }

 * #else

 */

            case '\'':

//					debug writefln( "    char" );

                t.value = charConstant(t, 0);

                return;



            // r"..." is a wysiwyg string; any other 'r' starts an identifier
            case 'r':

//					debug writefln( "    wysiwyg" );

                if (p[1] != '"') {

                    goto case_ident;

                }

                p++;

                // falls through with p on the opening '"'



            case '`':

                t.value = wysiwygStringConstant(t, *p);

                return;



            // x"..." is a hex string; any other 'x' starts an identifier
            case 'x':

//					debug writefln( "    hex string" );

                if (p[1] != '"') {

                    goto case_ident;

                }

                p++;

                t.value = hexStringConstant(t);

                return;





            case '"':

//					debug writefln( "    string" );

                t.value = escapeStringConstant(t, 0);

//					debug writefln( t.ustring );

                return;



            case '\\':                                  // escaped string literal

//					debug writefln( "    escaped string literal" );

                uint c;

                stringbuffer.offset = 0;

                // consecutive \x escapes are gathered into one string token
                do {

                    p++;

                    c = escapeSequence();

                    stringbuffer.write(c);

                } while (*p == '\\');

//					t.len = stringbuffer.offset;

//					stringbuffer.write(cast(byte)0);

                t.ustring = stringbuffer.toString;

//					memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset );

                t.postfix = 0;

                t.value   = TOK.TOKstring;

                return;



            case 'l':

            case 'L':

//	#endif



            case 'a':

            case 'b':

            case 'c':

            case 'd':

            case 'e':

            case 'f':

            case 'g':

            case 'h':

            case 'i':

            case 'j':

            case 'k':

            case 'm':

            case 'n':

            case 'o':

            case 'p':

            case 'q':                             /*case 'r':*/

            case 's':

            case 't':

            case 'u':

            case 'v':

            case 'w':                                         /*case 'x':*/

            case 'y':

            case 'z':

            case 'A':

            case 'B':

            case 'C':

            case 'D':

            case 'E':

            case 'F':

            case 'G':

            case 'H':

            case 'I':

            case 'J':

            case 'K':

            case 'M':

            case 'N':

            case 'O':

            case 'P':

            case 'Q':

            case 'R':

            case 'S':

            case 'T':

            case 'U':

            case 'V':

            case 'W':

            case 'X':

            case 'Y':

            case 'Z':

            case '_':

// Also reached by goto from the 'r' and 'x' cases and from the default case
// (non-ASCII character accepted by isUniAlpha).
case_ident:

                {

//					debug writefln( "    identifier" );

                    ubyte c;

                    // advance p past every following identifier character
                    do {

                        c = *++p;

                    } while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));



//					sv = stringtable.update((char *)t.ptr, p - t.ptr);

                    // copy the identifier text out of the source buffer
                    char[] tmp;

                    tmp.length = p - t.ptr;

                    memcpy(tmp.ptr, t.ptr, p - t.ptr);

                    // look the text up in the shared table, adding it if new
                    Identifier id;

                    Identifier * pid = tmp in stringtable;

                    if (pid) {

                        id = *pid;

                    }



                    if (id is null) {

                        id               = new Identifier(tmp, TOK.TOKidentifier);

                        stringtable[tmp] = id;

                    }



                    t.ident  = id;

                    t.value  = cast(TOK)id.value;

                    anyToken = 1;



                    // if special identifier token

                    if (*t.ptr == '_') {

                        static char date[11 + 1];

                        static char time[8 + 1];

                        static char timestamp[24 + 1];



                        if (!date[0]) {                         // lazy evaluation

                            //!!

                            /+

                             *      time_t t;

                             *      char *p;

                             *      .time(&t);

                             *      p = ctime(&t);

                             *      assert(p);

                             *      sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);

                             *      sprintf(time.ptr, "%.8s", p + 11);

                             *      sprintf(timestamp.ptr, "%.24s", p);

                             +/

                        }



                        if (mod && id is Id.FILE) {

                            t.value = TOK.TOKstring;

                            if (loc.filename.length) {

                                t.ustring = loc.filename;

                            }

                            else {

                                t.ustring = mod.ident.toChars();

                            }

                            goto Llen;

                        }

                        else if (mod && id == Id.LINE) {

                            t.value      = TOK.TOKint64v;

                            t.uns64value = loc.linnum;

                        }

                        // NOTE(review): in the three branches below the
                        // assignment to t.ustring is commented out and the
                        // date/time buffers above are never filled, so the
                        // token becomes TOKstring without t.ustring being set
                        // here.
                        else if (id == Id.DATE) {

                            t.value = TOK.TOKstring;

                            //! t.ustring = date;

                            goto Llen;

                        }

                        else if (id == Id.TIME) {

                            t.value = TOK.TOKstring;

                            //! t.ustring = time;

                            goto Llen;

                        }

                        else if (id == Id.TIMESTAMP) {

                            t.value = TOK.TOKstring;

                            //! t.ustring = timestamp;

// shared tail for all the string-valued special identifiers
Llen:

                            t.postfix = 0;

//							t.len = strlen((char *)t.ustring);

                        }

                    }

                    //printf("t.value = %d\n",t.value);

                    return;

                }



            // comments

            case '/':

                p++;

                switch (*p) {

                case '=':

                    p++;

                    t.value = TOK.TOKdivass;

                    return;



                case '*':                               // '/*'

                    p++;

                    linnum = loc.linnum;

                    // outer loop: repeat until a '/' preceded by '*' is found
                    while (true) {

                        // inner loop: advance to the next '/', counting lines
                        while (true) {

                            ubyte c = *p;

                            switch (c) {

                            case '/':

                                break;



                            case '\n':

                                loc.linnum++;

                                p++;

                                continue;



                            case '\r':

                                p++;

                                if (*p != '\n') {

                                    loc.linnum++;

                                }

                                continue;



                            case 0:

                            case 0x1A:

                                error("unterminated /* */ comment");

                                p       = end;

                                t.value = TOK.TOKeof;

                                return;



                            default:

                                if (c & 0x80) {

                                    uint u = decodeUTF();

                                    if (u == PS || u == LS) {

                                        loc.linnum++;

                                    }

                                }

                                p++;

                                continue;

                            }

                            break;

                        }

                        p++;

                        // p is now just past a '/'. The comment ends if the
                        // byte before that '/' is '*', unless that '*' is the
                        // one belonging to the opening "/*" (i.e. "/*/").
                        if (p[-2] == '*' && p - 3 != t.ptr) {

                            break;

                        }

                    }



                    if (commentToken) {

                        t.value = TOK.TOKcomment;

                        return;

                    }

                    // if /** but not /**/

                    else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) {

                        getDocComment(t, lastLine == linnum);                                           //! ?

                    }

                    continue;



                case '/':                                       // do // style comments

                    linnum = loc.linnum;

                    // advance p to the line terminator (left on its last byte)
                    while (1) {

                        ubyte c = *++p;

                        switch (c) {

                        case '\n':

                            break;



                        case '\r':

                            if (p[1] == '\n') {

                                p++;

                            }

                            break;



                        case 0:

                        case 0x1a:

                            // comment runs to end of input
                            if (commentToken) {

                                p       = end;

                                t.value = TOK.TOKcomment;

                                return;

                            }

                            if (doDocComment && t.ptr[2] == '/') {

                                getDocComment(t, lastLine == linnum);

                            }

                            p       = end;

                            t.value = TOK.TOKeof;

                            return;



                        default:

                            if (c & 0x80) {

                                uint u = decodeUTF();

                                if (u == PS || u == LS) {

                                    break;

                                }

                            }

                            continue;

                        }

                        break;

                    }



                    if (commentToken) {

                        p++;

                        loc.linnum++;

                        t.value = TOK.TOKcomment;

                        return;

                    }

                    if (doDocComment && t.ptr[2] == '/') {

                        getDocComment(t, lastLine == linnum);

                    }



                    p++;

                    loc.linnum++;

                    continue;



                // nesting comment: every "/+" raises the depth, every "+/"
                // lowers it, and the comment ends when the depth reaches 0
                case '+':

                    {

                        int nest;

                        linnum = loc.linnum;

                        p++;

                        nest = 1;

                        while (1) {

                            ubyte c = *p;

                            switch (c) {

                            case '/':

                                p++;

                                if (*p == '+') {

                                    p++;

                                    nest++;

                                }

                                continue;



                            case '+':

                                p++;

                                if (*p == '/') {

                                    p++;

                                    if (--nest == 0) {

                                        break;

                                    }

                                }

                                continue;



                            case '\r':

                                p++;

                                if (*p != '\n') {

                                    loc.linnum++;

                                }

                                continue;



                            case '\n':

                                loc.linnum++;

                                p++;

                                continue;



                            case 0:

                            case 0x1A:

                                error("unterminated /+ +/ comment");

                                p       = end;

                                t.value = TOK.TOKeof;

                                return;



                            default:

                                if (c & 0x80) {

                                    uint u = decodeUTF();

                                    if (u == PS || u == LS) {

                                        loc.linnum++;

                                    }

                                }

                                p++;

                                continue;

                            }

                            break;

                        }

                        if (commentToken) {

                            t.value = TOK.TOKcomment;

                            return;

                        }

                        if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) {

                            // if /++ but not /++/

                            getDocComment(t, lastLine == linnum);

                        }

                        continue;

                    }



                default:

                    break;

                }

                t.value = TOK.TOKdiv;

                return;



            // ., .., ... or a real literal that starts with '.'
            case '.':

                p++;

                if (isdigit(*p)) {

                    // back up so inreal() sees the leading '.'
                    p--;

                    t.value = inreal(t);

                }

                else if (p[0] == '.') {

                    if (p[1] == '.') {

                        p      += 2;

                        t.value = TOK.TOKdotdotdot;

                    }

                    else {

                        p++;

                        t.value = TOK.TOKslice;

                    }

                }

                else {

                    t.value = TOK.TOKdot;

                }

                return;



            // &, &&, &=
            case '&':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKandass;

                }

                else if (*p == '&') {

                    p++;

                    t.value = TOK.TOKandand;

                }

                else {

                    t.value = TOK.TOKand;

                }

                return;



            // |, ||, |=

            case '|':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKorass;

                }

                else if (*p == '|') {

                    p++;

                    t.value = TOK.TOKoror;

                }

                else {

                    t.value = TOK.TOKor;

                }

                return;



            // -, --, -=
            case '-':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKminass;

                }

                else if (*p == '-') {

                    p++;

                    t.value = TOK.TOKminusminus;

                }

                else {

                    t.value = TOK.TOKmin;

                }

                return;



            // +, +=, ++

            case '+':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKaddass;                                    // +=

                }

                else if (*p == '+') {

                    p++;

                    t.value = TOK.TOKplusplus;                                          // ++

                }

                else {

                    t.value = TOK.TOKadd;                                                       // +

                }

                return;



            // <, <=, <<=, <<, <>=, <>

            case '<':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKle;                                                // <=

                }

                else if (*p == '<') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKshlass;                                        // <<=

                    }

                    else {

                        t.value = TOK.TOKshl;                                           // <<

                    }

                }

                else if (*p == '>') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKleg;                                           // <>=

                    }

                    else {

                        t.value = TOK.TOKlg;                                            // <>

                    }

                }

                else {

                    t.value = TOK.TOKlt;                                                // <

                }

                return;



            // >, >>, >>>, >=, >>=, >>>=

            case '>':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKge;                                                // >=

                }

                else if (*p == '>') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKshrass;                                        // >>=

                    }

                    else if (*p == '>') {

                        p++;

                        if (*p == '=') {

                            p++;

                            t.value = TOK.TOKushrass;                           // >>>=

                        }

                        else {

                            t.value = TOK.TOKushr;                                      // >>>

                        }

                    }

                    else {

                        t.value = TOK.TOKshr;                                           // >>

                    }

                }

                else {

                    t.value = TOK.TOKgt;                                                // >

                }

                return;



            // !, !=, !==, !<, !<=, !<>, !<>=, !>, !>=
            case '!':

                p++;

                if (*p == '=') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKnotidentity;                                   // !==

                    }

                    else {

                        t.value = TOK.TOKnotequal;                                      // !=

                    }

                }

                else if (*p == '<') {

                    p++;

                    if (*p == '>') {

                        p++;

                        if (*p == '=') {

                            p++;

                            t.value = TOK.TOKunord;                             // !<>=

                        }

                        else {

                            t.value = TOK.TOKue;                                // !<>

                        }

                    }

                    else if (*p == '=') {

                        p++;

                        t.value = TOK.TOKug;                                    // !<=

                    }

                    else {

                        t.value = TOK.TOKuge;                                   // !<

                    }

                }

                else if (*p == '>') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKul;                                    // !>=

                    }

                    else {

                        t.value = TOK.TOKule;                                   // !>

                    }

                }

                else {

                    t.value = TOK.TOKnot;                                       // !

                }

                return;



            // =, ==, ===
            case '=':

                p++;

                if (*p == '=') {

                    p++;

                    if (*p == '=') {

                        p++;

                        t.value = TOK.TOKidentity;                                      // ===

                    }

                    else {

                        t.value = TOK.TOKequal;                                         // ==

                    }

                }

                else {

                    t.value = TOK.TOKassign;                                    // =

                }

                return;



            // ~, ~=
            case '~':

                p++;

                if (*p == '=') {

                    p++;

                    t.value = TOK.TOKcatass;                                            // ~=

                }

                else {

                    t.value = TOK.TOKtilde;                                             // ~

                }

                return;



            // SINGLE

            case '(': p++; t.value = TOK.TOKlparen;     return;



            case ')': p++; t.value = TOK.TOKrparen;     return;



            case '[': p++; t.value = TOK.TOKlbracket;   return;



            case ']': p++; t.value = TOK.TOKrbracket;   return;



            case '{': p++; t.value = TOK.TOKlcurly;     return;



            case '}': p++; t.value = TOK.TOKrcurly;     return;



            case '?': p++; t.value = TOK.TOKquestion;   return;



            case ',': p++; t.value = TOK.TOKcomma;              return;



            case ';': p++; t.value = TOK.TOKsemicolon;  return;



            case ':': p++; t.value = TOK.TOKcolon;              return;



            case '$': p++; t.value = TOK.TOKdollar;     return;



            // DOUBLE

            // *, *=
            case '*': p++; if (*p == '=') {

                    p++; t.value = TOK.TOKmulass;

            }

                else {

                    t.value = TOK.TOKmul;

                } return;



            // %, %=
            case '%': p++; if (*p == '=') {

                    p++; t.value = TOK.TOKmodass;

            }

                else {

                    t.value = TOK.TOKmod;

                } return;



            // ^, ^=
            case '^': p++; if (*p == '=') {

                    p++; t.value = TOK.TOKxorass;

            }

                else {

                    t.value = TOK.TOKxor;

                } return;



// removed 148	case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return;





            // '#' introduces a directive handled by Pragma(); scanning then
            // resumes with whatever follows
            case '#':

                p++;

                Pragma();

                continue;



            default:

                {

                    // NOTE(review): this is the only debug trace in scan()
                    // that is not commented out.
                    debug writefln("    default char");

                    ubyte c = *p;

                    if (c & 0x80) {

                        uint u = decodeUTF();

                        // Check for start of unicode identifier

                        if (isUniAlpha(u)) {

                            goto case_ident;

                        }



                        if (u == PS || u == LS) {

                            loc.linnum++;

                            p++;

                            continue;

                        }

                    }

                    // anything else is reported and skipped one byte at a time
                    if (isprint(c)) {

                        error("unsupported char '%s'", cast(char)c);

                    }

                    else {

                        error("unsupported char 0x%02x", cast(ubyte)c);

                    }

                    p++;

                    continue;

                }

            }

        }

    }







    /**
     * Parse one escape sequence.
     *
     * On entry p points at the character that follows the backslash.
     *
     * Handled forms:
     *   \' \" \? \\            the character itself
     *   \a \b \f \n \r \t \v   the usual control codes
     *   \xHH \uHHHH \UHHHHHHHH exactly 2, 4 or 8 hex digits
     *   \&name;                named entity (lookup currently disabled, see below)
     *   \o \oo \ooo            one to three octal digits
     *
     * Returns: the value of the escape.  At end of file (0 or 0x1a) nothing is
     * consumed and '\\' is returned.  For an undefined escape an error is
     * reported, nothing is consumed and the offending character is returned.
     */
    uint escapeSequence() {
        uint c;
        int  n;
        int  ndigits;

        c = *p;
        switch (c) {
        case '\'':
        case '"':
        case '?':
        case '\\':
Lconsume:
            p++;
            break;

        case 'a':       c = 7;          goto Lconsume;

        case 'b':       c = 8;          goto Lconsume;

        case 'f':       c = 12;         goto Lconsume;

        case 'n':       c = 10;         goto Lconsume;

        case 'r':       c = 13;         goto Lconsume;

        case 't':       c = 9;          goto Lconsume;

        case 'v':       c = 11;         goto Lconsume;

        case 'u':
            ndigits = 4;
            goto Lhex;

        case 'U':
            ndigits = 8;
            goto Lhex;

        case 'x':
            ndigits = 2;
Lhex:
            p++;                                // skip the 'x' / 'u' / 'U'
            c = *p;
            if (ishex(c)) {
                uint v = 0;
                n = 0;
                while (1) {
                    // Convert the current hex digit to its numeric value.
                    if (isdigit(c)) {
                        c -= '0';
                    }
                    else if (islower(c)) {
                        c -= 'a' - 10;
                    }
                    else {
                        c -= 'A' - 10;
                    }
                    v = v * 16 + c;
                    c = *++p;
                    if (++n == ndigits) {
                        break;
                    }
                    if (!ishex(c)) {
                        error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
                        break;
                    }
                }
//!				    if( ndigits != 2 && !utf_isValidDchar(v))
//!						error("invalid UTF character \\U%08x", v);
                c = v;
            }
            else {
                // c is a uint: cast so that %s prints the character, not its
                // numeric value (same idiom as the scanner's "unsupported char").
                error("undefined escape hex sequence \\%s\n", cast(char)c);
            }
            break;

        case '&':                                       // named character entity
            // NOTE(review): the entity lookup below is disabled, so for
            // "\&name;" this returns '&' and leaves p on the ';' (or on the
            // first character that cannot be part of a name).
            for (ubyte *idstart = ++p; 1; p++) {
                switch (*p) {
                case ';':
                    //!!!
                    /+
                     * c = HtmlNamedEntity(idstart, p - idstart);
                     * if( c == ~0 )
                     * {
                     *      error("unnamed character entity &%.*s;", p - idstart, idstart);
                     *      c = ' ';
                     * }
                     *
                     * p++;
                     +/
                    break;

                default:
                    // Letters anywhere; digits anywhere but the second position.
                    if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) {
                        continue;
                    }
                    error("unterminated named entity");
                    break;
                }
                break;
            }
            break;

        case 0:
        case 0x1a:                                      // end of file
            c = '\\';
            break;

        default:
            if (isoctal(c)) {
                // Accumulate in a uint so that values above 0xFF can be
                // diagnosed instead of wrapping silently.
                uint v = 0;
                n = 0;
                do {
                    v = v * 8 + (c - '0');
                    c = *++p;
                } while (++n < 3 && isoctal(c));
                if (v > 0xFF) {
                    error("0%03o is larger than a byte", v);
                }
                c = v & 0xFF;                           // value is still reduced to one byte
            }
            else {
                error("undefined escape sequence \\%s\n", cast(char)c);
            }
            break;
        }
        return(c);
    }



    /**************************************
     * Lex a string literal in which no escape processing takes place.
     *
     * On entry p points at the opening delimiter, which is skipped.
     * Characters are collected until the delimiter tc ('"' or '`') is seen.
     * "\r\n" and a lone "\r" are stored as "\n"; every line end, including the
     * Unicode LS / PS separators, bumps loc.linnum.
     *
     * Params:
     *	t	token that receives ustring and postfix
     *	tc	terminating delimiter character
     * Returns:
     *	TOK.TOKstring, also after an "unterminated string" error, in which
     *	case the token gets an empty string and p is left on the end-of-file
     *	character.
     */

    TOK wysiwygStringConstant(Token *t, int tc) {
        uint c;
        Loc  start = loc;

        p++;
        stringbuffer.offset = 0;
        while (1) {
            c = *p++;
            switch (c) {
            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n') {
                    continue;                           // ignore, the '\n' is handled next
                }
                c = '\n';                               // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case 0:
            case 0x1a:
                // Rewind onto the end-of-file character so that the caller
                // does not resume scanning past the end of the buffer
                // (escapeStringConstant does the same).
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "";
                t.postfix = 0;
                return(TOK.TOKstring);

            case '"':
            case '`':
                if (c == tc) {
                    // NOTE(review): the terminating 0 becomes part of the
                    // string returned by toString(); the other string lexers
                    // do not append it - confirm whether it is wanted.
                    stringbuffer.write(cast(byte)0);
                    // stringbuffer is static and reused for every literal, so
                    // the token must own a copy of the text.
                    t.ustring = stringbuffer.toString().dup;
                    stringPostfix(t);
                    return(TOK.TOKstring);
                }
                break;                                  // the other quote kind is ordinary text

            default:
                if (c & 0x80) {
                    p--;
                    ubyte *seq = p;                     // first byte of the UTF-8 sequence
                    uint  u    = decodeUTF();           // leaves p on the last byte of the sequence
                    p++;
                    if (u == PS || u == LS) {
                        loc.linnum++;
                    }
                    // Store the character as the UTF-8 bytes it had in the
                    // source instead of writing the decoded value as a uint.
                    for ( ; seq < p; seq++) {
                        stringbuffer.write(cast(char)*seq);
                    }
                    continue;
                }
                break;
            }
            // c is a single byte here; write it as one, as escapeStringConstant does.
            stringbuffer.write(cast(char)c);
        }
    }



    /**************************************
     * Lex hex strings:
     *	x"0A ae 34FE BD"
     *
     * On entry p points at the opening quote, which is skipped.  Each pair of
     * hex digits yields one byte.  White space and line ends between digits
     * are ignored; line ends bump loc.linnum.
     *
     * Params:
     *	t	token that receives ustring and postfix
     * Returns:
     *	TOK.TOKstring, also after an error.  For an unterminated string the
     *	token gets an empty string and p is left on the end-of-file character.
     */

    TOK hexStringConstant(Token *t) {
        uint c;
        Loc  start = loc;
        uint n     = 0;                                 // number of hex digits seen so far
        uint v     = 0;                                 // byte being assembled

        p++;
        stringbuffer.offset = 0;
        while (1) {
            c = *p++;
            switch (c) {
            case ' ':
            case '\t':
            case '\v':
            case '\f':
                continue;                               // skip white space

            case '\r':
                if (*p == '\n') {
                    continue;                           // ignore, the '\n' is counted next
                }
                loc.linnum++;                           // isolated '\r' counts as a line end
                continue;

            case '\n':
                loc.linnum++;
                continue;

            case 0:
            case 0x1a:
                // Rewind onto the end-of-file character so that the caller
                // does not resume scanning past the end of the buffer
                // (escapeStringConstant does the same).
                p--;
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "";
                t.postfix = 0;
                return(TOK.TOKstring);

            case '"':
                if (n & 1) {
                    error("odd number (%d) of hex characters in hex string", n);
                    stringbuffer.write(cast(char)v);    // keep the dangling half byte
                }
                // stringbuffer is static and reused for every literal, so the
                // token must own a copy of the data.
                t.ustring = stringbuffer.toString().dup;
                stringPostfix(t);
                return(TOK.TOKstring);

            default:
                if (c >= '0' && c <= '9') {
                    c -= '0';
                }
                else if (c >= 'a' && c <= 'f') {
                    c -= 'a' - 10;
                }
                else if (c >= 'A' && c <= 'F') {
                    c -= 'A' - 10;
                }
                else if (c & 0x80) {
                    p--;
                    uint u = decodeUTF();
                    p++;
                    if (u == PS || u == LS) {
                        // A Unicode line end is white space, not a hex digit.
                        loc.linnum++;
                        continue;
                    }
                    error("non-hex character \\u%x", u);
                }
                else {
                    // c is a uint: cast so that %s prints the character itself.
                    error("non-hex character '%s'", cast(char)c);
                }
                if (n & 1) {
                    // Second digit of a pair: emit the completed byte.
                    v = (v << 4) | c;
                    stringbuffer.write(cast(char)v);
                }
                else {
                    v = c;
                }
                n++;
                break;
            }
        }
    }



    /**************************************
     * Lex a double-quoted string literal with escape sequences.
     *
     * On entry p points at the opening quote, which is skipped.  Backslash
     * escapes are translated by escapeSequence().  "\r\n", a lone "\r" and
     * the Unicode LS / PS separators are stored as "\n"; every line end bumps
     * loc.linnum.
     *
     * Params:
     *	t	token that receives ustring and postfix
     *	wide	not used by this function
     * Returns:
     *	TOK.TOKstring, also after an "unterminated string" error, in which
     *	case the token gets an empty string and p is left on the end-of-file
     *	character.
     */

    TOK escapeStringConstant(Token *t, int wide) {
        uint c;
        Loc  start = loc;

        p++;
        stringbuffer.offset = 0;
        while (1) {
            c = *p++;
            switch (c) {
            case '\\':
                switch (*p) {
                case 'u':
                case 'U':
                case '&':
                    c = escapeSequence();
                    // NOTE(review): c is a code point and is handed to the
                    // buffer as a uint; presumably it should be stored UTF-8
                    // encoded - confirm against OutBuffer.write(uint).
                    stringbuffer.write(c);
                    continue;

                default:
                    c = escapeSequence();
                    break;
                }
                break;

            case '\n':
                loc.linnum++;
                break;

            case '\r':
                if (*p == '\n') {
                    continue;                           // ignore, the '\n' is handled next
                }
                c = '\n';                               // treat EndOfLine as \n character
                loc.linnum++;
                break;

            case '"':
                // stringbuffer is static and reused, hence the copy.
                t.ustring = stringbuffer.toString().dup;
                stringPostfix(t);
                return(TOK.TOKstring);

            case 0:
            case 0x1a:
                p--;                                    // stay on the end-of-file character
                error("unterminated string constant starting at %s", start.toChars());
                t.ustring = "";
                t.postfix = 0;
                return(TOK.TOKstring);

            default:
                if (c & 0x80) {
                    p--;
                    ubyte *seq = p;                     // first byte of the UTF-8 sequence
                    c = decodeUTF();                    // leaves p on the last byte of the sequence
                    p++;
                    if (c == LS || c == PS) {
                        c = '\n';
                        loc.linnum++;
                        stringbuffer.write(cast(char)c);
                    }
                    else {
                        // Store the character as the UTF-8 bytes it had in
                        // the source; casting the decoded code point to char
                        // would keep only its low eight bits.
                        for ( ; seq < p; seq++) {
                            stringbuffer.write(cast(char)*seq);
                        }
                    }
                    continue;
                }
                break;
            }
            stringbuffer.write(cast(char)c);
        }
    }



    /**************************************
     * Lex a character literal.
     *
     * On entry p points at the opening single quote.  The character value is
     * stored in t.uns64value.
     *
     * Params:
     *	t	token that receives the value
     *	wide	not used by this function
     * Returns:
     *	TOK.TOKcharv by default;
     *	TOK.TOKwcharv for a \u escape or a non-ASCII character below 0xd800
     *	or in 0xe000..0xfffd;
     *	TOK.TOKdcharv for a \U or \& escape or any other non-ASCII character.
     *	After an "unterminated character constant" error the kind determined
     *	so far is returned.
     */
    TOK charConstant(Token *t, int wide) {
        uint c;
        TOK  tk = TOK.TOKcharv;

        //printf("Lexer.charConstant\n");
        p++;                                            // skip the opening quote
        c = *p++;
        switch (c) {
        case '\\':
            switch (*p) {
            case 'u':
                t.uns64value = escapeSequence();
                tk           = TOK.TOKwcharv;
                break;

            case 'U':
            case '&':
                t.uns64value = escapeSequence();
                tk           = TOK.TOKdcharv;
                break;

            default:
                t.uns64value = escapeSequence();
                break;
            }
            break;

        case '\n':
L1:
            loc.linnum++;
            goto Lunterminated;

        case 0:
        case 0x1a:
            // Rewind onto the end-of-file character so that the caller does
            // not resume scanning past the end of the buffer.
            p--;
            goto Lunterminated;

        case '\r':
        case '\'':
Lunterminated:
            error("unterminated character constant");
            return(tk);

        default:
            if (c & 0x80) {
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS) {
                    goto L1;                            // Unicode line end inside the literal
                }
                if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) {
                    tk = TOK.TOKwcharv;
                }
                else {
                    tk = TOK.TOKdcharv;
                }
            }
            t.uns64value = c;
            break;
        }

        if (*p != '\'') {
            error("unterminated character constant");
            return(tk);
        }
        p++;                                            // skip the closing quote
        return(tk);
    }



    /**
     * Read the optional postfix character of a string literal.
     *
     * If the current character is 'c', 'w' or 'd' it is stored in t.postfix
     * and consumed; otherwise t.postfix is set to 0 and p is left unchanged.
     */
    void stringPostfix(Token *t) {
        ubyte suffix = *p;

        if (suffix == 'c' || suffix == 'w' || suffix == 'd') {
            t.postfix = suffix;
            p++;
        }
        else {
            t.postfix = 0;
        }
    }



    /***************************************

     * Read \u or \U unicode sequence

     * Input:

     *	u	'u' or 'U'

     */

    /*

     * uint Wchar(uint u)

     * {

     *  uint value;

     *  uint n;

     *  ubyte c;

     *  uint nchars;

     *

     *  nchars = (u == 'U') ? 8 : 4;

     *  value = 0;

     *  for (n = 0; 1; n++)

     *  {

     * ++p;

     *              if( n == nchars)

     *                  break;

     *              c = *p;

     *              if( !ishex(c))

     *              {

     *                      error("\\%s sequence must be followed by %d hex characters", u, nchars);

     *                  break;

     *              }

     *              if( isdigit(c))

     *                  c -= '0';

     *              else if( islower(c))

     *                  c -= 'a' - 10;

     *              else

     *                  c -= 'A' - 10;

     *              value <<= 4;

     *              value |= c;

     *  }

     *  return value;

     * }

     */



    /**************************************

     * Read in a number.

     * If it's an integer, store it in tok.TKutok.Vlong.

     *	integers can be decimal, octal or hex

     *	Handle the suffixes U, UL, LU, L, etc.

     * If it's double, store it in tok.TKutok.Vdouble.

     * Returns:

     *	TKnum

     *	TKdouble,...

     */



    TOK number(Token *t) {

        //debug writefln("Lexer.number()");

        // We use a state machine to collect numbers

        enum STATE {

            STATE_initial,

            STATE_0,

            STATE_decimal,

            STATE_octal,

            STATE_octale,

            STATE_hex,

            STATE_binary,

            STATE_hex0,

            STATE_binary0,

            STATE_hexh,

            STATE_error

        }



        enum FLAGS {

            FLAGS_decimal  = 1,                         // decimal

            FLAGS_unsigned = 2,                         // u or U suffix

            FLAGS_long     = 4,                         // l or L suffix

        }

        FLAGS flags = FLAGS.FLAGS_decimal;



        int   i;

        TOK   result;

        int   base;



        stringbuffer.offset = 0;

//		stringbuffer.data = null;

        STATE state   = STATE.STATE_initial;

        ubyte * start = p;



        TOK _isreal() {

            p = start;

            return(inreal(t));

        }



        while (true) {

            char c = cast(char)*p;

            switch (state) {

            case STATE.STATE_initial:                           // opening state

                if (c == '0') {

                    state = STATE.STATE_0;

                }

                else {

                    state = STATE.STATE_decimal;

                }

                break;



            case STATE.STATE_0:

                flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal);

                switch (c) {

                //	#if ZEROH

//					    case 'H':			// 0h

//					    case 'h':

//							goto hexh;

                //	#endif

                case 'X':

                case 'x':

                    state = STATE.STATE_hex0;

                    break;



                case '.':

                    if (p[1] == '.') {                                          // .. is a separate token

                        goto done;

                    }



                case 'i':

                case 'f':

                case 'F':

                    goto _Real;



                //	#if ZEROH

//					    case 'E':

//					    case 'e':

//							goto case_hex;

                //	#endif

                case 'B':

                case 'b':

                    state = STATE.STATE_binary0;

                    break;



                case '0':…
Summary ✨

This D code appears to be part of a compiler or interpreter, specifically handling string values and basic types. It defines a StringValue struct with an union containing integer, pointer, or string values, as well as a character array for storing the string value. The code also includes tables and macros for mapping characters to type identifiers.
Alerts (2)

Complexity hotspot; lines 587 to 588 (total complexity: 7)
587 588