/std/d/lexer.d
D | 2165 lines | 1979 code | 109 blank | 77 comment | 340 complexity | db342afc581162bad932f3eb5d95968f MD5 | raw file
- module std.d.lexer;
- import std.typecons;
- import std.typetuple;
- import std.array;
- import std.algorithm;
- import std.range;
- import std.lexer;
/// Spellings of all operator and punctuation tokens. Entries are grouped by
/// their first character; the overall order of the list is unchanged.
private enum operators = [
    ",", ".", "..", "...",
    "/", "/=",
    "!", "!<", "!<=", "!<>", "!<>=", "!=", "!>", "!>=",
    "$",
    "%", "%=",
    "&", "&&", "&=",
    "(", ")",
    "*", "*=",
    "+", "++", "+=",
    "-", "--", "-=",
    ":", ";",
    "<", "<<", "<<=", "<=", "<>", "<>=",
    "=", "==", "=>",
    ">", ">=", ">>", ">>=", ">>>", ">>>=",
    "?", "@", "[", "]",
    "^", "^=", "^^", "^^=",
    "{", "|", "|=", "||", "}",
    "~", "~="
];
/// Spellings of all keyword tokens, one initial letter per row. The overall
/// order of the list is unchanged.
private enum keywords = [
    "abstract", "alias", "align", "asm", "assert", "auto",
    "body", "bool", "break", "byte",
    "case", "cast", "catch", "cdouble", "cent", "cfloat", "char", "class",
    "const", "continue", "creal",
    "dchar", "debug", "default", "delegate", "delete", "deprecated", "do",
    "double",
    "else", "enum", "export", "extern",
    "false", "final", "finally", "float", "for", "foreach", "foreach_reverse",
    "function",
    "goto",
    "idouble", "if", "ifloat", "immutable", "import", "in", "inout", "int",
    "interface", "invariant", "ireal", "is",
    "lazy", "long",
    "macro", "mixin", "module",
    "new", "nothrow", "null",
    "out", "override",
    "package", "pragma", "private", "protected", "public", "pure",
    "real", "ref", "return",
    "scope", "shared", "short", "static", "struct", "super", "switch",
    "synchronized",
    "template", "this", "throw", "true", "try", "typedef", "typeid", "typeof",
    "ubyte", "ucent", "uint", "ulong", "union", "unittest", "ushort",
    "version", "virtual", "void", "volatile",
    "wchar", "while", "with",
    "__DATE__", "__EOF__", "__FILE__", "__FUNCTION__", "__gshared",
    "__LINE__", "__MODULE__", "__parameters", "__PRETTY_FUNCTION__",
    "__TIME__", "__TIMESTAMP__", "__traits", "__vector", "__VENDOR__",
    "__VERSION__"
];
/// Names of the token kinds that have no fixed spelling (literals, comments,
/// identifiers, whitespace, ...).
private enum dynamicTokens = [
    "specialTokenSequence", "comment", "identifier", "scriptLine", "whitespace",
    "doubleLiteral", "floatLiteral", "idoubleLiteral", "ifloatLiteral",
    "intLiteral", "longLiteral", "realLiteral", "irealLiteral",
    "uintLiteral", "ulongLiteral",
    "characterLiteral",
    "dstringLiteral", "stringLiteral", "wstringLiteral"
];
/// Flat list of (prefix, handler name) pairs: each even entry is a byte
/// sequence and the entry after it names the DLexer method listed for it.
private enum pseudoTokenHandlers = [
    // strings and comments
    "\"",     "lexStringLiteral",
    "`",      "lexWysiwygString",
    "//",     "lexSlashSlashComment",
    "/*",     "lexSlashStarComment",
    "/+",     "lexSlashPlusComment",
    ".",      "lexDot",
    "'",      "lexCharacterLiteral",
    // numbers
    "0",      "lexNumber",
    "1",      "lexDecimal",
    "2",      "lexDecimal",
    "3",      "lexDecimal",
    "4",      "lexDecimal",
    "5",      "lexDecimal",
    "6",      "lexDecimal",
    "7",      "lexDecimal",
    "8",      "lexDecimal",
    "9",      "lexDecimal",
    // prefixed string forms
    "q\"",    "lexDelimitedString",
    "q{",     "lexTokenString",
    "r\"",    "lexWysiwygString",
    "x\"",    "lexHexString",
    // whitespace
    " ",      "lexWhitespace",
    "\t",     "lexWhitespace",
    "\r",     "lexWhitespace",
    "\n",     "lexWhitespace",
    "\u2028", "lexLongNewline",
    "\u2029", "lexLongNewline",
    // '#' sequences
    "#!",     "lexScriptLine",
    "#line",  "lexSpecialTokenSequence"
];
/// Token identifier type built from the operator, dynamic-token and keyword lists.
public alias IdType = TokenIdType!(operators, dynamicTokens, keywords);

/// Maps a token identifier back to its string representation.
public alias str = tokenStringRepresentation!(
    IdType, operators, dynamicTokens, keywords);

/// Token identifier for the given spelling or dynamic-token name,
/// e.g. tok!"identifier" or tok!"+=".
public template tok(string token)
{
    alias tok = TokenId!(
        IdType, operators, dynamicTokens, keywords, token);
}
/// Extra members handed to std.lexer.TokenStructure: an attached comment
/// string and an ordering of a token against a plain index.
private enum extraFields = q{
    string comment;

    int opCmp(size_t i) const pure nothrow @safe
    {
        if (index == i)
            return 0;
        return index < i ? -1 : 1;
    }
};

/// The token type produced by DLexer.
public alias Token = std.lexer.TokenStructure!(IdType, extraFields);
/**
 * Flags that control how string literals are represented in tokens.
 */
public enum StringBehavior : ubyte
{
    /// Quote characters are dropped and escape sequences are processed.
    compiler = 0,
    /// The opening quote, closing quote and any string suffix are kept in
    /// the string token.
    includeQuoteChars = 1 << 0,
    /// Escape sequences are left as written.
    notEscaped = 1 << 1,
    /// The literal is left untouched; intended for formatters and highlighters.
    source = includeQuoteChars | notEscaped
}
/**
 * Selects what the lexer does with whitespace.
 */
public enum WhitespaceBehavior : ubyte
{
    /// Whitespace is dropped.
    skip = 0,
    /// Whitespace is returned as a token.
    include = 1
}
/**
 * Selects what the lexer does with special token sequences.
 */
public enum SpecialTokenBehavior : ubyte
{
    /// Special tokens are dropped.
    skip = 0,
    /// Special tokens are returned as tokens.
    include = 1
}
/**
 * Selects what the lexer does with comments.
 */
public enum CommentBehavior : ubyte
{
    /// Comments are attached to the non-whitespace token that follows them.
    attach = 0,
    /// Comments are tokens of their own and show up in the token range's front().
    include = 1
}
/**
 * Options passed to DLexer. The field order is part of the interface
 * (positional struct initialisation) and is unchanged.
 */
public struct LexerConfig
{
    /// Name of the file being lexed.
    string fileName;
    /// How string literals are represented.
    StringBehavior stringBehavior;
    /// Whether whitespace tokens are skipped or returned.
    WhitespaceBehavior whitespaceBehavior;
    /// Whether comments are attached to the next token or returned.
    CommentBehavior commentBehavior;
    /// Whether special token sequences are skipped or returned.
    SpecialTokenBehavior specialTokenBehavior;
}
/// Returns: true if `type` is the token of one of the built-in basic types.
public bool isBasicType(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!"int", tok!"uint", tok!"double", tok!"idouble":
    case tok!"float", tok!"ifloat", tok!"short", tok!"ushort":
    case tok!"long", tok!"ulong", tok!"char", tok!"wchar":
    case tok!"dchar", tok!"bool", tok!"void", tok!"cent":
    case tok!"ucent", tok!"real", tok!"ireal", tok!"byte":
    case tok!"ubyte", tok!"cdouble", tok!"cfloat", tok!"creal":
        return true;
    default:
        return false;
    }
}
/// Returns: true if `type` is one of the numeric literal token kinds.
public bool isNumberLiteral(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!"doubleLiteral", tok!"floatLiteral":
    case tok!"idoubleLiteral", tok!"ifloatLiteral":
    case tok!"intLiteral", tok!"longLiteral":
    case tok!"realLiteral", tok!"irealLiteral":
    case tok!"uintLiteral", tok!"ulongLiteral":
        return true;
    default:
        return false;
    }
}
/// Returns: true if `type` is one of the operator / punctuation tokens.
public bool isOperator(IdType type) nothrow pure @safe
{
    switch (type)
    {
    case tok!",", tok!".", tok!"..", tok!"...":
    case tok!"/", tok!"/=":
    case tok!"!", tok!"!<", tok!"!<=", tok!"!<>", tok!"!<>=", tok!"!=":
    case tok!"!>", tok!"!>=":
    case tok!"$":
    case tok!"%", tok!"%=":
    case tok!"&", tok!"&&", tok!"&=":
    case tok!"(", tok!")":
    case tok!"*", tok!"*=":
    case tok!"+", tok!"++", tok!"+=":
    case tok!"-", tok!"--", tok!"-=":
    case tok!":", tok!";":
    case tok!"<", tok!"<<", tok!"<<=", tok!"<=", tok!"<>", tok!"<>=":
    case tok!"=", tok!"==", tok!"=>":
    case tok!">", tok!">=", tok!">>", tok!">>=", tok!">>>", tok!">>>=":
    case tok!"?", tok!"@", tok!"[", tok!"]":
    case tok!"^", tok!"^=", tok!"^^", tok!"^^=":
    case tok!"{", tok!"|", tok!"|=", tok!"||", tok!"}":
    case tok!"~", tok!"~=":
        return true;
    default:
        return false;
    }
}
/**
 * Returns: true if `type` is a keyword token other than a basic type
 * (basic types are reported by isBasicType).
 *
 * "virtual" is included: it is part of the `keywords` list and is not a
 * basic type, so leaving it out made it the only keyword token that neither
 * isKeyword nor isBasicType recognised.
 */
public bool isKeyword(IdType type) pure nothrow @safe
{
    switch (type)
    {
    case tok!"abstract", tok!"alias", tok!"align", tok!"asm", tok!"assert":
    case tok!"auto":
    case tok!"body", tok!"break":
    case tok!"case", tok!"cast", tok!"catch", tok!"class", tok!"const":
    case tok!"continue":
    case tok!"debug", tok!"default", tok!"delegate", tok!"delete":
    case tok!"deprecated", tok!"do":
    case tok!"else", tok!"enum", tok!"export", tok!"extern":
    case tok!"false", tok!"final", tok!"finally", tok!"for", tok!"foreach":
    case tok!"foreach_reverse", tok!"function":
    case tok!"goto":
    case tok!"if", tok!"immutable", tok!"import", tok!"in", tok!"inout":
    case tok!"interface", tok!"invariant", tok!"is":
    case tok!"lazy":
    case tok!"macro", tok!"mixin", tok!"module":
    case tok!"new", tok!"nothrow", tok!"null":
    case tok!"out", tok!"override":
    case tok!"package", tok!"pragma", tok!"private", tok!"protected":
    case tok!"public", tok!"pure":
    case tok!"ref", tok!"return":
    case tok!"scope", tok!"shared", tok!"static", tok!"struct", tok!"super":
    case tok!"switch", tok!"synchronized":
    case tok!"template", tok!"this", tok!"throw", tok!"true", tok!"try":
    case tok!"typedef", tok!"typeid", tok!"typeof":
    case tok!"union", tok!"unittest":
    case tok!"version", tok!"virtual", tok!"volatile":
    case tok!"while", tok!"with":
    case tok!"__DATE__", tok!"__EOF__", tok!"__FILE__", tok!"__FUNCTION__":
    case tok!"__gshared", tok!"__LINE__", tok!"__MODULE__":
    case tok!"__parameters", tok!"__PRETTY_FUNCTION__", tok!"__TIME__":
    case tok!"__TIMESTAMP__", tok!"__traits", tok!"__vector":
    case tok!"__VENDOR__", tok!"__VERSION__":
        return true;
    default:
        return false;
    }
}
/// Returns: true if `type` is one of the three string literal token kinds.
public bool isStringLiteral(IdType type) pure nothrow @safe
{
    switch (type)
    {
    case tok!"dstringLiteral", tok!"stringLiteral", tok!"wstringLiteral":
        return true;
    default:
        return false;
    }
}
/// Returns: true if `type` is a protection attribute keyword.
public bool isProtection(IdType type) pure nothrow @safe
{
    switch (type)
    {
    case tok!"export", tok!"package", tok!"private":
    case tok!"public", tok!"protected":
        return true;
    default:
        return false;
    }
}
- public struct DLexer
- {
- import core.vararg;
- mixin Lexer!(Token, lexIdentifier, isSeparating, operators, dynamicTokens,
- keywords, pseudoTokenHandlers);
- @disable this();
/**
 * Sets up the lexer over `range` and advances to the first token.
 *
 * A leading UTF-8 byte order mark (EF BB BF) is not handed to the
 * underlying LexerRange.
 */
this(ubyte[] range, const LexerConfig config, StringCache* cache)
{
    immutable bool hasBom = range.length >= 3
        && range[0] == 0xef
        && range[1] == 0xbb
        && range[2] == 0xbf;
    auto source = hasBom ? range[3 .. $] : range;
    this.range = LexerRange(source);
    this.config = config;
    this.cache = cache;
    popFront();
}
/// Returns: true if the comment text starts with "///", "/++" or "/**".
private static bool isDocComment(string comment) pure nothrow @safe
{
    if (comment.length < 3)
        return false;
    immutable opener = comment[0 .. 3];
    return opener == "///" || opener == "/++" || opener == "/**";
}
/**
 * Advances to the next token, applying the configured comment, whitespace
 * and special-token behaviours.
 *
 * With CommentBehavior.attach, the text of every documentation comment that
 * is passed over is collected (joined with '\n') and stored in the `comment`
 * field of the token the lexer stops on. Previously only the first comment
 * of a run of directly adjacent comments was examined, so the text of the
 * following ones was lost.
 */
public void popFront() pure
{
    _popFront();
    string comment;
    switch (front.type)
    {
    case tok!"comment":
        if (config.commentBehavior == CommentBehavior.attach)
        {
            do
            {
                // Inspect each comment before discarding it.
                if (isDocComment(front.text))
                {
                    comment = comment is null
                        ? front.text
                        : comment ~ "\n" ~ front.text;
                }
                _popFront();
            }
            while (front == tok!"comment");
            if (front == tok!"whitespace") goto case tok!"whitespace";
            if (front == tok!"specialTokenSequence") goto case tok!"specialTokenSequence";
        }
        break;
    case tok!"whitespace":
        if (config.whitespaceBehavior == WhitespaceBehavior.skip)
        {
            do _popFront(); while (front == tok!"whitespace");
            if (front == tok!"comment") goto case tok!"comment";
            if (front == tok!"specialTokenSequence") goto case tok!"specialTokenSequence";
        }
        break;
    case tok!"specialTokenSequence":
        if (config.specialTokenBehavior == SpecialTokenBehavior.skip)
        {
            do _popFront(); while (front == tok!"specialTokenSequence");
            if (front == tok!"comment") goto case tok!"comment";
            if (front == tok!"whitespace") goto case tok!"whitespace";
        }
        break;
    default:
        break;
    }
    _front.comment = comment;
}
/**
 * Returns: true if the current byte starts a whitespace character: space,
 * CR, LF, tab, or the UTF-8 encoding of U+2028 / U+2029 (E2 80 A8 / E2 80 A9).
 *
 * range.peek(n) yields the current byte plus the n bytes after it (see its
 * use in lexNumber and lexHeredocString), so a three-byte sequence is read
 * with peek(2) and checked at offsets 1 and 2. The earlier code compared
 * offset 0 (always 0xe2 here) against 0x80 and therefore never matched.
 */
bool isWhitespace() pure /*const*/ nothrow
{
    switch (range.front)
    {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
        return true;
    case 0xe2:
        auto peek = range.peek(2);
        return peek.length == 3
            && peek[1] == 0x80
            && (peek[2] == 0xa8 || peek[2] == 0xa9);
    default:
        return false;
    }
}
/**
 * Consumes one character and increments the line counter when that
 * character is a line break ("\r\n", "\r", "\n", U+2028 or U+2029).
 *
 * The three-byte separators are read with range.peek(2), which yields the
 * current byte plus the two after it. The earlier peek(3) / length == 3
 * test could only succeed when exactly three bytes were left in the input.
 */
void popFrontWhitespaceAware() pure nothrow
{
    switch (range.front)
    {
    case '\r':
        range.popFront();
        // "\r\n" counts as a single line break.
        if (!range.empty && range.front == '\n')
            range.popFront();
        range.incrementLine();
        return;
    case '\n':
        range.popFront();
        range.incrementLine();
        return;
    case 0xe2:
        auto lookahead = range.peek(2);
        if (lookahead.length == 3 && lookahead[1] == 0x80
            && (lookahead[2] == 0xa8 || lookahead[2] == 0xa9))
        {
            range.popFront();
            range.popFront();
            range.popFront();
            range.incrementLine();
        }
        else
            range.popFront();
        return;
    default:
        range.popFront();
        return;
    }
}
/**
 * Lexes a run of whitespace (space, tab, CR, LF, U+2028, U+2029).
 *
 * On x86-64 with a new enough compiler, 16-byte windows are first scanned
 * with SSE instructions; the remainder is handled by the scalar loop.
 *
 * Fixes relative to the previous version:
 * $(UL
 *   $(LI the SIMD loop walks the input through range.index instead of
 *        advancing the local `index`, so the returned token keeps the start
 *        position recorded by tokenStart, as in the scalar path;)
 *   $(LI the scalar loop tests range.empty before reading range.front, which
 *        matters when the SIMD loop has consumed the rest of the input;)
 *   $(LI U+2028 / U+2029 are read with range.peek(2), which yields three
 *        bytes.)
 * )
 * NOTE(review): the SIMD path only counts '\n' bytes as line breaks, while
 * the scalar path also counts a lone '\r' - confirm whether that is intended.
 */
Token lexWhitespace() pure nothrow
{
    mixin (tokenStart);
    static if (__VERSION__ > 2065) version (D_InlineAsm_X86_64) while (range.index + 16 <= range.bytes.length)
    {
        ulong startAddr = (cast(ulong) range.bytes.ptr) + range.index;
        enum space = (cast(ulong) ' ') * 0x0101010101010101L;
        enum tab = (cast(ulong) '\t') * 0x0101010101010101L;
        enum cr = (cast(ulong) '\r') * 0x0101010101010101L;
        enum lf = (cast(ulong) '\n') * 0x0101010101010101L;
        ulong charsSkipped;
        ulong lineIncrement;
        asm
        {
            mov R10, space;
            mov R11, tab;
            mov R12, cr;
            mov R13, lf;
            mov R8, startAddr;
            movdqu XMM0, [R8];
            mov R9, line;
            // space pattern
            movq XMM1, R10;
            shufpd XMM1, XMM1, 0;
            pcmpeqb XMM1, XMM0;
            // tab pattern
            movq XMM2, R11;
            shufpd XMM2, XMM2, 0;
            pcmpeqb XMM2, XMM0;
            // CR pattern
            movq XMM3, R12;
            shufpd XMM3, XMM3, 0;
            pcmpeqb XMM3, XMM0;
            // LF pattern
            movq XMM4, R13;
            shufpd XMM4, XMM4, 0;
            pcmpeqb XMM4, XMM0;
            // Bit mask-of newlines to r10
            pmovmskb R10, XMM4;
            // and the masks together
            por XMM1, XMM2;
            por XMM1, XMM3;
            por XMM1, XMM4;
            pmovmskb RAX, XMM1;
            not RAX;
            bsf RCX, RAX;
            mov charsSkipped, RCX;
            mov RBX, 1;
            inc CL;
            shl RBX, CL;
            sub RBX, 1;
            and R10, RBX;
            popcnt R10, R10;
            mov lineIncrement, R10;
        }
        range.incrementLine(lineIncrement);
        range.popFrontN(charsSkipped);
        // Fewer than 16 whitespace bytes: the run ended inside this window.
        if (charsSkipped < 16)
            goto end;
    }
    loop: while (!range.empty)
    {
        switch (range.front)
        {
        case '\r':
            range.popFront();
            if (!range.empty && range.front == '\n')
                range.popFront();
            range.incrementLine();
            break;
        case '\n':
            range.popFront();
            range.incrementLine();
            break;
        case ' ':
        case '\t':
            range.popFront();
            break;
        case 0xe2:
            auto lookahead = range.peek(2);
            if (lookahead.length != 3)
                break loop;
            if (lookahead[1] != 0x80)
                break loop;
            if (lookahead[2] == 0xa8 || lookahead[2] == 0xa9)
            {
                range.popFront();
                range.popFront();
                range.popFront();
                range.incrementLine();
                break;
            }
            break loop;
        default:
            break loop;
        }
    }
end:
    // The text is only interned when whitespace tokens are actually returned.
    string text = config.whitespaceBehavior == WhitespaceBehavior.skip
        ? null : cache.intern(range.slice(mark));
    return Token(tok!"whitespace", text, line, column, index);
}
/**
 * Lexes a numeric literal, dispatching on a "0x"/"0X" or "0b"/"0B" prefix
 * to lexHex / lexBinary and to lexDecimal otherwise.
 */
Token lexNumber() pure nothrow
{
    mixin (tokenStart);
    if (range.front != '0' || !range.canPeek(1))
        return lexDecimal(mark, line, column, index);

    immutable second = range.peek(1)[1];
    immutable bool hexPrefix = second == 'x' || second == 'X';
    immutable bool binaryPrefix = second == 'b' || second == 'B';
    if (!hexPrefix && !binaryPrefix)
        return lexDecimal(mark, line, column, index);

    // Consume the two prefix characters.
    range.popFront();
    range.popFront();
    return hexPrefix
        ? lexHex(mark, line, column, index)
        : lexBinary(mark, line, column, index);
}
/// Lexes hexadecimal digits starting at the current position.
Token lexHex() pure nothrow
{
    mixin (tokenStart);
    return lexHex(mark, line, column, index);
}

/**
 * Lexes the digits, optional fraction, exponent and suffix of a hexadecimal
 * literal. `mark`, `line`, `column` and `index` describe the token start and
 * are passed through to the returned token.
 */
Token lexHex(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
    IdType type = tok!"intLiteral";
    bool seenDot;
    scan: while (!range.empty)
    {
        switch (range.front)
        {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
        case '_':
            range.popFront();
            break;
        case 'u', 'U':
            lexIntSuffix(type);
            break scan;
        case 'i':
            if (seenDot)
                lexFloatSuffix(type);
            break scan;
        case 'L':
            if (seenDot)
                lexFloatSuffix(type);
            else
                lexIntSuffix(type);
            break scan;
        case 'p', 'P':
            lexExponent(type);
            break scan;
        case '.':
            // A second dot, or a dot at the very end, is not part of the number.
            if (seenDot || !range.canPeek(1))
                break scan;
            // Only a dot followed by a hex digit continues the literal;
            // anything else (including "..") ends it before the dot.
            switch (range.peekAt(1))
            {
            case '0': .. case '9':
            case 'A': .. case 'F':
            case 'a': .. case 'f':
                range.popFront();
                seenDot = true;
                type = tok!"doubleLiteral";
                break;
            default:
                break scan;
            }
            break;
        default:
            break scan;
        }
    }
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/// Lexes binary digits starting at the current position.
Token lexBinary() pure nothrow
{
    mixin (tokenStart);
    return lexBinary(mark, line, column, index);
}

/**
 * Lexes the digits and optional integer suffix of a binary literal. The
 * position arguments describe the token start and are passed through to the
 * returned token.
 */
Token lexBinary(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
    IdType type = tok!"intLiteral";
    scan: while (!range.empty)
    {
        switch (range.front)
        {
        case '0', '1', '_':
            range.popFront();
            break;
        case 'u', 'U', 'L':
            lexIntSuffix(type);
            break scan;
        default:
            break scan;
        }
    }
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/// Lexes a decimal literal starting at the current position.
Token lexDecimal() pure nothrow
{
    mixin (tokenStart);
    return lexDecimal(mark, line, column, index);
}

/**
 * Lexes the digits, optional fraction, exponent and suffix of a decimal
 * literal; the literal may begin with '.'. The position arguments describe
 * the token start and are passed through to the returned token.
 */
Token lexDecimal(size_t mark, size_t line, size_t column, size_t index) pure nothrow
{
    IdType type = tok!"intLiteral";
    bool seenDot = range.front == '.';
    if (seenDot)
    {
        range.popFront();
        type = tok!"doubleLiteral";
    }
    scan: while (!range.empty)
    {
        switch (range.front)
        {
        case '0': .. case '9':
        case '_':
            range.popFront();
            break;
        case 'u', 'U':
            if (!seenDot)
                lexIntSuffix(type);
            break scan;
        case 'i', 'f', 'F':
            lexFloatSuffix(type);
            break scan;
        case 'L':
            if (seenDot)
                lexFloatSuffix(type);
            else
                lexIntSuffix(type);
            break scan;
        case 'e', 'E':
            lexExponent(type);
            break scan;
        case '.':
            if (seenDot || !range.canPeek(1))
                break scan;
            immutable next = range.peekAt(1);
            if (next == '.')
                break scan;
            // Tell "int dot identifier" apart from a floating point literal:
            // an ASCII letter, DEL or a non-ASCII byte after the dot means the
            // dot is not part of the number.
            immutable bool startsName = (next >= 'A' && next <= 'Z')
                || (next >= 'a' && next <= 'z')
                || next >= 0x7f;
            if (startsName)
                break scan;
            range.popFront();
            seenDot = true;
            type = tok!"doubleLiteral";
            break;
        default:
            break scan;
        }
    }
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Consumes an integer suffix ('u'/'U', 'L'/'l', or a combination) and
 * updates `type` accordingly.
 *
 * Accepted sequences and type transitions are the same as before; the only
 * change is that range.empty is checked before every read of range.front, so
 * a suffix that ends the input (e.g. "1u" at end of file) no longer reads
 * past the end of the buffer.
 */
void lexIntSuffix(ref IdType type) pure nothrow @safe
{
    if (range.empty)
        return;

    immutable first = range.front;
    if (first == 'u' || first == 'U')
    {
        if (type == tok!"intLiteral")
            type = tok!"uintLiteral";
        else
            type = tok!"ulongLiteral";
        range.popFront();
        // Only continue if an 'L' follows the 'u'.
        if (range.empty || (range.front != 'L' && range.front != 'l'))
            return;
    }
    else if (first != 'L' && first != 'l')
        return;

    // The current character is 'L' or 'l'.
    if (type == tok!"uintLiteral")
        type = tok!"ulongLiteral";
    else
        type = tok!"longLiteral";
    range.popFront();

    // A trailing 'u' after the 'L' always results in an unsigned long.
    if (!range.empty && (range.front == 'U' || range.front == 'u'))
    {
        type = tok!"ulongLiteral";
        range.popFront();
    }
}
/**
 * Consumes a floating point suffix ('f'/'F' or 'L', optionally followed by
 * the imaginary suffix 'i') and updates `type`.
 *
 * An 'L' suffix now produces tok!"realLiteral" (and "Li" produces
 * tok!"irealLiteral"); previously both were reported as double / idouble
 * even though the real token kinds exist in dynamicTokens.
 */
void lexFloatSuffix(ref IdType type) pure nothrow @safe
{
    if (range.empty)
        return;
    switch (range.front)
    {
    case 'L':
        range.popFront();
        type = tok!"realLiteral";
        break;
    case 'f':
    case 'F':
        range.popFront();
        type = tok!"floatLiteral";
        break;
    default:
        break;
    }
    if (!range.empty && range.front == 'i')
    {
        warning("Complex number literals are deprecated");
        range.popFront();
        if (type == tok!"floatLiteral")
            type = tok!"ifloatLiteral";
        else if (type == tok!"realLiteral")
            type = tok!"irealLiteral";
        else
            type = tok!"idoubleLiteral";
    }
}
/**
 * Consumes an exponent: the marker character the caller stopped on, an
 * optional sign, digits, and an optional float suffix.
 *
 * Fixes relative to the previous version:
 * $(UL
 *   $(LI a sign is only accepted directly after the marker; a '+' or '-'
 *        after the digits ends the literal, so "1e5+3" is no longer consumed
 *        as a single token;)
 *   $(LI a literal with an exponent is a floating point literal, so an
 *        incoming tok!"intLiteral" becomes tok!"doubleLiteral";)
 *   $(LI a missing exponent is also reported when the input ends right
 *        after the marker or sign.)
 * )
 */
void lexExponent(ref IdType type) pure nothrow @safe
{
    range.popFront();
    if (type == tok!"intLiteral")
        type = tok!"doubleLiteral";
    bool foundSign = false;
    bool foundDigit = false;
    while (!range.empty)
    {
        switch (range.front)
        {
        case '-':
        case '+':
            if (foundSign || foundDigit)
            {
                if (!foundDigit)
                    error("Expected an exponent");
                return;
            }
            foundSign = true;
            range.popFront();
            break;
        case '0': .. case '9':
        case '_':
            foundDigit = true;
            range.popFront();
            break;
        case 'L':
        case 'f':
        case 'F':
        case 'i':
            lexFloatSuffix(type);
            return;
        default:
            if (!foundDigit)
                error("Expected an exponent");
            return;
        }
    }
    // Input ended while still inside the exponent.
    if (!foundDigit)
        error("Expected an exponent");
}
/// Lexes a "#!" script line: everything up to, but not including, the next newline.
Token lexScriptLine() pure
{
    mixin (tokenStart);
    for (; !range.empty && !isNewline(); range.popFront())
    {
    }
    auto text = cache.intern(range.slice(mark));
    return Token(tok!"scriptLine", text, line, column, index);
}
/// Lexes a "#line" special token sequence: everything up to, but not
/// including, the next newline.
Token lexSpecialTokenSequence() pure
{
    mixin (tokenStart);
    for (; !range.empty && !isNewline(); range.popFront())
    {
    }
    auto text = cache.intern(range.slice(mark));
    return Token(tok!"specialTokenSequence", text, line, column, index);
}
/**
 * Lexes a block comment that starts with slash-star and ends at the first
 * star-slash.
 *
 * On x86-64 with a new enough compiler, 16-byte windows are first searched
 * for the terminator with SSE instructions; the remainder is handled by the
 * scalar loop.
 *
 * Fix: the SIMD search only finds a terminator whose '*' and '/' lie in the
 * same window. When no terminator is found and the last byte of the window
 * is '*', only 15 bytes are consumed so that the '*' is looked at again
 * together with the byte that follows it. Previously a terminator that
 * straddled a window boundary was missed and the comment ran on.
 */
Token lexSlashStarComment() pure
{
    mixin (tokenStart);
    IdType type = tok!"comment";
    range.popFrontN(2);
    static if (__VERSION__ > 2065) version (D_InlineAsm_X86_64) while (range.index + 16 <= range.bytes.length)
    {
        ulong startAddress = cast(ulong) range.bytes.ptr + range.index;
        enum slash = (cast(ulong) '/') * 0x0101010101010101L;
        enum star = (cast(ulong) '*') * 0x0101010101010101L;
        enum lf = (cast(ulong) '\n') * 0x0101010101010101L;
        ulong charsSkipped;
        ulong newlineCount;
        bool done;
        asm
        {
            mov RAX, startAddress;
            movdqu XMM0, [RAX];
            mov R10, lf;
            movq XMM2, R10;
            shufpd XMM2, XMM2, 0;
            pcmpeqb XMM2, XMM0;
            pmovmskb R15, XMM2;
            mov R10, star;
            movq XMM3, R10;
            shufpd XMM3, XMM3, 0;
            pcmpeqb XMM3, XMM0;
            pmovmskb R8, XMM3;
            mov R10, slash;
            movq XMM4, R10;
            shufpd XMM4, XMM4, 0;
            pcmpeqb XMM4, XMM0;
            pmovmskb R9, XMM4;
        loop:
            cmp R8, 0;
            je notFound;
            cmp R9, 0;
            je notFound;
            bsf RAX, R8; // stIndex
            bsf RBX, R9; // slIndex
            mov RDX, RAX;
            inc RDX;
            cmp RDX, RBX;
            je found;
            cmp RAX, RBX;
            jae maskSlash;
        maskStar:
            mov RCX, RAX;
            mov R10, 1;
            shl R10, CL;
            xor R8, R10;
            jmp loop;
        maskSlash:
            mov RCX, RBX;
            mov R10, 1;
            shl R10, CL;
            xor R9, R10;
            jmp loop;
        notFound:
            mov R14, 16;
            mov charsSkipped, R14;
            popcnt R14, R15;
            mov newlineCount, R14;
            jmp asmEnd;
        found:
            inc RBX;
            mov charsSkipped, RBX;
            mov RAX, 1;
            mov done, AL;
            mov RCX, RBX;
            mov RBX, 1;
            shl RBX, CL;
            dec RBX;
            and R15, RBX;
            popcnt R14, R15;
            mov newlineCount, R14;
        asmEnd:
            nop;
        }
        // Keep a trailing '*' for the next round: its '/' may be the first
        // byte after this window. The byte is not a newline, so newlineCount
        // stays valid for the 15 bytes that are consumed.
        if (!done && range.bytes[range.index + 15] == '*')
            charsSkipped = 15;
        range.popFrontN(charsSkipped);
        range.incrementLine(newlineCount);
        if (done)
            goto end;
    }
    while (!range.empty)
    {
        if (range.front == '*')
        {
            range.popFront();
            if (!range.empty && range.front == '/')
            {
                range.popFront();
                break;
            }
        }
        else
            popFrontWhitespaceAware();
    }
end:
    return Token(type, cache.intern(range.slice(mark)), line, column,
        index);
}
/**
 * Lexes a "//" line comment: everything up to, but not including, the next
 * CR or LF.
 *
 * On x86-64 with a new enough compiler, 16-byte windows are first searched
 * for CR / LF with SSE instructions; the remainder is handled by the scalar
 * loop.
 *
 * Fix: the SIMD path used to add the number of skipped bytes to the local
 * `index` and `column`, which are the token start position passed to the
 * returned Token. Comments of 16 bytes or more were therefore reported at a
 * different position than the scalar path reports. The locals are now left
 * untouched; the input position is advanced through range.popFrontN only.
 */
Token lexSlashSlashComment() pure nothrow
{
    mixin (tokenStart);
    IdType type = tok!"comment";
    range.popFrontN(2);
    static if (__VERSION__ > 2065) version (D_InlineAsm_X86_64) while (range.index + 16 <= range.bytes.length)
    {
        ulong startAddress = cast(ulong) range.bytes.ptr + range.index;
        enum cr = (cast(ulong) '\r') * 0x0101010101010101L;
        enum lf = (cast(ulong) '\n') * 0x0101010101010101L;
        ulong charsSkipped;
        asm
        {
            mov RAX, startAddress;
            movdqu XMM0, [RAX];
            mov R10, cr;
            movq XMM1, R10;
            shufpd XMM1, XMM1, 0;
            pcmpeqb XMM1, XMM0;
            mov R10, lf;
            movq XMM2, R10;
            shufpd XMM2, XMM2, 0;
            pcmpeqb XMM2, XMM0;
            por XMM1, XMM2;
            pmovmskb RBX, XMM1;
            bsf RCX, RBX;
            mov RDX, 16;
            cmp RBX, 0;
            cmove RCX, RDX;
            mov charsSkipped, RCX;
        }
        assert (charsSkipped <= 16);
        range.popFrontN(charsSkipped);
        // Fewer than 16 bytes skipped: a CR or LF was found in this window.
        if (charsSkipped < 16)
            goto end;
    }
    while (!range.empty)
    {
        if (range.front == '\r' || range.front == '\n')
            break;
        range.popFront();
    }
end:
    return Token(type, cache.intern(range.slice(mark)), line, column,
        index);
}
/**
 * Lexes a nesting "/+ ... +/" comment. Every inner "/+" raises the nesting
 * depth and every "+/" lowers it; lexing stops when the depth reaches zero
 * or the input ends.
 */
Token lexSlashPlusComment() pure nothrow
{
    mixin (tokenStart);
    IdType type = tok!"comment";
    // Skip the opening "/+".
    range.popFront();
    range.popFront();
    int depth = 1;
    while (depth > 0 && !range.empty)
    {
        immutable current = range.front;
        if (current != '+' && current != '/')
        {
            popFrontWhitespaceAware();
            continue;
        }
        // '+' pairs with a following '/' (close), '/' with a following '+' (open).
        immutable bool closing = current == '+';
        range.popFront();
        if (range.empty)
            continue;
        if (closing && range.front == '/')
        {
            range.popFront();
            depth--;
        }
        else if (!closing && range.front == '+')
        {
            range.popFront();
            depth++;
        }
    }
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Lexes a double-quoted string literal including escape sequences and an
 * optional string suffix. Reports an error and returns an empty Token when
 * the input ends before the closing quote.
 */
Token lexStringLiteral() pure nothrow
{
    mixin (tokenStart);
    range.popFront(); // opening quote
    for (;;)
    {
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token();
        }
        immutable current = range.front;
        if (current == '"')
        {
            range.popFront();
            break;
        }
        if (current == '\\')
            lexEscapeSequence();
        else
            popFrontWhitespaceAware();
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Lexes a WYSIWYG string: either a backtick-delimited string or an r"..."
 * string. No escape sequences are processed. Reports an error and returns
 * Token(tok!"") when the input ends before the closing delimiter.
 */
Token lexWysiwygString() pure nothrow
{
    mixin (tokenStart);
    IdType type = tok!"stringLiteral";
    immutable bool backtick = range.front == '`';
    // The closing delimiter depends on which form was opened.
    immutable char terminator = backtick ? '`' : '"';

    range.popFront(); // the backtick, or the 'r' prefix
    if (!backtick)
    {
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token(tok!"");
        }
        range.popFront(); // the opening quote after 'r'
    }

    for (;;)
    {
        if (range.empty)
        {
            error("Error: unterminated string literal");
            return Token(tok!"");
        }
        if (range.front == terminator)
        {
            range.popFront();
            break;
        }
        popFrontWhitespaceAware();
    }
    lexStringSuffix(type);
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Consumes an optional string suffix ('c', 'w' or 'd') and sets `type` to
 * the matching string literal kind; without a 'w' or 'd' suffix the result
 * is tok!"stringLiteral".
 */
void lexStringSuffix(ref IdType type) pure nothrow
{
    if (range.empty)
    {
        type = tok!"stringLiteral";
        return;
    }
    immutable suffix = range.front;
    if (suffix == 'w' || suffix == 'd' || suffix == 'c')
        range.popFront();
    if (suffix == 'w')
        type = tok!"wstringLiteral";
    else if (suffix == 'd')
        type = tok!"dstringLiteral";
    else
        type = tok!"stringLiteral";
}
/**
 * Lexes a q"..." delimited string. The character after the opening quote
 * selects the form: one of the bracket pairs <>, {}, [] or () is handled by
 * lexNormalDelimitedString, anything else by lexHeredocString.
 */
Token lexDelimitedString() pure nothrow
{
    mixin (tokenStart);
    // Skip the 'q' and the opening quote.
    range.popFront();
    range.popFront();
    ubyte open;
    ubyte close;
    switch (range.front)
    {
    case '<':
        open = '<';
        close = '>';
        break;
    case '{':
        open = '{';
        close = '}';
        break;
    case '[':
        open = '[';
        close = ']';
        break;
    case '(':
        open = '(';
        close = ')';
        break;
    default:
        return lexHeredocString(mark, line, column, index);
    }
    range.popFront(); // the opening bracket
    return lexNormalDelimitedString(mark, line, column, index, open, close);
}
/**
 * Lexes the body of a bracket-delimited q"..." string whose opening bracket
 * has already been consumed. Nested `open` / `close` pairs are tracked; the
 * outermost `close` must be followed by a double quote.
 *
 * Fixes relative to the previous version: range.front is no longer read when
 * the input ends right after the closing bracket, and input that ends before
 * the closing bracket is reported instead of being returned as a string
 * literal.
 *
 * Returns: the string literal token, or Token(tok!"") after an error.
 */
Token lexNormalDelimitedString(size_t mark, size_t line, size_t column,
    size_t index, ubyte open, ubyte close)
    pure nothrow
{
    int depth = 1;
    while (!range.empty && depth > 0)
    {
        if (range.front == open)
        {
            depth++;
            range.popFront();
        }
        else if (range.front == close)
        {
            depth--;
            range.popFront();
            if (depth <= 0)
            {
                if (!range.empty && range.front == '"')
                    range.popFront();
                else
                {
                    error("Error: \" expected to end delimited string literal");
                    return Token(tok!"");
                }
            }
        }
        else
            popFrontWhitespaceAware();
    }
    if (depth > 0)
    {
        // Ran out of input before the outermost bracket was closed.
        error("Error: unterminated delimited string literal");
        return Token(tok!"");
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Lexes a heredoc-style q"IDENT ... IDENT" string. The identifier is read
 * with lexIdentifier and must be followed by a newline; the body ends at the
 * first line that begins with the same identifier, which must be followed by
 * a double quote.
 */
Token lexHeredocString(size_t mark, size_t line, size_t column, size_t index)
    pure nothrow
{
    Token ident = lexIdentifier();
    if (!isNewline())
        error("Newline expected");
    else
        popFrontWhitespaceAware();

    immutable size_t identLength = ident.text.length;
    while (!range.empty)
    {
        if (!isNewline())
        {
            range.popFront();
            continue;
        }
        // At the start of a new line: look for the closing identifier.
        popFrontWhitespaceAware();
        if (!range.canPeek(identLength))
        {
            error(ident.text ~ " expected");
            break;
        }
        if (range.peek(identLength - 1) == ident.text)
        {
            range.popFrontN(identLength);
            break;
        }
    }

    if (range.empty() || range.front != '"')
        error(`" expected`);
    else
        range.popFront();

    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Lexes a q{...} token string by running the lexer over its contents with
 * whitespace, comments and raw string text included, and concatenating the
 * text of every token up to the matching closing brace. The original
 * configuration is restored on exit.
 */
Token lexTokenString() pure
{
    mixin (tokenStart);
    assert (range.front == 'q');
    range.popFront();
    assert (range.front == '{');
    range.popFront();

    auto buffer = appender!string();
    buffer.put("q{");

    // Temporarily switch to a configuration that keeps everything.
    LexerConfig saved = config;
    scope(exit) config = saved;
    config.whitespaceBehavior = WhitespaceBehavior.include;
    config.stringBehavior = StringBehavior.source;
    config.commentBehavior = CommentBehavior.include;

    _front = advance();
    int depth = 1;
    while (depth > 0 && !empty)
    {
        auto current = front();
        if (current.text is null)
            buffer.put(str(current.type));
        else
            buffer.put(current.text);

        if (current.type == tok!"}")
            depth--;
        else if (current.type == tok!"{")
            depth++;

        // The final closing brace is not popped.
        if (depth > 0)
            popFront();
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.intern(cast(const(ubyte)[]) buffer.data), line,
        column, index);
}
/**
 * Lexes an x"..." hex string: hex digits and whitespace up to the closing
 * quote, followed by an optional string suffix. Reports an error and returns
 * an empty Token on end of input or on any other character.
 */
Token lexHexString() pure nothrow
{
    mixin (tokenStart);
    // Skip the 'x' and the opening quote.
    range.popFront();
    range.popFront();
    for (;;)
    {
        if (range.empty)
        {
            error("Error: unterminated hex string literal");
            return Token();
        }
        if (isWhitespace())
        {
            popFrontWhitespaceAware();
            continue;
        }
        immutable current = range.front;
        if (current == '"')
        {
            range.popFront();
            break;
        }
        immutable bool hexDigit = (current >= '0' && current <= '9')
            || (current >= 'A' && current <= 'F')
            || (current >= 'a' && current <= 'f');
        if (!hexDigit)
        {
            error("Error: invalid character in hex string");
            return Token();
        }
        range.popFront();
    }
    IdType type = tok!"stringLiteral";
    lexStringSuffix(type);
    return Token(type, cache.intern(range.slice(mark)), line, column, index);
}
/**
 * Consumes one escape sequence; the current character must be the backslash.
 *
 * Handles the single-character escapes, octal escapes of up to three digits,
 * \x (2 hex digits), \u (4 hex digits), \U (8 hex digits) and, for any other
 * character, everything up to and including the next ';'.
 *
 * Changes relative to the previous version: an octal escape may now start
 * with '0' (e.g. "\012"), where before only the "\0" was consumed; and the
 * three copies of the hex-digit loop are merged into one.
 *
 * Returns: true on success, false after an error has been reported.
 */
bool lexEscapeSequence() pure nothrow
{
    range.popFront();
    if (range.empty)
    {
        error("Error: non-terminated character escape sequence.");
        return false;
    }

    // Number of hex digits still to be consumed, and the message to report
    // when they are missing. Zero for escapes without hex digits.
    size_t hexDigits = 0;
    string hexMessage;

    switch (range.front)
    {
    case '\'':
    case '"':
    case '?':
    case '\\':
    case 'a':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
    case 'v':
        range.popFront();
        break;
    case '0': .. case '7':
        for (size_t i = 0; i < 3 && !range.empty && range.front >= '0' && range.front <= '7'; i++)
            range.popFront();
        break;
    case 'x':
        range.popFront();
        hexDigits = 2;
        hexMessage = "Error: 2 hex digits expected.";
        break;
    case 'u':
        range.popFront();
        hexDigits = 4;
        hexMessage = "Error: at least 4 hex digits expected.";
        break;
    case 'U':
        range.popFront();
        hexDigits = 8;
        hexMessage = "Error: at least 8 hex digits expected.";
        break;
    default:
        // Anything else is consumed up to and including the next ';'.
        while (true)
        {
            if (range.empty)
            {
                error("Error: non-terminated character escape sequence.");
                return false;
            }
            if (range.front == ';')
            {
                range.popFront();
                break;
            }
            else
                range.popFront();
        }
    }

    foreach (i; 0 .. hexDigits)
    {
        if (range.empty)
        {
            error(hexMessage);
            return false;
        }
        switch (range.front)
        {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            range.popFront();
            break;
        default:
            error(hexMessage);
            return false;
        }
    }
    return true;
}
- /**
-  * Lexes a character literal. The first byte (the opening quote) is popped,
-  * then one escape sequence, one run of high-bit bytes, or one other byte is
-  * consumed, followed by the closing quote.
-  * Returns: a characterLiteral token covering the consumed text, or a default
-  * Token after recording an error when the closing quote is missing or the
-  * input ends early.
-  */
- Token lexCharacterLiteral() pure nothrow
- {
-     mixin (tokenStart);
-     range.popFront();
-     // Every read of range.front below is guarded: the input may end right
-     // after the opening quote or in the middle of the literal.
-     if (!range.empty)
-     {
-         if (range.front == '\\')
-         {
-             // A failed escape has already recorded its own error; the
-             // closing-quote check below still runs, as it always did.
-             lexEscapeSequence();
-         }
-         else if (range.front == '\'')
-         {
-             // Empty literal: the closing quote follows immediately
-             range.popFront();
-             return Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
-                 line, column, index);
-         }
-         else if (range.front & 0x80)
-         {
-             // Consume the whole run of bytes that have the high bit set
-             while (!range.empty && (range.front & 0x80))
-                 range.popFront();
-         }
-         else
-             popFrontWhitespaceAware();
-         if (!range.empty && range.front == '\'')
-         {
-             range.popFront();
-             return Token(tok!"characterLiteral", cache.intern(range.slice(mark)),
-                 line, column, index);
-         }
-     }
-     error("Error: Expected ' to end character literal");
-     return Token();
- }
- /**
-  * Lexes an identifier: consumes bytes until isSeparating(0) reports a
-  * separator or the input ends, hashing them on the fly so the string cache
-  * does not have to rescan the text.
-  * Returns: an identifier token, or a default Token (after recording an
-  * error) when called at the end of the input.
-  */
- Token lexIdentifier() pure nothrow
- {
-     mixin (tokenStart);
-     uint hash = 0;
-     if (range.empty)
-     {
-         // Nothing to consume: popping here would run past the end and the
-         // empty slice would violate the contract of intern().
-         error("Invalid identifier");
-         return Token();
-     }
-     if (isSeparating(0))
-     {
-         error("Invalid identifier");
-         // Still consume the offending byte so the lexer makes progress, and
-         // keep the hash in step with the text that ends up in the token.
-         hash = StringCache.hashStep(range.front, hash);
-         range.popFront();
-     }
-     while (!range.empty && !isSeparating(0))
-     {
-         hash = StringCache.hashStep(range.front, hash);
-         range.popFront();
-     }
-     return Token(tok!"identifier", cache.intern(range.slice(mark), hash), line,
-         column, index);
- }
- /**
-  * Lexes a token that starts with '.': one of ".", ".." or "...". When the
-  * byte after the dot is a decimal digit the work is handed to lexNumber().
-  */
- Token lexDot() pure nothrow
- {
-     mixin (tokenStart);
-     if (range.canPeek(1))
-     {
-         immutable next = range.peekAt(1);
-         if (next >= '0' && next <= '9')
-             return lexNumber();
-         if (next == '.')
-         {
-             // Two dots are certain; a third one is optional
-             range.popFront();
-             range.popFront();
-             if (range.empty || range.front != '.')
-                 return Token(tok!"..", null, line, column, index);
-             range.popFront();
-             return Token(tok!"...", null, line, column, index);
-         }
-     }
-     // Lone dot: either the last byte of the input or followed by something else
-     range.popFront();
-     return Token(tok!".", null, line, column, index);
- }
- /**
-  * Consumes a three-byte newline (presumably one of the UTF-8 encoded line
-  * separators that isNewline() recognises), bumps the line counter and
-  * returns the consumed bytes as a whitespace token.
-  */
- Token lexLongNewline() pure nothrow
- {
-     mixin (tokenStart);
-     foreach (i; 0 .. 3)
-         range.popFront();
-     range.incrementLine();
-     auto text = cache.intern(range.slice(mark));
-     return Token(tok!"whitespace", text, line, column, index);
- }
- /**
-  * Returns: true if the input at the current position starts with '\n', '\r',
-  * or the byte sequence of "\u2028" or "\u2029".
-  */
- bool isNewline() pure @safe nothrow
- {
-     if (range.front == '\n' || range.front == '\r')
-         return true;
-     // The long forms start with a high-bit byte and need two more bytes
-     if (!(range.front & 0x80) || !range.canPeek(2))
-         return false;
-     return range.peek(2) == "\u2028" || range.peek(2) == "\u2029";
- }
- /**
-  * Tells whether the byte `offset` positions ahead of the current one ends an
-  * identifier-like run of bytes. The lexer position is not changed.
-  * Params:
-  *     offset = how far ahead of the current position to look
-  * Returns: true for the end of the input, for ASCII control characters,
-  * whitespace and punctuation (everything up to 0x2f, ':' .. '@', '[' .. '^',
-  * '`', '{' .. '~'), and for the byte sequences of "\u2028" / "\u2029";
-  * false for letters and every other byte.
-  */
- bool isSeparating(size_t offset) pure nothrow @safe
- {
-     if (!range.canPeek(offset)) return true;
-     auto c = range.peekAt(offset);
-     if (c >= 'A' && c <= 'Z') return false;
-     if (c >= 'a' && c <= 'z') return false;
-     if (c <= 0x2f) return true;
-     if (c >= ':' && c <= '@') return true;
-     if (c >= '[' && c <= '^') return true;
-     if (c >= '{' && c <= '~') return true;
-     if (c == '`') return true;
-     if (c & 0x80)
-     {
-         // Look ahead on a copy. Advancing `range` itself here would move
-         // the lexer as a side effect of a query and would test the bytes
-         // at the current position instead of the ones at `offset`.
-         auto r = range;
-         r.popFrontN(offset);
-         return (r.canPeek(2) && (r.peek(2) == "\u2028"
-             || r.peek(2) == "\u2029"));
-     }
-     return false;
- }
- /// Code mixed in at the top of the lex functions: it records where the token
- /// starts (`line`, `column`, `index`) and takes a `mark` that is later passed
- /// to range.slice() to get the token's text.
- enum tokenStart = q{
-     auto mark = range.mark();
-     size_t line = range.line;
-     size_t column = range.column;
-     size_t index = range.index;
- };
- /// Records `message` as an error at the current line and column of the input.
- void error(string message) pure nothrow @safe
- {
-     auto entry = Message(range.line, range.column, message, true);
-     messages ~= entry;
- }
- /// Records `message` as a warning (a Message whose isError flag is false) at
- /// the current line and column of the input.
- void warning(string message) pure nothrow @safe
- {
-     auto entry = Message(range.line, range.column, message, false);
-     messages ~= entry;
-     assert (messages.length > 0);
- }
- /// A diagnostic recorded by error() or warning().
- struct Message
- {
- /// Value of range.line when the message was recorded
- size_t line;
- /// Value of range.column when the message was recorded
- size_t column;
- /// Human-readable description of the problem
- string message;
- /// true when recorded through error(), false when recorded through warning()
- bool isError;
- }
- /// Diagnostics appended by error() and warning(), in the order they occurred
- Message[] messages;
- /// String cache used to intern the text of the tokens that carry text
- StringCache* cache;
- /// Lexer configuration. NOTE(review): not read in this part of the file;
- /// see the rest of the lexer for the options it controls.
- LexerConfig config;
- }
- /// Creates a lexer over `range` with a default-initialized configuration and
- /// a freshly allocated string cache of StringCache.defaultBucketCount buckets.
- public auto byToken(ubyte[] range)
- {
-     auto stringCache = new StringCache(StringCache.defaultBucketCount);
-     LexerConfig defaultConfig;
-     return DLexer(range, defaultConfig, stringCache);
- }
- /// Creates a lexer over `range` with a default-initialized configuration that
- /// interns its strings in the caller-supplied `cache`.
- public auto byToken(ubyte[] range, StringCache* cache)
- {
-     LexerConfig defaultConfig;
-     auto lexer = DLexer(range, defaultConfig, cache);
-     return lexer;
- }
- /// Creates a lexer over `range` using the given configuration and string cache.
- public auto byToken(ubyte[] range, const LexerConfig config, StringCache* cache)
- {
-     auto lexer = DLexer(range, config, cache);
-     return lexer;
- }
- /**
-  * Removes "decoration" such as leading whitespace, leading + and * characters,
-  * and places the result into the given output range.
-  * Params:
-  *     comment = the complete text of a documentation comment; it must start
-  *         with "///", "/++" or "/**"
-  *     outputRange = receives the undecorated text, one piece per line
-  */
- public void unDecorateComment(T)(string comment, auto ref T outputRange)
-     if (isOutputRange!(T, string))
- in
- {
-     assert (comment.length >= 3);
- }
- body
- {
-     immutable size_t len = comment.length;
-     switch (comment[0 .. 3])
-     {
-     case "///":
-         size_t i = 3;
-         // Bounded: the comment may consist of nothing but "///" and blanks
-         while (i < len && (comment[i] == ' ' || comment[i] == '\t'))
-             i++;
-         outputRange.put(comment[i .. $]);
-         break;
-     case "/++":
-     case "/**":
-         size_t i = 3;
-         immutable char c = comment[1];
-         // Skip leading * and + characters
-         while (i < len && comment[i] == c) i++;
-         // Skip trailing * and + characters
-         size_t j = len - 2;
-         while (j > i && comment[j] == c)
-             j--;
-         while (j > i && (comment[j] == ' ' || comment[j] == '\t'))
-             j--;
-         // Everything below treats j as an exclusive end. If the trimming
-         // stopped on a character of the text itself (the comment closes on
-         // its last line of text), step past it so that it is not cut off.
-         if (j >= i && comment[j] != c && comment[j] != ' ' && comment[j] != '\t'
-             && comment[j] != '\n' && comment[j] != '\r')
-             j++;
-         // The scans below are bounded by the length of the comment so that
-         // unterminated or degenerate input cannot be read out of bounds.
-         if (i < len && comment[i] == '\r') i++;
-         if (i < len && comment[i] == '\n') i++;
-         while (i < len && (comment[i] == ' ' || comment[i] == '\t')) i++;
-         immutable bool skipBeginningChar = i < len && comment[i] == c;
-         if (skipBeginningChar)
-             i++;
-         // Blanks in front of the first line's text; later lines drop up to
-         // this many plus one
-         size_t whitespaceToSkip;
-         while (i < len && (comment[i] == ' ' || comment[i] == '\t'))
-         {
-             whitespaceToSkip++;
-             i++;
-         }
-         // First line, up to and including its newline
-         size_t l = i;
-         while (i < j)
-         {
-             if (comment[i++] == '\n')
-                 break;
-         }
-         outputRange.put(comment[l .. i]);
-         // Remaining lines
-         while (true)
-         {
-             if (skipBeginningChar)
-             {
-                 while (i < j && (comment[i] == ' ' || comment[i] == '\t')) i++;
-                 if (i < j && comment[i] == c) i++;
-             }
-             for (size_t s = 0; (i < j) && (s <= whitespaceToSkip)
-                 && (comment[i] == ' ' || comment[i] == '\t');)
-             {
-                 s++;
-                 i++;
-             }
-             size_t k = i;
-             inner: while (k < j)
-             {
-                 if (comment[k] == '\n')
-                 {
-                     k++;
-                     break inner;
-                 }
-                 k++;
-             }
-             outputRange.put(comment[i .. k]);
-             i = k;
-             if (i >= j)
-                 break;
-         }
-         break;
-     default:
-         assert (false, "Invalid doc comment");
-     }
- }
- /**
-  * A hash table of interned strings. Each distinct byte sequence is copied
-  * once into memory obtained from the C allocator; interning the same bytes
-  * again returns the copy made the first time. All memory is released by the
-  * destructor, so strings handed out must not be used after that.
-  */
- struct StringCache
- {
- public:
-     @disable this();
-     /**
-      * Params: bucketCount = the initial number of buckets. Must be a
-      * power of two
-      */
-     this(size_t bucketCount)
-     {
-         buckets = (cast(Node**) calloc((Node*).sizeof, bucketCount))[0 .. bucketCount];
-     }
-     ~this()
-     {
-         Block* current = rootBlock;
-         while (current !is null)
-         {
-             Block* prev = current;
-             current = current.next;
-             free(cast(void*) prev.bytes.ptr);
-             free(cast(void*) prev);
-         }
-         foreach (nodePointer; buckets)
-         {
-             Node* currentNode = nodePointer;
-             while (currentNode !is null)
-             {
-                 Node* prev = currentNode;
-                 currentNode = currentNode.next;
-                 // Strings longer than blockSize / 4 do not live in a block:
-                 // allocate() gave each of them its own malloc'ed buffer,
-                 // which has to be released here or it is leaked.
-                 if (prev.str.length > blockSize / 4)
-                     free(cast(void*) prev.str.ptr);
-                 free(prev);
-             }
-         }
-         rootBlock = null;
-         free(buckets.ptr);
-         buckets = null;
-     }
-     /**
-      * Caches a string.
-      * Returns: the cached copy of `str`, or "" if `str` is null or empty.
-      */
-     string intern(const(ubyte)[] str) pure nothrow @safe
-     {
-         if (str is null || str.length == 0)
-             return "";
-         immutable uint hash = hashBytes(str);
-         return intern(str, hash);
-     }
-     /**
-      * ditto
-      */
-     string intern(string str) pure nothrow @trusted
-     {
-         return intern(cast(ubyte[]) str);
-     }
-     /**
-      * Caches a string as above, but uses the given hash code instead of
-      * calculating one itself. Using this alongside $(LREF hashStep)() can
-      * reduce the amount of work necessary when lexing dynamic tokens.
-      */
-     string intern(const(ubyte)[] str, uint hash) pure nothrow @safe
-     in
-     {
-         assert (str.length > 0);
-     }
-     body
-     {
-         return _intern(str, hash);
-         // string s = _intern(str, hash);
-         // size_t* ptr = s in debugMap;
-         // if (ptr is null)
-         // debugMap[s] = cast(size_t) s.ptr;
-         // else
-         // assert (*ptr == cast(size_t) s.ptr);
-         // return s;
-     }
-     /**
-      * Incremental hashing.
-      * Params:
-      * b = the byte to add to the hash
-      * h = the hash that has been calculated so far
-      * Returns: the new hash code for the string.
-      */
-     static uint hashStep(ubyte b, uint h) pure nothrow @safe
-     {
-         return (h ^ sbox[b]) * 3;
-     }
-     /**
-      * The default bucket count for the string cache.
-      */
-     static enum defaultBucketCount = 4096;
-     /// Total number of string bytes copied into the cache so far
-     size_t allocated() pure nothrow @safe @property
-     {
-         return _allocated;
-     }
- private:
-     /// Looks `bytes` up under `hash` and returns the cached copy, creating it
-     /// (and a new node at the head of its bucket) on a miss.
-     string _intern(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
-     {
-         if (bytes is null || bytes.length == 0)
-             return "";
-         // Masking only spreads entries over all buckets when the bucket
-         // count is a power of two
-         immutable size_t index = hash & (buckets.length - 1);
-         Node* s = find(bytes, hash);
-         if (s !is null)
-             return cast(string) s.str;
-         _allocated += bytes.length;
-         ubyte[] mem = allocate(bytes.length);
-         mem[] = bytes[];
-         Node* node = cast(Node*) malloc(Node.sizeof);
-         node.str = mem;
-         node.hash = hash;
-         node.next = buckets[index];
-         buckets[index] = node;
-         return cast(string) mem;
-     }
-     /// Returns the node that holds `bytes` in the bucket selected by `hash`,
-     /// or null if there is none.
-     Node* find(const(ubyte)[] bytes, uint hash) pure nothrow @trusted
-     {
-         import std.algorithm;
-         immutable size_t index = hash & (buckets.length - 1);
-         Node* node = buckets[index];
-         while (node !is null)
-         {
-             if (node.hash == hash && bytes.equal(cast(ubyte[]) node.str))
-                 return node;
-             node = node.next;
-         }
-         return node;
-     }
-     /// Hashes all of `data`; gives the same result as folding hashStep()
-     /// over the bytes starting from 0.
-     static uint hashBytes(const(ubyte)[] data) pure nothrow @trusted
-     in
-     {
-         assert (data !is null);
-         assert (data.length > 0);
-     }
-     body
-     {
-         uint hash = 0;
-         foreach (ubyte b; data)
-         {
-             hash ^= sbox[b];
-             hash *= 3;
-         }
-         return hash;
-     }
-     /// Returns `numBytes` bytes of storage. Requests larger than
-     /// blockSize / 4 get a malloc'ed buffer of their own; smaller ones are
-     /// carved out of one of the first four blocks of the block list, or out
-     /// of a new block pushed onto its front.
-     ubyte[] allocate(size_t numBytes) pure nothrow @trusted
-     in
-     {
-         assert (numBytes != 0);
-     }
-     out (result)
-     {
-         assert (result.length == numBytes);
-     }
-     body
-     {
-         if (numBytes > (blockSize / 4))
-             return (cast(ubyte*) malloc(numBytes))[0 .. numBytes];
-         Block* r = rootBlock;
-         size_t i = 0;
-         while (i <= 3 && r !is null)
-         {
-             immutable size_t available = r.bytes.length;
-             immutable size_t oldUsed = r.used;
-             immutable size_t newUsed = oldUsed + numBytes;
-             if (newUsed <= available)
-             {
-                 r.used = newUsed;
-                 return r.bytes[oldUsed .. newUsed];
-             }
-             i++;
-             r = r.next;
-         }
-         Block* b = cast(Block*) malloc(Block.sizeof);
-         b.bytes = (cast(ubyte*) malloc(blockSize))[0 .. blockSize];
-         b.used = numBytes;
-         b.next = rootBlock;
-         rootBlock = b;
-         return b.bytes[0 .. numBytes];
-     }
-     /// One cached string, chained to the other entries of its bucket
-     static struct Node
-     {
-         ubyte[] str;
-         uint hash;
-         Node* next;
-     }
-     /// One chunk of storage that small strings are carved out of
-     static struct Block
-     {
-         ubyte[] bytes;
-         size_t used;
-         Block* next;
-     }
-     static enum blockSize = 1024 * 16;
-     /// Per-byte substitution table used by hashStep() and hashBytes()
-     static immutable uint[] sbox = [
-         0xF53E1837, 0x5F14C86B, 0x9EE3964C, 0xFA796D53,
-         0x32223FC3, 0x4D82BC98, 0xA0C7FA62, 0x63E2C982,
-         0x24994A5B, 0x1ECE7BEE, 0x292B38EF, 0xD5CD4E56,
-         0x514F4303, 0x7BE12B83, 0x7192F195, 0x82DC7300,
-         0x084380B4, 0x480B55D3, 0x5F430471, 0x13F75991,
-         0x3F9CF22C, 0x2FE0907A, 0xFD8E1E69, 0x7B1D5DE8,
-         0xD575A85C, 0xAD01C50A, 0x7EE00737, 0x3CE981E8,
-         0x0E447EFA, 0x23089DD6, 0xB59F149F, 0x13600EC7,
-         0xE802C8E6, 0x670921E4, 0x7207EFF0, 0xE74761B0,
-         0x69035234, 0xBFA40F19, 0xF63651A0, 0x29E64C26,
-         0x1F98CCA7, 0xD957007E, 0xE71DDC75, 0x3E729595,
-         0x7580B7CC, 0xD7FAF60B, 0x92484323, 0xA44113EB,
-         0xE4CBDE08, 0x346827C9, 0x3CF32AFA, 0x0B29BCF1,
-         0x6E29F7DF, 0xB01E71CB, 0x3BFBC0D1, 0x62EDC5B8,
-         0xB7DE789A, 0xA4748EC9, 0xE17A4C4F, 0x67E5BD03,
-         0xF3B33D1A, 0x97D8D3E9, 0x09121BC0, 0x347B2D2C,
-         0x79A1913C, 0x504172DE, 0x7F1F8483, 0x13AC3CF6,
-         0x7A2094DB, 0xC778FA12, 0xADF7469F, 0x21786B7B,
-         0x71A445D0, 0xA8896C1B, 0x656F62FB, 0x83A059B3,
-         0x972DFE6E, 0x4122000C, 0x97D9DA19, 0x17D5947B,
-         0xB1AFFD0C, 0x6EF83B97, 0xAF7F780B, 0x4613138A,
-         0x7C3E73A6, 0xCF15E03D, 0x41576322, 0x672DF292,
-         0xB658588D, 0x33EBEFA9, 0x938CBF06, 0x06B67381,
-         0x07F192C6, 0x2BDA5855, 0x348EE0E8, 0x19DBB6E3,
-         0x3222184B, 0xB69D5DBA, 0x7E760B88, 0xAF4D8154,
-         0x007A51AD, 0x35112500, 0xC9CD2D7D, 0x4F4FB761,
-         0x694772E3, 0x694C8351, 0x4A7E3AF5, 0x67D65CE1,
-         0x9287DE92, 0x2518DB3C, 0x8CB4EC06, 0xD154D38F,
-         0xE19A26BB, 0x295EE439, 0xC50A1104, 0x2153C6A7,
-         0x82366656, 0x0713BC2F, 0x6462215A, 0x21D9BFCE,
-         0xBA8EACE6, 0xAE2DF4C1, 0x2A8D5E80, 0x3F7E52D1,
-         0x29359399, 0xFEA1D19C, 0x18879313, 0x455AFA81,
-         0xFADFE838, 0x62609838, 0xD1028839, 0x0736E92F,
-         0x3BCA22A3, 0x1485B08A, 0x2DA7900B, 0x852C156D,
-         0xE8F24803, 0x00078472, 0x13F0D332, 0x2ACFD0CF,
-         0x5F747F5C, 0x87BB1E2F, 0xA7EFCB63, 0x23F432F0,
-         0xE6CE7C5C, 0x1F954EF6, 0xB609C91B, 0x3B4571BF,
-         0xEED17DC0, 0xE556CDA0, 0xA7846A8D, 0xFF105F94,
-         0x52B7CCDE, 0x0E33E801, 0x664455EA, 0xF2C70414,
-         0x73E7B486, 0x8F830661, 0x8B59E826, 0xBB8AEDCA,
-         0xF3D70AB9, 0xD739F2B9, 0x4A04C34A, 0x88D0F089,
-         0xE02191A2, 0xD89D9C78, 0x192C2749, 0xFC43A78F,
-         0x0AAC88CB, 0x9438D42D, 0x9E280F7A, 0x36063802,
-         0x38E8D018, 0x1C42A9CB, 0x92AAFF6C, 0xA24820C5,
-         0x007F077F, 0xCE5BC543, 0x69668D58, 0x10D6FF74,
-         0xBE00F621, 0x21300BBE, 0x2E9E8F46, 0x5ACEA629,
-         0xFA1F86C7, 0x52F206B8, 0x3EDF1A75, 0x6DA8D843,
-         0xCF719928, 0x73E3891F, 0xB4B95DD6, 0xB2A42D27,
-         0xEDA20BBF, 0x1A58DBDF, 0xA449AD03, 0x6DDEF22B,
-         0x900531E6, 0x3D3BFF35, 0x5B24ABA2, 0x472B3E4C,
-         0x387F2D75, 0x4D8DBA36, 0x71CB5641, 0xE3473F3F,
-         0xF6CD4B7F, 0xBF7D1428, 0x344B64D0, 0xC5CDFCB6,
-         0xFE2E0182, 0x2C37A673, 0xDE4EB7A3, 0x63FDC933,
-         0x01DC4063, 0x611F3571, 0xD167BFAF, 0x4496596F,
-         0x3DEE0689, 0xD8704910, 0x7052A114, 0x068C9EC5,
-         0x75D0E766, 0x4D54CC20, 0xB44ECDE2, 0x4ABC653E,
-         0x2C550A21, 0x1A52C0DB, 0xCFED03D0, 0x119BAFE2,
-         0x876A6133, 0xBC232088, 0x435BA1B2, 0xAE99BBFA,
-         0xBB4F08E4, 0xA62B5F49, 0x1DA4B695, 0x336B84DE,
-         0xDC813D31, 0x00C134FB, 0x397A98E6, 0x151F0E64,
-         0xD9EB3E69, 0xD3C7DF60, 0xD2F2C336, 0x2DDD067B,
-         0xBD122835, 0xB0B3BD3A, 0xB0D54E46, 0x8641F1E4,
-         0xA0B38F96, 0x51D39199, 0x37A6AD75, 0xDF84EE41,
-         0x3C034CBA, 0xACDA62FC, 0x11923B8B, 0x45EF170A,
-     ];
-     // deprecated size_t[string] debugMap;
-     size_t _allocated;
-     Node*[] buckets;
-     Block* rootBlock;
- }
- // C allocator functions used by StringCache. They are declared here with the
- // `pure` attribute, which is what lets StringCache's pure member functions
- // call them.
- private extern(C) void* calloc(size_t, size_t) nothrow pure;
- private extern(C) void* malloc(size_t) nothrow pure;
- private extern(C) void free(void*) nothrow pure;
- /// Lexing a simple import declaration yields the expected token types
- unittest
- {
-     import std.stdio;
-     auto code = cast(ubyte[]) q{ import std.stdio;}c;
-     auto expected = [tok!"import", tok!"identifier", tok!".",
-         tok!"identifier", tok!";"];
-     auto lexed = byToken(code);
-     assert (lexed.map!"a.type"().equal(expected));
- }
- /// Test \x char sequence
- unittest
- {
-     auto lex = (string text) => byToken(cast(ubyte[]) text);
-     // valid: every combination of two hex digits, lower and upper case
-     immutable hexDigits = "0123456789abcdefABCDEF";
-     string literals = "";
-     foreach (hi; hexDigits)
-         foreach (lo; hexDigits)
-             literals ~= "'\\x" ~ hi ~ lo ~ "'";
-     assert (lex(literals).filter!(token => token.type != tok!"characterLiteral").empty);
-     // invalid: the first message points at the column of the offending byte
-     static struct Invalid
-     {
-         string text;
-         size_t column;
-     }
-     auto invalid = [
-         Invalid(`'\x'`, 4),
-         Invalid(`'\x_'`, 4),
-         Invalid(`'\xA'`, 5),
-         Invalid(`'\xAY'`, 5),
-         Invalid(`'\xXX'`, 4)
-     ];
-     foreach (bad; invalid)
-         assert (lex(bad.text).messages[0]
-             == DLexer.Message(1, bad.column, "Error: 2 hex digits expected.", true));
- }