parseBase.d | searchcode

/parseBase.d

http://github.com/FeepingCreature/fcc · D · 1078 lines · 902 code · 101 blank · 75 comment · 366 complexity · c7bb0b978a7438cbcdda3fdbab87098b MD5 · raw file

module parseBase;

import casts;

// Byte comparison helper: Windows builds get a local implementation,
// every other platform binds to the C library's bcmp.
version(Windows) {
  // Compares `count` bytes starting at `from` and `to`.
  // Returns 0 if every compared byte matches (including when count <= 0,
  // because the loop body never runs) and 1 at the first mismatch.
  int bcmp (char* from, char* to, int count) {
    while (count-- > 0) {
      if (*from++ != *to++) return 1;
    }
    return 0;
  }
} else {
  extern(C) int bcmp(char*, char*, int);
}

version(Windows) { } else pragma(set_attribute, faststreq_samelen_nonz, optimize("-fomit-frame-pointer"));
// Equality test for two strings that the caller guarantees to have the SAME,
// NON-ZERO length (only a.length is ever consulted). Returns non-zero on equality.
//
// Strategy: compare the first four bytes as one int, then
//  - for length > 4, hand the remainder to bcmp;
//  - for length 1..4, mask off the bytes beyond the string before comparing.
// NOTE(review): both paths load a full int through a.ptr / b.ptr, so for
// strings shorter than four bytes this reads past the end of the slice.
// NOTE(review): for a.length == 0 the mask expression shifts by 32 bits;
// the name says callers must not pass empty strings -- confirm at call sites.
int faststreq_samelen_nonz(string a, string b) {
  // the chance of this happening is approximately 0.1% (I benched it)
  // as such, it's not worth it
  // if (a.ptr == b.ptr) return true; // strings are assumed immutable
  if (a.length > 4) {
    // cheap reject on the first word before calling into bcmp
    if ((cast(int*) a.ptr)[0] != (cast(int*) b.ptr)[0]) return false;
    return bcmp(a.ptr + 4, b.ptr + 4, a.length - 4) == 0;
  }
  int ai = *cast(int*) a.ptr, bi = *cast(int*) b.ptr;
  /**
   1 => 0x000000ff => 2^8 -1
   2 => 0x0000ffff => 2^16-1
   3 => 0x00ffffff => 2^24-1
   4 => 0xffffffff => 2^32-1
   **/
  // 0x01010101 shifted right by the number of unused bytes leaves one 0x01
  // per used byte; multiplying by 0xff expands each into a full 0xff byte.
  uint mask = (0x01010101U >> ((4-a.length)*8))*0xff;
  // uint mask = (1<<((a.length<<3)&0x1f))-(((a.length<<3)&32)>>5)-1;
  // uint mask = (((1<<((a.length<<3)-1))-1)<<1)|1;
  return (ai & mask) == (bi & mask);
}

// version(Windows) { } else pragma(set_attribute, faststreq, optimize("-O3"));
// General string equality built on faststreq_samelen_nonz.
// Handles the two cases the fast helper does not: differing lengths
// (never equal) and two empty strings (always equal).
int faststreq(string a, string b) {
  if (a.length == b.length) {
    // Same length: only non-empty strings need an actual comparison.
    if (b.length) return faststreq_samelen_nonz(a, b);
    return true;
  }
  return false;
}

int[int] accesses;

// Removes and returns the first char of `s`.
// When `s` is empty it is left untouched and `deflt` is returned instead.
char takech(ref string s, char deflt) {
  // Nothing left to take: hand back the caller's fallback.
  if (s.length == 0) return deflt;
  char head = s[0];
  s = s[1 .. $];
  return head;
}

import tools.base, errors;
// Pointer-keyed cache that also gathers statistics: every stored entry is
// stamped with an insertion counter, and each hit records how many
// insertions ago the entry was written in the global `accesses` histogram.
struct StatCache {
  // key -> (object, pointer, value of `depth` at insertion time)
  tools.base.Stuple!(Object, char*, int)[char*] cache;
  // incremented once per insertion (see opIndexAssign)
  int depth;
  // `p in cache`: returns a pointer to the stored entry, or null on a miss.
  // On a hit, bumps accesses[depth - insertion stamp].
  tools.base.Stuple!(Object, char*)* opIn_r(char* p) {
    auto res = p in cache;
    if (!res) return null;
    auto delta = depth - res._2;
    if (!(delta in accesses)) accesses[delta] = 0;
    accesses[delta] ++;
    // NOTE(review): reinterprets the 3-tuple as a 2-tuple; this presumes the
    // first two fields are laid out identically in both Stuple types.
    return cast(tools.base.Stuple!(Object, char*)*) res;
  }
  // `cache[idx] = thing`: stores the pair together with the current depth, then advances depth.
  void opIndexAssign(tools.base.Stuple!(Object, char*) thing, char* idx) {
    cache[idx] = stuple(thing._0, thing._1, depth++);
  }
}

// Fixed-size (24 entry) ring buffer used as a small pointer-keyed cache.
// New entries overwrite the oldest slot; lookups scan from the most
// recently written slot backwards.
struct SpeedCache {
  // (key, object, pointer) triples
  tools.base.Stuple!(char*, Object, char*)[24] cache;
  // index of the slot the next insertion will write to
  int curPos;
  // `p in cache`: returns a pointer to the (object, pointer) part of the
  // matching entry, or null if no slot holds the key `p`.
  tools.base.Stuple!(Object, char*)* opIn_r(char* p) {
    // begin with the most recently written slot, wrapping below zero
    int start = curPos - 1;
    if (start == -1) start += cache.length;
    int i = start;
    do {
      if (cache[i]._0 == p)
        // NOTE(review): takes the address of field _1 and treats the rest of
        // the triple as a 2-tuple; presumes the fields are stored contiguously.
        return cast(tools.base.Stuple!(Object, char*)*) &cache[i]._1;
      if (--i == -1) i += cache.length;
    } while (i != start);
    return null;
  }
  // `cache[idx] = thing`: writes into the current slot and advances, wrapping at the end.
  void opIndexAssign(tools.base.Stuple!(Object, char*) thing, char* idx) {
    cache[curPos++] = stuple(idx, thing._0, thing._1);
    if (curPos == cache.length) curPos = 0;
  }
}

// Number bases understood by gotInt; the ordinal is also the index into
// getDigits' `factor` table.
enum Scheme { Binary, Octal, Decimal, Hex }
// Tries to parse an integer literal at the start of `text` (after eatComments).
// Recognised forms: optional leading '-', "0b"/"0B" binary, "0x"/"0X" hex,
// leading "0" octal, otherwise decimal. '_' may appear between digits.
// On success: stores the value in `i`, advances `text` past the literal, returns true.
// On failure: returns false and leaves `text` alone (`i` is an out parameter and
// therefore reset on entry).
// If `unsigned` is non-null and digits were read, *unsigned is set to whether
// the value only fits into 32 bits when interpreted as unsigned.
bool gotInt(ref string text, out int i, bool* unsigned = null) {
  auto t2 = text;
  t2.eatComments();
  if (auto rest = t2.startsWith("-")) {
    // Negative literal: parse the magnitude recursively, then negate.
    // `unsigned` is not forwarded, so it is left untouched on this path.
    // NOTE(review): magnitudes above int.max are negated as a wrapped int -- confirm intended.
    return gotInt(rest, i)
      && (
        i = -i,
        (text = rest),
        true
      );
  }
  // value of the most recently accepted digit, relative to `from`
  ubyte ub;
  // Consumes one char of t2 if it lies in [from, to]; with `to` left at its
  // default the char must equal `from` exactly. Stores (char - from) in ub.
  bool accept(ubyte from, ubyte to = 0xff) {
    if (!t2.length) return false;
    ubyte nub = t2[0];
    if (nub < from) return false;
    if (to != 0xff) { if (nub > to) return false; }
    else { if (nub > from) return false; }
    nub -= from;
    t2.take();
    ub = nub;
    return true;
  }
  
  int res;
  bool must_uint;
  // Accumulates digits of the given base into `res`.
  // Returns true if at least one digit was consumed; returns false (after
  // calling text.setError) when the value does not fit into 32 bits.
  bool getDigits(Scheme scheme) {
    static int[4] factor = [2, 8, 10, 16];
    bool gotSomeDigits = false;
    outer:while (true) {
      // if it starts with _, it's an identifier
      while (gotSomeDigits && accept('_')) { }
      // The cases deliberately fall through from the widest digit set to the
      // narrowest; reaching `default` means no digit matched.
      switch (scheme) {
        case Scheme.Hex:
          if (accept('a', 'f')) { ub += 10; break; }
          if (accept('A', 'F')) { ub += 10; break; }
        case Scheme.Decimal: if (accept('0', '9')) break;
        case Scheme.Octal:   if (accept('0', '7')) break;
        case Scheme.Binary:  if (accept('0', '1')) break;
        default: break outer;
      }
      gotSomeDigits = true;
      assert(ub < factor[scheme]);
      // do the arithmetic in 64 bits so overflow of the 32-bit result is detectable
      long nres = cast(long) res * cast(long) factor[scheme] + cast(long) ub;
      if (cast(long) cast(int) nres != nres) must_uint = true; // prevent this check from passing once via uint, once via int. See test169fail.nt
      if ((must_uint || cast(long) cast(int) nres != nres) && cast(long) cast(uint) nres != nres) {
        text.setError("Number too large for 32-bit integer representation");
        return false;
      } 
      res = cast(int) nres;
    }
    if (gotSomeDigits && unsigned) *unsigned = must_uint;
    return gotSomeDigits;
  }

  if (accept('0')) {
    if (accept('b') || accept('B')) {
      if (!getDigits(Scheme.Binary)) return false;
    } else if (accept('x') || accept('X')) {
      if (!getDigits(Scheme.Hex)) return false;
    } else {
      // a lone "0" (no octal digits following) is simply zero
      if (!getDigits(Scheme.Octal)) res = 0;
    }
  } else {
    if (!getDigits(Scheme.Decimal)) return false;
  }
  i = res;
  text = t2;
  return true;
}

import tools.compat: replace;
import tools.base: Stuple, stuple;

// Builds the source text of a switch statement over the variable named `var`
// that executes `return true;` for every character of `list` and falls out
// of the switch otherwise. Intended to be mixed into a function body.
string returnTrueIf(dstring list, string var) {
  // Opening of the generated switch over the caller-named variable.
  string code = "switch ("~var~") {";
  foreach (dchar ch; list) {
    // Re-encode the code point as a string so it can be spliced into the char literal.
    string lit;
    lit ~= ch;
    code ~= "case '"~lit~"': return true;";
  }
  return code ~ "default: break; }";
}

// copypasted from phobos to enable inlining
version(Windows) { } else pragma(set_attribute, decode, optimize("-fomit-frame-pointer"));
// Decodes one UTF-8 encoded code point from `s` starting at index `idx`.
// On success returns the code point and advances `idx` past the consumed bytes.
// Throws UtfException on a malformed, overlong or truncated sequence, in
// which case `idx` is not modified.
dchar decode(char[] s, ref size_t idx) {
  size_t len = s.length;
  dchar V;
  size_t i = idx;
  char u = s[i];
  
  if (u & 0x80)
  {
    // multi-byte sequence; n becomes the total number of bytes in it
    uint n;
    char u2;

    /* The following encodings are valid, except for the 5 and 6 byte
      * combinations:
      *  0xxxxxxx
      *  110xxxxx 10xxxxxx
      *  1110xxxx 10xxxxxx 10xxxxxx
      *  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      *  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
      *  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
      */
    // Count the leading 1 bits of the first byte: that is the sequence length.
    for (n = 1; ; n++)
    {
      if (n > 4) goto Lerr; // only do the first 4 of 6 encodings
      if (((u << n) & 0x80) == 0) {
        if (n == 1) goto Lerr; // 10xxxxxx is a continuation byte, not a valid start
        break;
      }
    }

    // Pick off (7 - n) significant bits of B from first byte of octet
    V = cast(dchar)(u & ((1 << (7 - n)) - 1));

    if (i + (n - 1) >= len) goto Lerr; // off end of string

    /* The following combinations are overlong, and illegal:
      *  1100000x (10xxxxxx)
      *  11100000 100xxxxx (10xxxxxx)
      *  11110000 1000xxxx (10xxxxxx 10xxxxxx)
      *  11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
      *  11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
      */
    // (Lead bytes 0xF8 and 0xFC were already rejected by the n > 4 check
    // above, so the last two alternatives cannot trigger here.)
    u2 = s[i + 1];
    if ((u & 0xFE) == 0xC0 ||
        (u == 0xE0 && (u2 & 0xE0) == 0x80) ||
        (u == 0xF0 && (u2 & 0xF0) == 0x80) ||
        (u == 0xF8 && (u2 & 0xF8) == 0x80) ||
        (u == 0xFC && (u2 & 0xFC) == 0x80))
        goto Lerr;                      // overlong combination

    // fold the six payload bits of every continuation byte into V
    for (uint j = 1; j != n; j++)
    {
      u = s[i + j];
      if ((u & 0xC0) != 0x80) goto Lerr;                  // trailing bytes are 10xxxxxx
      V = (V << 6) | (u & 0x3F);
    }
    // if (!isValidDchar(V)) goto Lerr;
    i += n;
  } else {
    // plain 7-bit ASCII
    V = cast(dchar) u;
    i++;
  }

  idx = i;
  return V;

Lerr:
  //printf("\ndecode: idx = %d, i = %d, length = %d s = \n'%.*s'\n%x\n'%.*s'\n", idx, i, s.length, s, s[i], s[i .. length]);
  throw new UtfException("4invalid UTF-8 sequence", i);
}

// TODO: unicode
// Tells whether `c` counts as an identifier character.
// ASCII: letters, digits and '_'. Beyond ASCII: only the characters listed
// in the mixin below (micro sign, eth and the Greek alphabet).
bool isNormal(dchar c) {
  if (c < 128) {
    return (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9') ||
           c == '_';
  }
  // expands to a switch over `c` that returns true for each listed character
  mixin(returnTrueIf(
    "µð" // different mu
    "αβγδεζηθικλμνξοπρσςτυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
    , "c"));
  return false;
}

string lastAccepted, lastAccepted_stripped;
// Token acceptor. Skips comments at the start of `s` and, if what follows
// begins with `t`, consumes it and returns true; otherwise returns false and
// leaves `s` unchanged.
//
// With USECACHE the comment-stripped form of the most recently examined
// string is remembered in lastAccepted / lastAccepted_stripped, so repeated
// probes at the same position do not re-run eatComments.
template acceptT(bool USECACHE) {
  // pragma(attribute, optimize("-O3"))
  bool acceptT(ref string s, string t) {
    string s2;
    // debug builds insist that the token carries no surrounding whitespace
    debug if (t !is t.strip()) {
      logln("bad t: '", t, "'");
      fail;
    }
    static if (USECACHE) {
      // same pointer and length as last time: reuse the stripped result
      if (s.ptr == lastAccepted.ptr && s.length == lastAccepted.length) {
        s2 = lastAccepted_stripped;
      } else {
        s2 = s;
        s2.eatComments();
        lastAccepted = s;
        lastAccepted_stripped = s2;
      }
    } else {
      s2 = s;
      s2.eatComments();
    }
    
    size_t idx = t.length, zero = 0;
    if (!s2.startsWith(t)) return false;
    // If the token starts with an identifier character and the input
    // continues with another one right after the match, this is a longer
    // identifier that merely begins with `t`: reject.
    // NOTE(review): t.decode(zero) presumes t is non-empty -- confirm callers never pass "".
    if (isNormal(t.decode(zero)) && s2.length > t.length && isNormal(s2.decode(idx))) {
      return false;
    }
    s = s2[t.length .. $];
    // A token ending in ' ' additionally requires (and consumes) one more
    // space in the input, unless the input ends right after the token.
    if (!(t.length && t[$-1] == ' ') || !s.length) return true;
    if (s[0] != ' ') return false;
    s = s[1 .. $];
    return true;
  }
}

alias acceptT!(true)/*.acceptT*/ accept;
alias acceptT!(false)/*.acceptT*/ accept_mt;

// Reports whether the text beginning one byte BEFORE `s` is accepted as "}".
// NOTE(review): reads the byte in front of the slice, so `s` must not sit at
// the very start of its buffer.
bool hadABracket(string s) {
  // Widen the slice by one byte to the left so the preceding character is visible.
  auto widened = (s.ptr - 1)[0..s.length + 1];
  return widened.accept("}") ? true : false;
}

// statement terminator.
// multiple semicolons can be substituted with a single one
// and "}" counts as "};"
bool acceptTerminatorSoft(ref string s) {
  // yeagh. A null string is simply assumed to have worked.
  if (s.ptr is null) return true;
  // Regular case: an explicit semicolon right here (this one is consumed).
  if (s.accept(";")) return true;
  // Otherwise look one byte back: a ';' or '}' directly in front of the
  // current position also terminates the statement. Only the local probe
  // slice is advanced, so `s` stays where it is.
  auto lookBehind = (s.ptr - 1)[0..s.length + 1];
  return lookBehind.accept(";") || lookBehind.accept("}");
}

// Like acceptTerminatorSoft, but a missing terminator is reported through
// s.failparse with the lazily evaluated message `err`.
// The message is only evaluated when the terminator is absent.
bool mustAcceptTerminatorSoft(ref string s, lazy string err) {
  auto probe = s;
  bool terminated = acceptTerminatorSoft(probe);
  if (!terminated) s.failparse(err());
  s = probe;
  return true;
}

// Consumes token `t` from `s` or reports a parse failure.
// Returns true when accept succeeds; otherwise calls s.failparse with the
// lazily evaluated message `err`.
// NOTE(review): there is no return after failparse, so this presumably
// relies on failparse never returning (e.g. by throwing) -- confirm.
bool mustAccept(ref string s, string t, lazy string err) {
  if (s.accept(t)) return true;
  s.failparse(err());
}

// Parses a separated list: element (separator element)*.
//   c1 - lazy expression that parses one element (expected to advance `s`)
//   c2 - lazy expression that parses one separator
//   dg - called after every successfully parsed element
// Returns `allowEmpty` if not even the first element parses, true once no
// further separator follows, and false if a separator is not followed by an
// element. Whenever c1 or c2 fails, `s` is reset to its value from just
// before that attempt.
bool bjoin(ref string s, lazy bool c1, lazy bool c2, void delegate() dg,
           bool allowEmpty = true) {
  auto s2 = s;
  if (!c1) { s = s2; return allowEmpty; }
  dg();
  while (true) {
    s2 = s;
    if (!c2) { s = s2; return true; }
    s2 = s;
    // a separator was consumed, so an element is now mandatory
    if (!c1) { s = s2; return false; }
    dg();
  }
}

// while expr
// Repeats the lazy expression `b` until it fails, calling `dg` (if given)
// after each success. If `abort` is given, the loop also stops as soon as
// that token would be accepted at the current position (the token itself is
// not consumed: it is probed on a copy). In both stop cases `s` is reset to
// its value from the start of that iteration. Always returns true.
bool many(ref string s, lazy bool b, void delegate() dg = null, string abort = null) {
  while (true) {
    // s2: restore point for this iteration; s3: scratch copy for the abort probe
    auto s2 = s, s3 = s2;
    if (abort && s3.accept(abort)
        ||
        !b()
    ) {
      s = s2;
      break;
    }
    if (dg) dg();
  }
  return true;
}

import std.utf;
version(Windows) { } else pragma(set_attribute, gotIdentifier, optimize("-fomit-frame-pointer"));
// Tries to read an identifier from the start of `text` (after eatComments).
// An identifier consists of isNormal characters; '-' is allowed anywhere but
// in first position, and '.' is allowed when `acceptDots` is set. A leading
// digit is rejected unless `acceptNumbers` is set. A lone '$' is accepted as
// a special identifier.
// On success stores the identifier in `ident`, advances `text` and returns true.
bool gotIdentifier(ref string text, out string ident, bool acceptDots = false, bool acceptNumbers = false) {
  auto t2 = text;
  t2.eatComments();
  bool isValid(dchar d, bool first = false) {
    return isNormal(d) || (!first && d == '-') || (acceptDots && d == '.');
  }
  // array length special handling
  // (note: `text` is set to the comment-stripped input but the '$' itself is not consumed here)
  if (t2.length && t2[0] == '$') { text = t2; ident = "$"; return true; }
  if (!acceptNumbers && t2.length && t2[0] >= '0' && t2[0] <= '9') { return false; /* identifiers must not start with numbers */ }
  size_t idx = 0;
  if (!t2.length || !isValid(t2.decode(idx), true)) return false;
  size_t prev_idx = 0;
  dchar cur;
  // advance one code point at a time, remembering where the last one began
  do {
    prev_idx = idx;
    if (idx == t2.length) break;
    cur = t2.decode(idx);
  // } while (isValid(cur));
  } while (isNormal(cur) || (cur == '-') || (acceptDots && cur == '.'));
  // prev_idx now is the start of the first invalid character
  /*if (ident in reserved) {
    // logln("reject ", ident);
    return false;
  }*/
  ident = t2[0 .. prev_idx];
  // a bare lambda sign is never an identifier (ident stays assigned on this failure path)
  if (ident == "λ") return false;
  text = t2[prev_idx .. $];
  return true;
}

// True for ASCII letters and digits only; '_' is NOT included.
bool isCNormal(char ch) {
  if (ch >= 'a' && ch <= 'z') return true;
  if (ch >= 'A' && ch <= 'Z') return true;
  return ch >= '0' && ch <= '9';
}

// Byte-wise variant of identifier reading that only admits the characters
// isCNormal allows. Skips comments first; rejects input that is empty or
// starts with a digit or any non-isCNormal character.
// On success stores the identifier in `ident`, advances `text` and returns true.
bool gotCIdentifier(ref string text, out string ident) {
  auto t2 = text;
  t2.eatComments();
  if (t2.length && t2[0] >= '0' && t2[0] <= '9') { return false; /* identifiers must not start with numbers */ }
  size_t idx = 0;
  if (!t2.length || !isCNormal(t2[idx++])) return false;
  size_t prev_idx = 0;
  dchar cur;
  // walk forward until the first character that is not part of the identifier;
  // prev_idx trails idx by one and ends up at that character (or at the end)
  do {
    prev_idx = idx;
    if (idx == t2.length) break;
    cur = t2[idx++];
  } while (isCNormal(cur));
  ident = t2[0 .. prev_idx];
  text = t2[prev_idx .. $];
  return true;
}

// Sets of reserved words, used as string -> true lookup tables:
// `reserved` for identifiers in general, `reservedPropertyNames` for property names.
bool[string] reserved, reservedPropertyNames;
// Seeds both tables at module start-up.
static this() {
  reserved["auto"] = true;
  reserved["return"] = true;
  reserved["function"] = true;
  reserved["delegate"] = true;
  reserved["type-of"] = true;
  reserved["string-of"] = true;
  reservedPropertyNames["eval"] = true;
  reservedPropertyNames["iterator"] = true;
}

// This isn't a symbol! Maybe I was wrong about the dash .. 
// Remove the last dash part from "id" that was taken from "text"
// and re-add it to "text" via dark pointer magic.
// Cuts `id` at its last '-' and pushes the removed tail (dash included) back
// onto the front of `text`. Returns false, changing nothing, if `id`
// contains no dash.
// Precondition (not checked): `id` was taken from the input immediately in
// front of `text`, so that moving text.ptr backwards re-exposes those bytes.
bool eatDash(ref string text, ref string id) {
  auto dp = id.rfind("-");
  if (dp == -1) return false;
  // number of bytes to hand back: the dash and everything after it
  auto removed = id.length - dp;
  id = id[0 .. dp];
  // give back
  text = (text.ptr - removed)[0 .. text.length + removed];
  return true;
}

// see above
// Cuts `id` at its last '.' and pushes the removed tail (dot included) back
// onto the front of `text`. Returns false, changing nothing, if `id`
// contains no dot.
// Precondition (not checked): `id` was taken from the input immediately in
// front of `text`, so that moving text.ptr backwards re-exposes those bytes.
bool eatDot(ref string text, ref string id) {
  auto dp = id.rfind(".");
  if (dp == -1) return false;
  // number of bytes to hand back: the dot and everything after it
  auto removed = id.length - dp;
  id = id[0 .. dp];
  // give back also
  text = (text.ptr - removed)[0 .. text.length + removed];
  return true;
}

// Tries the given alternatives in order and returns true at the first one
// that succeeds. After every failed alternative `s` is restored to the value
// it had on entry; if all fail the result is false.
bool ckbranch(ref string s, bool delegate()[] dgs...) {
  auto saved = s;
  for (size_t k = 0; k < dgs.length; ++k) {
    if (dgs[k]()) return true;
    // this branch failed: undo whatever it consumed before trying the next
    s = saved;
  }
  return false;
}

bool verboseParser = false;

string[bool delegate(string)] condInfo;

// Tells whether the dotted id `section` lies at or below `rule` in the id
// tree: either the two are equal, or `section` begins with `rule` followed
// directly by a '.' (so "a.bc" does not count as starting with "a.b").
//
// An empty `rule` is handled without calling faststreq_samelen_nonz, whose
// contract requires non-empty operands; by the same formula it matches an
// empty `section` or one that begins with '.'.
bool sectionStartsWith(string section, string rule) {
  if (section.length < rule.length) return false;
  // compare the prefix (skipped for an empty rule, which trivially matches)
  if (rule.length && !section[0..rule.length].faststreq_samelen_nonz(rule)) return false;
  // exact match
  if (section.length == rule.length) return true;
  // only count hits that match a complete section
  return section[rule.length] == '.';
}

// Compile-time counterpart of matchrule: turns a space-separated rule list
// into the SOURCE TEXT of a delegate expression `bool(string text)` that
// returns true only if `text` passes every rule. Intended to be mixed in.
//
// Each rule may carry one prefix character:
//   '<'  pass if text is at/below the rule (sectionStartsWith)
//   '>'  pass if text is NOT at/below the rule
//   '='  pass if text equals the rule
//   '^'  pass until the rule has been seen ("before")
//   '_'  pass after the rule has been seen, excluding the rule itself ("after")
//   ','  pass from the rule onwards, including it ("after, inclusive")
//   none same as '<' combined with '='
//
// The stateful modes ('^', '_', ',') each get their own flag parameter
// `flagN`, pre-bound to false via /apply/. The flag is declared `ref` and the
// generated code refers to it by that name, so that the "seen" state
// persists between calls. (Earlier the generated code referred to an
// undeclared `hit` while the parameters were called `flagN`.)
string matchrule_static(string rules) {
  string res;
  int i;
  int falses; // number of stateful rules, i.e. of flag parameters to pre-bind
  string preparams;
  while (rules.length) {
    string passlabel = "pass"~ctToString(i);
    string flagname  = "flag"~ctToString(i);
    i++;
    auto rule = ctSlice(rules, " ");
    auto first = rule[0], rest = rule[1 .. $];
    bool smaller, greater, equal, before, after, after_incl;
    switch (first) {
      case '<': smaller = true; rule = rest; break;
      case '>': greater = true; rule = rest; break;
      case '=': equal = true; rule = rest; break;
      case '^': before = true; preparams ~= "ref bool "~flagname~", "; falses ++; rule = rest; break;
      case '_': after = true; preparams ~= "ref bool "~flagname~", "; falses ++; rule = rest; break;
      case ',': after_incl = true; preparams ~= "ref bool "~flagname~", "; falses ++; rule = rest; break;
      default: break;
    }
    if (!smaller && !greater && !equal && !before && !after && !after_incl)
      smaller = equal = true; // default (see below)
    // Every rule emits: <tests that jump to passlabel>; return false; passlabel:
    if (smaller) res ~= "if (text.sectionStartsWith(\""~rule~"\")) goto "~passlabel~";\n";
    if (equal)   res ~= "if (text == \""~rule~"\") goto "~passlabel~";\n";
    if (greater) res ~= "if (!text.sectionStartsWith(\""~rule~"\")) goto "~passlabel~";\n";
    if (before)  res ~= "if (text.sectionStartsWith(\""~rule~"\")) "~flagname~" = true; if (!"~flagname~") goto "~passlabel~";\n";
    if (after)   res ~= "if (text.sectionStartsWith(\""~rule~"\")) "~flagname~" = true; else if ("~flagname~") goto "~passlabel~";\n";
    if (after_incl)res~="if (text.sectionStartsWith(\""~rule~"\")) "~flagname~" = true; if ("~flagname~") goto "~passlabel~";\n";
    res ~= "return false; "~passlabel~": \n";
  }
  // Pre-bind one `false` per flag parameter: a bare value for a single flag,
  // a stuple for several.
  string falsestr;
  if (falses == 1) falsestr = "false /apply/ ";
  else if (falses > 1) {
    falsestr = "false";
    for (int k = 1; k < falses; ++k) falsestr ~= ", false";
    falsestr = "stuple("~falsestr~") /apply/ ";
  }
  return falsestr ~ "delegate bool("~preparams~"string text) { \n" ~ res ~ "return true; \n}";
}

bool delegate(string)[string] rulefuns;

// Builds (and memoizes in `rulefuns`) a predicate over parser ids from a
// space-separated rule list. The predicate accepts an id only if it passes
// every rule; rules are chained through `prev`.
//
// Each rule may be prefixed by any sequence of mode characters:
//   '<' at/below the rule, '>' not at/below the rule, '=' exactly the rule,
//   '^' before the rule was seen, '_' after the rule was seen (exclusive),
//   ',' from the rule onwards (inclusive).
// Without a prefix the mode defaults to '<' plus '='.
// The '^', '_' and ',' modes keep a per-rule `hit` flag inside the closure,
// so the returned delegate is stateful.
bool delegate(string) matchrule(string rules) {
  if (auto p = rules in rulefuns) return *p;
  bool delegate(string) res;
  auto rules_backup = rules; // `rules` is consumed below; keep the original as cache key
  while (rules.length) {
    auto rule = rules.slice(" ");
    bool smaller, greater, equal, before, after, after_incl;
    assert(rule.length);
  restartRule:
    // strip mode characters one at a time from the front of the rule
    auto first = rule[0], rest = rule[1 .. $];
    switch (first) {
      case '<': smaller = true; rule = rest; goto restartRule;
      case '>': greater = true; rule = rest; goto restartRule;
      case '=': equal = true; rule = rest; goto restartRule;
      case '^': before = true; rule = rest; goto restartRule;
      case '_': after = true; rule = rest; goto restartRule;
      case ',': after_incl = true; rule = rest; goto restartRule;
      default: break;
    }
    
    if (!smaller && !greater && !equal && !before && !after && !after_incl)
      smaller = equal = true; // default
    // different modes
    assert((smaller || greater || equal) ^ before ^ after ^ after_incl);
    
    // Bind the mode flags, the rule text, the previous predicate in the chain
    // and an initially-false `hit` flag; what remains is bool(string text).
    res = stuple(smaller, greater, equal, before, after, after_incl, rule, res, false) /apply/
    (bool smaller, bool greater, bool equal, bool before, bool after, bool after_incl, string rule, bool delegate(string) prev, ref bool hit, string text) {
      // logln(smaller, " ", greater, " ", equal, " ", before, " ", after, " ", after_incl, " ", hit, " and ", rule, " onto ", text, ":", text.sectionStartsWith(rule));
      if (prev && !prev(text)) return false;
      if (equal && text == rule) return true;
      
      bool startswith = text.sectionStartsWith(rule);
      if (smaller && startswith) return true; // all "below" in the tree
      if (greater && !startswith) return true; // arguable
      
      if (before) {
        if (startswith)
          hit = true;
        return !hit;
      }
      if (after) {
        if (startswith)
          hit = true;
        else return hit;
        // on the matching id itself control falls through to `return false` below
      }
      if (after_incl) {
        if (startswith)
          hit = true;
        return hit;
      }
      return false;
    };
  }
  rulefuns[rules_backup] = res;
  return res;
}

import tools.functional;
// Callback handle through which a parser function re-enters the parser.
//   dg  - performs the actual parse of `text` under a given id filter
//   cur - the id filter that applies when the caller does not supply one
//
// opCall accepts, after `text`:
//   1. optionally a filter: a rule string (compiled via matchrule) or a
//      ready-made bool delegate(string); if absent, `cur` is used;
//   2. optionally ONE result sink: either a pointer, which receives the
//      result fastcast to the pointee type, or a delegate, which is called
//      with the result fastcast to its first parameter type.
// With a sink, `text` is restored when nothing was parsed. With a delegate
// sink, a result of the wrong type is reported through failparse.
// Returns the parsed object, or null.
struct ParseCb {
  Object delegate(ref string text,
    bool delegate(string)
  ) dg;
  bool delegate(string) cur;
  Object opCall(T...)(ref string text, T t) {
    bool delegate(string) matchdg;
    
    static if (T.length && is(T[0]: char[])) {
      alias T[1..$] Rest1;
      matchdg = matchrule(t[0]);
      auto rest1 = t[1..$];
    } else static if (T.length && is(T[0] == bool delegate(string))) {
      alias T[1..$] Rest1;
      // the filter delegate is the FIRST extra argument (this used to read
      // t[1], which is the sink or does not exist at all)
      matchdg = t[0];
      auto rest1 = t[1..$];
    } else {
      matchdg = cur;
      alias T Rest1;
      alias t rest1;
    }
    
    // MustType: the type the parse result is required to have
    static if (Rest1.length == 1 && is(typeof(*rest1[0])) && !is(MustType))
      alias typeof(*rest1[0]) MustType;
    static if (Rest1.length == 1 && is(Rest1[0] == delegate)) {
      alias Params!(Rest1[0])[0] MustType;
      auto callback = rest1[0];
    }
    
    static if (Rest1.length == 1 && is(typeof(*rest1[0])) || is(typeof(callback))) {
      auto backup = text;
      static if (is(typeof(callback))) {
        // if the type doesn't match, error?
        auto res = dg(text, matchdg);
        if (!res) text = backup;
        else {
          // (named `typed` so it does not shadow the parameter tuple `t`)
          auto typed = fastcast!(MustType) (res);
          if (!typed) backup.failparse("Type (", MustType.stringof, ") not matched: ", res);
          callback(typed);
        }
        return fastcast!(Object) (res);
      } else {
        // pointer sink: a result of the wrong type becomes null and counts as "not parsed"
        *rest1[0] = fastcast!(typeof(*rest1[0])) (dg(text, matchdg));
        if (!*rest1[0]) text = backup;
        return fastcast!(Object) (*rest1[0]);
      }
    } else {
      static assert(!Rest1.length, "Left: "~Rest1.stringof~" of "~T.stringof);
      return dg(text, matchdg);
    }
  }
}

// used to be class, flattened for speed
// One registered grammar rule.
//   key   - literal text the input has to start with for this parser to be
//           tried (a null key disables that pre-check in the parse loop)
//   id    - dotted name that rule filters and precedence ordering work on
//   match - the parse function; returns the parsed object or null.
//           `cont` and `rest` are callbacks for re-entering the parser.
struct Parser {
  string key, id;
  Object delegate
    (ref string text, 
     ParseCb cont,
     ParseCb rest) match;
}

// stuff that it's unsafe to memoize due to side effects
bool delegate(string)[] globalStateMatchers;

// Current nesting level of pushCache() scopes (0 = outermost).
int cachedepth;
// For every nesting level, how many times that level has been entered so far.
int[] cachecount;
static this() { cachecount = new int[8]; }

// Enters a new cache level: raises cachedepth, makes sure cachecount has a
// slot for it and counts the entry. The returned delegate leaves the level
// again (lowers cachedepth); the entry counter is not reverted.
void delegate() pushCache() {
  ++cachedepth;
  // grow the counter table on demand
  if (cachedepth >= cachecount.length) cachecount.length = cachedepth + 1;
  ++cachecount[cachedepth];
  return { --cachedepth; };
}

// Holds one T per cache nesting level (indexed by the global `cachedepth`).
// The first StaticSize levels live inline; deeper levels use a heap array
// that grows by doubling. A slot is reset to Init!(T) whenever the level it
// belongs to has been (re-)entered since the slot was last handed out.
struct Stash(T) {
  const StaticSize = 4;
  T[StaticSize] static_backing_array;
  // cachecount value seen when the corresponding slot was last handed out
  int[StaticSize] static_backing_lastcount;
  T[] backing_array;
  int[] backing_lastcount;
  // Returns the slot for the current cachedepth and, through `countp`, the
  // address of its last-seen counter. Grows the heap arrays as necessary.
  T* getPointerInternal(ref int* countp) {
    int i = cachedepth;
    if (i < StaticSize) {
      countp = &static_backing_lastcount[i];
      return &static_backing_array[i];
    }
    i -= StaticSize;
    
    // doubling needs a non-zero starting length
    if (!backing_array.length) {
      backing_array    .length = 1;
      backing_lastcount.length = 1;
    }
    
    while (i >= backing_array.length) {
      backing_array    .length = backing_array    .length * 2;
      backing_lastcount.length = backing_lastcount.length * 2;
    }
    
    countp = &backing_lastcount[i];
    return &backing_array[i];
  }
  // Returns the slot for the current level, clearing it first if the level's
  // entry counter differs from the one recorded for the slot.
  T* getPointer() {
    int* countp;
    auto p = getPointerInternal(countp);
    int cmp = cachecount[cachedepth];
    if (*countp != cmp) {
      *countp = cmp;
      *p = Init!(T);
    }
    return p;
  }
}

// Words that reserve() refuses to add to the reserved table, even when a
// parser declares them as its key.
bool[string] unreserved;
// Seeds the table at module start-up.
static this() {
  unreserved["enum"] = true;
  unreserved["sum"] = true;
  unreserved["prefix"] = true;
  unreserved["suffix"] = true;
  unreserved["vec"] = true;
  unreserved["context"] = true;
  unreserved["do"] = true;
}

// Marks `key` as a reserved word, unless it is listed in `unreserved`.
void reserve(string key) {
  if (!(key in unreserved)) {
    reserved[key] = true;
  }
}

// Wraps a plain parse function `Fn` into an object that can produce a Parser
// record, optionally memoizing results per input position.
//   Fn             - the parse function; called either with or without a
//                    leading `info` object (see fnredir)
//   Id             - dotted parser id
//   Memoize        - whether results are cached by input pointer
//   Key            - literal the input must start with (null for none);
//                    also registered as reserved word
//   MemoizeForever - use an unbounded associative array instead of the
//                    small ring-buffer SpeedCache
template DefaultParserImpl(alias Fn, string Id, bool Memoize, string Key, bool MemoizeForever) {
  final class DefaultParserImpl {
    Object info;
    // set when a globalStateMatchers entry claims this id: never use the cache
    bool dontMemoMe;
    static this() {
      static if (Key) reserve(Key);
    }
    this(Object obj = null) {
      info = obj;
      foreach (dg; globalStateMatchers)
        if (dg(Id)) { dontMemoMe = true; break; }
    }
    // Packs key, id and the bound match method into a Parser record.
    Parser genParser() {
      Parser res;
      res.key = Key;
      res.id = Id;
      res.match = &match;
      return res;
    }
    // Calls Fn with whichever signature it supports: (info, text, cont, rest)
    // or (text, cont, rest).
    // NOTE(review): the second branch tests the same condition as the first
    // and can therefore never be selected.
    Object fnredir(ref string text, ParseCb cont, ParseCb rest) {
      static if (is(typeof((&Fn)(info, text, cont, rest))))
        return Fn(info, text, cont, rest);
      else static if (is(typeof((&Fn)(info, text, cont, rest))))
        return Fn(info, text, cont, rest);
      else static if (is(typeof((&Fn)(text, cont, rest))))
        return Fn(text, cont, rest);
      else
        return Fn(text, cont, rest);
    }
    static if (!Memoize) {
      // no caching: forward directly
      Object match(ref string text, ParseCb cont, ParseCb rest) {
        return fnredir(text, cont, rest);
      }
    } else {
      // Stuple!(Object, char*)[char*] cache;
      static if (MemoizeForever) {
        Stash!(Stuple!(Object, char*)[char*]) cachestash;
      } else {
        Stash!(SpeedCache) cachestash;
      }
      // Memoizing match: results are cached under the pointer of the input
      // position, together with the pointer to where parsing stopped.
      Object match(ref string text, ParseCb cont, ParseCb rest) {
        auto t2 = text;
        if (.accept(t2, "]")) return null; // never a valid start
        if (dontMemoMe) {
          static if (Key) if (!.accept(t2, Key)) return null;
          auto res = fnredir(t2, cont, rest);
          if (res) text = t2;
          return res;
        }
        auto ptr = t2.ptr;
        auto cache = cachestash.getPointer();
        if (auto p = ptr in *cache) {
          // cache hit: rebuild the remaining text from the stored end
          // pointer; the end of the input is the same as for t2
          if (!p._1) text = null;
          else text = p._1[0 .. t2.ptr + t2.length - p._1];
          return p._0;
        }
        static if (Key) if (!.accept(t2, Key)) return null;
        auto res = fnredir(t2, cont, rest);
        // failures (null results) are cached as well
        (*cache)[ptr] = stuple(res, t2.ptr);
        if (res) text = t2;
        return res;
      }
    }
  }
}

import tools.threads, tools.compat: rfind;
// Creates the `sync` object at module start-up (it is used as the lock in
// the synchronized(sync) blocks of this module).
static this() { New(sync); }

// Registers `Fn` as a parser at module start-up when this template is
// instantiated (typically via mixin). A DefaultParserImpl is created and its
// Parser record is passed to addParser -- together with the precedence
// string `Prec` when one is given.
template DefaultParser(alias Fn, string Id, string Prec = null, string Key = null, bool Memoize = true, bool MemoizeForever = false) {
  static this() {
    static if (Prec) addParser((new DefaultParserImpl!(Fn, Id, Memoize, Key, MemoizeForever)).genParser(), Prec);
    else addParser((new DefaultParserImpl!(Fn, Id, Memoize, Key, MemoizeForever)).genParser());
  }
}

import tools.log;
// Incremental splitter over `all`, cutting at every occurrence of `sep`.
//   data      - the part that has not been handed out yet
//   front     - everything of `all` in front of the piece returned last
//               (null after the final piece)
//   frontIncl - `front` plus the piece returned last
struct SplitIter(T) {
  T data, sep;
  T front, frontIncl, all;
  // Returns the next piece. Once the input is exhausted `data` becomes null,
  // so every further call returns null.
  T pop() {
    // Signed arithmetic: a separator longer than the remaining data gives a
    // negative bound and therefore no search at all.
    int lastStart = cast(int) data.length - cast(int) sep.length;
    int pos = 0;
    while (pos <= lastStart) {
      if (data[pos .. pos + sep.length] == sep) {
        auto piece = data[0 .. pos];
        data = data[pos + sep.length .. $];
        // whatever precedes this piece within the complete input
        front = all[0 .. $ - data.length - sep.length - piece.length];
        frontIncl = all[0 .. front.length + piece.length];
        return piece;
      }
      ++pos;
    }
    // no separator left: the remainder is the final piece
    auto tail = data;
    data = null;
    front = null;
    frontIncl = all;
    return tail;
  }
}

// Creates a SplitIter that walks over `d`, splitting at separator `s`.
SplitIter!(T) splitIter(T)(T d, T s) {
  SplitIter!(T) it;
  it.sep = s;
  it.data = d;
  // `all` keeps the complete input for the front/frontIncl slices
  it.all = d;
  return it;
}

void delegate(string) justAcceptedCallback;

int[string] idepth;

Parser[] parsers;
Parser[][bool delegate(string)] prefiltered_parsers;
string[string] prec; // precedence mapping
Object sync;

// Records precedence string `val` for parser id `id` (guarded by `sync`).
void addPrecedence(string id, string val) { synchronized(sync) { prec[id] = val; } }

// Returns the precedence string registered for `id`, or null if there is
// none. The lookup itself happens under the `sync` lock.
string lookupPrecedence(string id) {
  synchronized(sync)
    if (auto p = id in prec) return *p;
  return null;
}

import tools.compat: split, join;
// Renders the (sorted) parser list as text, one parser id per line.
// Leading id components that repeat the previous line's are blanked out so
// the tree structure becomes visible; ids with a registered precedence are
// padded to a common column and followed by ":<precedence>".
string dumpInfo() {
  if (listModified) resort;
  string res;
  // longest id, used for column alignment
  int maxlen;
  foreach (parser; parsers) {
    auto id = parser.id;
    if (id.length > maxlen) maxlen = id.length;
  }
  // column at which the precedence is printed (local; shadows the module-level `reserved`)
  auto reserved = maxlen + 2;
  string[] prevId;
  foreach (parser; parsers) {
    auto id = parser.id;
    // work on a private copy: components are overwritten in place below
    auto n = id.dup.split(".");
    foreach (i, str; n[0 .. min(n.length, prevId.length)]) {
      if (str == prevId[i]) foreach (ref ch; str) ch = ' ';
    }
    prevId = id.split(".");
    res ~= n.join(".");
    if (auto p = id in prec) {
      for (int i = 0; i < reserved - id.length; ++i)
        res ~= " ";
      res ~= ":" ~ *p;;
    }
    res ~= "\n";
  }
  return res;
}

// Ordering predicate for sorting parsers: true if `pa` has to come before `pb`.
// The ids are compared component by component (split at '.'):
//  - identical ids are not "smaller";
//  - if one id is a prefix of the other, the longer one comes first;
//  - at the first differing component, the precedence strings registered for
//    the two id prefixes decide; without any precedence info on either side
//    the components are compared as plain strings.
// Throws if only one of the two prefixes has precedence info, or if both
// have the very same precedence.
bool idSmaller(Parser pa, Parser pb) {
  auto a = splitIter(pa.id, "."), b = splitIter(pb.id, ".");
  string ap, bp;
  while (true) {
    // pop() yields null once an id has no components left
    ap = a.pop(); bp = b.pop();
    if (!ap && !bp) return false; // equal
    if (ap && !bp) return true; // longer before shorter
    if (bp && !ap) return false;
    if (ap == bp) continue; // no information here
    auto aprec = lookupPrecedence(a.frontIncl), bprec = lookupPrecedence(b.frontIncl);
    if (!aprec && bprec)
      throw new Exception("Patterns "~a.frontIncl~" vs. "~b.frontIncl~": first is missing precedence info! ");
    if (!bprec && aprec)
      throw new Exception("Patterns "~a.frontIncl~" vs. "~b.frontIncl~": second is missing precedence info! ");
    if (!aprec && !bprec) return ap < bp; // lol
    if (aprec == bprec) throw new Exception("Error: patterns '"~a.frontIncl~"' and '"~b.frontIncl~"' have the same precedence! ");
    // compare the precedence strings character by character over their common length
    for (int i = 0; i < min(aprec.length, bprec.length); ++i) {
      // precedence needs to be _inverted_, ie. lower-precedence rules must come first
      // this is because "higher-precedence" means it binds tighter.
      // if (aprec[i] > bprec[i]) return true;
      // if (aprec[i] < bprec[i]) return false;
      if (aprec[i] < bprec[i]) return true;
      if (aprec[i] > bprec[i]) return false;
    }
    // One precedence string is a proper prefix of the other (equal strings
    // were rejected above). Make aprec the longer one and remember the swap.
    bool flip;
    // this gets a bit hairy
    // 50 before 5, 509 before 5, but 51 after 5.
    if (aprec.length < bprec.length) { swap(aprec, bprec); flip = true; }
    // the first character beyond the common prefix decides: '0' sorts the
    // longer string before the shorter one, anything else after it
    if (aprec[bprec.length] != '0') return flip;
    return !flip;
  }
}

bool listModified;
// Appends `p` to the global parser list and flags the list as needing a
// re-sort before its next use.
void addParser(Parser p) {
  parsers ~= p;
  listModified = true;
}

// Registers `p` and additionally records `pred` as the precedence string
// for its id.
void addParser(Parser p, string pred) {
  addParser(p);
  addPrecedence(p.id, pred);
}

import quicksort: qsort_ = qsort;
import tools.time: sec, µsec;
// Sorts the parser list with idSmaller and drops every structure derived
// from the old order: the per-filter parser lists and the cached rule
// closures. Clears the listModified flag.
void resort() {
  parsers.qsort_(&idSmaller);
  prefiltered_parsers = null; // invalid; regenerate
  rulefuns = null; // also reset this to regenerate the closures
  listModified = false;
}

// Main entry point: parses `text` with the parsers whose id passes `cond`,
// starting at index `offs` of that filtered list. Dispatches to the logging
// or the silent instantiation of _parse depending on `verboseParser`.
// Returns the parsed object, or null if no parser matched.
Object parse(ref string text, bool delegate(string) cond,
    int offs = 0)
{
  if (verboseParser) return _parse!(true).parse(text, cond, offs);
  else return _parse!(false).parse(text, cond, offs);
}

// Rule string of the string-based parse() call in progress; cleared again
// when that call exits (it is read by the profiling code in _parse).
string condStr;
// Convenience overload: compiles the rule string `cond` with matchrule and
// parses with the resulting filter.
// A ParseEx passing through is annotated with the rule and rethrown; any
// other Exception is wrapped in a new Exception that names the rule.
Object parse(ref string text, string cond) {
  condStr = cond;
  scope(exit) condStr = null;
  
  try return parse(text, matchrule(cond));
  catch (ParseEx pe) { pe.addRule(cond); throw pe; }
  catch (Exception ex) throw new Exception(Format("Matching rule '"~cond~"': ", ex));
}

// Core parse loop, instantiated once with and once without logging.
template _parse(bool Verbose) {
  // Tries, in order, every parser whose id passes `cond`, beginning at index
  // `offs` of that filtered list, and returns the first non-null result.
  // `text` is advanced by the successful parser and restored after each
  // failed attempt. Returns null if the input is empty (also after skipping
  // comments) or if no parser matches.
  Object parse(ref string text, bool delegate(string) cond,
      int offs = 0) {
    if (!text.length) return null;
    if (listModified) resort;
    bool matched;
    static if (Verbose)
      logln("BEGIN PARSE '", text.nextText(16), "'");
    
    // The callbacks handed to parsers are set up lazily, right before the
    // first parser is actually invoked.
    ParseCb cont = void, rest = void;
    cont.dg = null; // needed for null check further in
    // index of the parser currently being tried, relative to offs
    int i = void;
    // "continue": re-enter with only the parsers AFTER the current one
    Object cont_dg(ref string text, bool delegate(string) cond) {
      return parse(text, cond, offs + i + 1); // same verbosity - it's a global flag
    }
    // "rest": re-enter with the whole filtered list
    Object rest_dg(ref string text, bool delegate(string) cond) {
      return parse(text, cond, 0);
    }
    
    const ProfileMode = false;
    static if (ProfileMode) {
      // tracks the slowest parse seen so far, in characters per millisecond
      auto start_time = µsec();
      auto start_text = text;
      static float min_speed = float.infinity;
      scope(exit) if (text.ptr !is start_text.ptr) {
        auto delta = (µsec() - start_time) / 1000f;
        auto speed = (text.ptr - start_text.ptr) / delta;
        if (speed < min_speed) {
          min_speed = speed;
          if (delta > 5) {
            logln("New worst slowdown: '",
              condStr, "' at '", start_text.nextText(), "'"
              ": ", speed, " characters/ms "
              "(", (text.ptr - start_text.ptr), " in ", delta, "ms). ");
          }
        }
        // min_speed *= 1.01;
      }
    }
    
    // The list of parsers passing `cond` is computed once per filter
    // delegate and cached until the next resort().
    Parser[] pref_parsers;
    if (auto p = cond in prefiltered_parsers) pref_parsers = *p;
    else {
      foreach (parser; parsers) if (cond(parser.id)) pref_parsers ~= parser;
      prefiltered_parsers[cond] = pref_parsers;
    }
    
    // comment-stripped view of the input, used only for the key pre-check below
    auto tx = text;
    tx.eatComments();
    if (!tx.length) return null;
    bool tried;
    // logln("use ", pref_parsers /map/ ex!("p -> p.id"), " [", offs, "..", pref_parsers.length, "]");
    foreach (j, ref parser; pref_parsers[offs..$]) {
      i = j;
      
      // auto tx = text;
      // skip early. accept is slightly faster than cond.
      // if (parser.key && !.accept(tx, parser.key)) continue;
      // Cheap pre-filter: skip parsers whose key is not a literal prefix of
      // the input. For keys of four or more bytes the first word is compared
      // before the full slice comparison.
      if (parser.key.ptr) {
        auto pk = parser.key;
        if (tx.length < pk.length) continue;
        if (pk.length >= 4) {
          if (*cast(int*) pk.ptr != *cast(int*) tx.ptr) continue;
        }
        if (tx[0..pk.length] != pk) continue;
      }
      
      // rulestack ~= stuple(id, text);
      // scope(exit) rulestack = rulestack[0 .. $-1];
      
      auto id = parser.id;
      
      static if (Verbose) {
        // per-id recursion depth, for the log output only
        if (!(id in idepth)) idepth[id] = 0;
        idepth[id] ++;
        scope(exit) idepth[id] --;
        logln("TRY PARSER [", idepth[id], " ", id, "] for '", text.nextText(16), "'");
      }
      
      matched = true;
      
      if (!cont.dg) {
        cont.dg = &cont_dg;
        cont.cur = cond;
        rest.dg = &rest_dg;
        rest.cur = cond;
      }
      
      // the parser receives the original text (comments included)
      auto backup = text;
      if (auto res = parser.match(text, cont, rest)) {
        static if (Verbose) logln("    PARSER [", idepth[id], " ", id, "] succeeded with ", res, ", left '", text.nextText(16), "'");
        if (justAcceptedCallback) justAcceptedCallback(text);
        return res;
      } else {
        static if (Verbose) logln("    PARSER [", idepth[id], " ", id, "] failed");
      }
      text = backup;
    }
    return null;
  }
  // version(Windows) { } else pragma(set_attribute, parse, optimize("-O3", "-fno-tree-vrp"));
}

// Converts any value usable as a condition into a plain bool.
bool test(T)(T t) {
  return t ? true : false;
}

// Reports a parse failure if the text starts with "<<", pointing the user
// from heredoc syntax to {} blocks. `text` is passed by value, so the
// caller's position is not affected.
void noMoreHeredoc(string text) {
  if (text.accept("<<"))
    text.failparse("Please switch from heredoc to {}!");
}

// If `text` begins with `match`, returns the remainder of `text` after it;
// otherwise returns null.
// The slices are taken through .ptr, so the remainder has a non-null pointer
// even when it is empty (callers test the result for truth).
// NOTE(review): `match` is passed to faststreq_samelen_nonz, which expects
// non-empty operands; an empty `match` is assumed not to occur (see below).
string startsWith(string text, string match)
{
  if (text.length < match.length) return null;
  // if (!match.length) return text; // doesn't actually happen
  if (!text.ptr[0..match.length].faststreq_samelen_nonz(match)) return null;
  return text.ptr[match.length .. text.length];
}

// Formats a byte as exactly two uppercase hexadecimal digits,
// e.g. 0xAB -> "AB", 0x07 -> "07".
string hex(ubyte u) {
  auto hs = "0123456789ABCDEF";
  // High nibble first, then low nibble. The high nibble is `u >> 4`;
  // shifting a ubyte by 8 always yields 0 and would print "0" for it.
  return ""~hs[u>>4]~hs[u&0xf];
}

// Rewrites `s` so that it consists only of ASCII letters, digits and '_':
// such bytes are copied unchanged, every other byte is replaced by
// "_" ~ hex(byte) ~ "_". Works byte-wise, so multi-byte UTF-8 characters
// turn into one escape per byte.
string cleanup(string s) {
  string res;
  foreach (b; cast(ubyte[]) s) {
    if (b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b >= '0' && b <= '9' || b == '_') {
      res ~= b;
    } else {
      res ~= "_"~hex(b)~"_";
    }
  }
  return res;
}

// Accepts a left arrow in either its ASCII ("<-") or its Unicode ("←") spelling.
bool acceptLeftArrow(ref string text) {
  return text.accept("<-") || text.accept("←");
}

// Returns what follows the last '/' of `path`, or `path` itself if it
// contains no '/'.
string filenamepart(string path) {
  auto slash = path.rfind("/");
  if (slash == -1) return path;
  return path[slash + 1 .. $];
}

// Returns what precedes the last '/' of `path` (without the slash), or null
// if `path` contains no '/'.
string dirpart(string path) {
  auto slash = path.rfind("/");
  if (slash == -1) return null;
  return path[0 .. slash];
}