PageRenderTime 43ms CodeModel.GetById 7ms RepoModel.GetById 1ms app.codeStats 0ms

/parseBase.d

http://github.com/FeepingCreature/fcc
D | 1078 lines | 902 code | 101 blank | 75 comment | 366 complexity | c7bb0b978a7438cbcdda3fdbab87098b MD5 | raw file
  1. module parseBase;
  2. import casts;
  3. version(Windows) {
  4. int bcmp (char* from, char* to, int count) {
  5. while (count-- > 0) {
  6. if (*from++ != *to++) return 1;
  7. }
  8. return 0;
  9. }
  10. } else {
  11. extern(C) int bcmp(char*, char*, int);
  12. }
  13. version(Windows) { } else pragma(set_attribute, faststreq_samelen_nonz, optimize("-fomit-frame-pointer"));
  14. int faststreq_samelen_nonz(string a, string b) {
  15. // the chance of this happening is approximately 0.1% (I benched it)
  16. // as such, it's not worth it
  17. // if (a.ptr == b.ptr) return true; // strings are assumed immutable
  18. if (a.length > 4) {
  19. if ((cast(int*) a.ptr)[0] != (cast(int*) b.ptr)[0]) return false;
  20. return bcmp(a.ptr + 4, b.ptr + 4, a.length - 4) == 0;
  21. }
  22. int ai = *cast(int*) a.ptr, bi = *cast(int*) b.ptr;
  23. /**
  24. 1 => 0x000000ff => 2^8 -1
  25. 2 => 0x0000ffff => 2^16-1
  26. 3 => 0x00ffffff => 2^24-1
  27. 4 => 0xffffffff => 2^32-1
  28. **/
  29. uint mask = (0x01010101U >> ((4-a.length)*8))*0xff;
  30. // uint mask = (1<<((a.length<<3)&0x1f))-(((a.length<<3)&32)>>5)-1;
  31. // uint mask = (((1<<((a.length<<3)-1))-1)<<1)|1;
  32. return (ai & mask) == (bi & mask);
  33. }
  34. // version(Windows) { } else pragma(set_attribute, faststreq, optimize("-O3"));
  35. int faststreq(string a, string b) {
  36. if (a.length != b.length) return false;
  37. if (!b.length) return true;
  38. return faststreq_samelen_nonz(a, b);
  39. }
  40. int[int] accesses;
  41. char takech(ref string s, char deflt) {
  42. if (!s.length) return deflt;
  43. else {
  44. auto res = s[0];
  45. s = s[1 .. $];
  46. return res;
  47. }
  48. }
  49. import tools.base, errors;
// Pointer-keyed result cache that additionally records, in the module-level `accesses`
// histogram, how many insertions took place between storing an entry and reading it back.
struct StatCache {
  // key -> (object, pointer, value of `depth` at insertion time)
  tools.base.Stuple!(Object, char*, int)[char*] cache;
  // insertion counter; advanced by every opIndexAssign
  int depth;
  // `p in cache`: null on a miss. On a hit, bumps accesses[depth - stored depth]
  // and returns the entry viewed as a two-field tuple.
  tools.base.Stuple!(Object, char*)* opIn_r(char* p) {
    auto res = p in cache;
    if (!res) return null;
    auto delta = depth - res._2;
    if (!(delta in accesses)) accesses[delta] = 0;
    accesses[delta] ++;
    // NOTE(review): assumes the leading two fields of the 3-tuple are laid out like the 2-tuple — confirm against Stuple
    return cast(tools.base.Stuple!(Object, char*)*) res;
  }
  // `cache[idx] = thing`: stores the pair together with the current depth, then increments depth.
  void opIndexAssign(tools.base.Stuple!(Object, char*) thing, char* idx) {
    cache[idx] = stuple(thing._0, thing._1, depth++);
  }
}
  65. struct SpeedCache {
  66. tools.base.Stuple!(char*, Object, char*)[24] cache;
  67. int curPos;
  68. tools.base.Stuple!(Object, char*)* opIn_r(char* p) {
  69. int start = curPos - 1;
  70. if (start == -1) start += cache.length;
  71. int i = start;
  72. do {
  73. if (cache[i]._0 == p)
  74. return cast(tools.base.Stuple!(Object, char*)*) &cache[i]._1;
  75. if (--i == -1) i += cache.length;
  76. } while (i != start);
  77. return null;
  78. }
  79. void opIndexAssign(tools.base.Stuple!(Object, char*) thing, char* idx) {
  80. cache[curPos++] = stuple(idx, thing._0, thing._1);
  81. if (curPos == cache.length) curPos = 0;
  82. }
  83. }
// Numeric bases understood by gotInt; the ordinal value indexes the `factor` table in getDigits.
enum Scheme { Binary, Octal, Decimal, Hex }
// Tries to read an integer literal from the front of `text`, after skipping comments.
// Recognises an optional leading '-', the prefixes 0b/0B and 0x/0X, a leading 0 for octal,
// and '_' separators after the first digit.
// On success: stores the value in `i`, advances `text` past the literal, returns true.
// `text` is only reassigned on success.
// If `unsigned` is non-null and digits were read, *unsigned reports whether the value
// needed the uint range. NOTE(review): the '-' branch does not forward `unsigned`.
bool gotInt(ref string text, out int i, bool* unsigned = null) {
  auto t2 = text;
  t2.eatComments();
  if (auto rest = t2.startsWith("-")) {
    // parse the magnitude recursively; negate and commit only if that worked
    return gotInt(rest, i)
      && (
        i = -i,
        (text = rest),
        true
      );
  }
  // value of the digit most recently consumed by accept()
  ubyte ub;
  // Consumes one byte of t2 if it equals `from` (when `to` is left at 0xff) or lies in
  // [from, to]; stores its offset from `from` in `ub`.
  bool accept(ubyte from, ubyte to = 0xff) {
    if (!t2.length) return false;
    ubyte nub = t2[0];
    if (nub < from) return false;
    if (to != 0xff) { if (nub > to) return false; }
    else { if (nub > from) return false; }
    nub -= from;
    t2.take();
    ub = nub;
    return true;
  }
  // accumulated value; may hold uint bits once must_uint is set
  int res;
  // sticky: set as soon as an intermediate value no longer fits a signed int
  bool must_uint;
  // Reads as many digits of the given base as possible into `res`.
  // Returns false if no digit was found or the value exceeds 32 bits.
  bool getDigits(Scheme scheme) {
    static int[4] factor = [2, 8, 10, 16];
    bool gotSomeDigits = false;
    outer:while (true) {
      // if it starts with _, it's an identifier
      while (gotSomeDigits && accept('_')) { }
      // The cases deliberately fall through from the widest digit set to the narrowest;
      // reaching `default` means no digit matched and ends the loop.
      switch (scheme) {
        case Scheme.Hex:
          if (accept('a', 'f')) { ub += 10; break; }
          if (accept('A', 'F')) { ub += 10; break; }
        case Scheme.Decimal: if (accept('0', '9')) break;
        case Scheme.Octal: if (accept('0', '7')) break;
        case Scheme.Binary: if (accept('0', '1')) break;
        default: break outer;
      }
      gotSomeDigits = true;
      assert(ub < factor[scheme]);
      // widen to long so overflow of the 32-bit range can be detected
      long nres = cast(long) res * cast(long) factor[scheme] + cast(long) ub;
      if (cast(long) cast(int) nres != nres) must_uint = true; // prevent this check from passing once via uint, once via int. See test169fail.nt
      if ((must_uint || cast(long) cast(int) nres != nres) && cast(long) cast(uint) nres != nres) {
        text.setError("Number too large for 32-bit integer representation");
        return false;
      }
      res = cast(int) nres;
    }
    if (gotSomeDigits && unsigned) *unsigned = must_uint;
    return gotSomeDigits;
  }
  if (accept('0')) {
    if (accept('b') || accept('B')) {
      if (!getDigits(Scheme.Binary)) return false;
    } else if (accept('x') || accept('X')) {
      if (!getDigits(Scheme.Hex)) return false;
    } else {
      // a lone "0" with no octal digits after it is simply zero
      if (!getDigits(Scheme.Octal)) res = 0;
    }
  } else {
    if (!getDigits(Scheme.Decimal)) return false;
  }
  i = res;
  text = t2;
  return true;
}
  153. import tools.compat: replace;
  154. import tools.base: Stuple, stuple;
  155. string returnTrueIf(dstring list, string var) {
  156. string res = "switch ("~var~") {";
  157. foreach (dchar d; list) {
  158. string myu; myu ~= d;
  159. res ~= "case '"~myu~"': return true;";
  160. }
  161. res ~= "default: break; }";
  162. return res;
  163. }
// copypasted from phobos to enable inlining
// Decodes the UTF-8 sequence that starts at s[idx] and returns the code point.
// `idx` is advanced past the sequence on success and left unchanged on failure.
// Throws UtfException for malformed, truncated, overlong or 5/6-byte sequences.
version(Windows) { } else pragma(set_attribute, decode, optimize("-fomit-frame-pointer"));
dchar decode(char[] s, ref size_t idx) {
  size_t len = s.length;
  dchar V;
  size_t i = idx;
  char u = s[i];
  if (u & 0x80)
  {
    // n: total number of bytes in the sequence, derived from the lead byte's high bits
    uint n;
    char u2;
    /* The following encodings are valid, except for the 5 and 6 byte
     * combinations:
     * 0xxxxxxx
     * 110xxxxx 10xxxxxx
     * 1110xxxx 10xxxxxx 10xxxxxx
     * 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     * 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     */
    for (n = 1; ; n++)
    {
      if (n > 4) goto Lerr; // only do the first 4 of 6 encodings
      if (((u << n) & 0x80) == 0) {
        // n == 1 means the lead byte is 10xxxxxx, i.e. a stray continuation byte
        if (n == 1) goto Lerr;
        break;
      }
    }
    // Pick off (7 - n) significant bits of B from first byte of octet
    V = cast(dchar)(u & ((1 << (7 - n)) - 1));
    if (i + (n - 1) >= len) goto Lerr; // off end of string
    /* The following combinations are overlong, and illegal:
     * 1100000x (10xxxxxx)
     * 11100000 100xxxxx (10xxxxxx)
     * 11110000 1000xxxx (10xxxxxx 10xxxxxx)
     * 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
     * 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
     */
    u2 = s[i + 1];
    if ((u & 0xFE) == 0xC0 ||
      (u == 0xE0 && (u2 & 0xE0) == 0x80) ||
      (u == 0xF0 && (u2 & 0xF0) == 0x80) ||
      (u == 0xF8 && (u2 & 0xF8) == 0x80) ||
      (u == 0xFC && (u2 & 0xFC) == 0x80))
      goto Lerr; // overlong combination
    // fold in six payload bits from each continuation byte
    for (uint j = 1; j != n; j++)
    {
      u = s[i + j];
      if ((u & 0xC0) != 0x80) goto Lerr; // trailing bytes are 10xxxxxx
      V = (V << 6) | (u & 0x3F);
    }
    // if (!isValidDchar(V)) goto Lerr;
    i += n;
  } else {
    // plain ASCII byte
    V = cast(dchar) u;
    i++;
  }
  idx = i;
  return V;
  Lerr:
  //printf("\ndecode: idx = %d, i = %d, length = %d s = \n'%.*s'\n%x\n'%.*s'\n", idx, i, s.length, s, s[i], s[i .. length]);
  throw new UtfException("4invalid UTF-8 sequence", i);
}
  227. // TODO: unicode
  228. bool isNormal(dchar c) {
  229. if (c < 128) {
  230. return (c >= 'a' && c <= 'z') ||
  231. (c >= 'A' && c <= 'Z') ||
  232. (c >= '0' && c <= '9') ||
  233. c == '_';
  234. }
  235. mixin(returnTrueIf(
  236. "µð" // different mu
  237. "αβγδεζηθικλμνξοπρσςτυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ"
  238. , "c"));
  239. return false;
  240. }
  241. string lastAccepted, lastAccepted_stripped;
// Token matcher. acceptT!(true) memoises the comment-stripped form of the most recent
// input in the module-level lastAccepted/lastAccepted_stripped pair; acceptT!(false)
// does not touch that shared state.
template acceptT(bool USECACHE) {
  // pragma(attribute, optimize("-O3"))
  // Skips comments at the front of `s`, then checks that it continues with the token `t`.
  // On success `s` is advanced past the token and true is returned; on failure `s` is unchanged.
  bool acceptT(ref string s, string t) {
    string s2;
    // debug builds: tokens must be passed in already stripped
    debug if (t !is t.strip()) {
      logln("bad t: '", t, "'");
      fail;
    }
    static if (USECACHE) {
      // same slice as last time: reuse the stripped form instead of re-running eatComments
      if (s.ptr == lastAccepted.ptr && s.length == lastAccepted.length) {
        s2 = lastAccepted_stripped;
      } else {
        s2 = s;
        s2.eatComments();
        lastAccepted = s;
        lastAccepted_stripped = s2;
      }
    } else {
      s2 = s;
      s2.eatComments();
    }
    size_t idx = t.length, zero = 0;
    if (!s2.startsWith(t)) return false;
    // Word boundary: if the token begins with an identifier character and the input
    // continues with another identifier character, this is a longer identifier, not `t`.
    // NOTE(review): t.decode(zero) indexes t[0], so an empty `t` is not supported here.
    if (isNormal(t.decode(zero)) && s2.length > t.length && isNormal(s2.decode(idx))) {
      return false;
    }
    s = s2[t.length .. $];
    // tokens that end in a space additionally require (and consume) a space in the input
    if (!(t.length && t[$-1] == ' ') || !s.length) return true;
    if (s[0] != ' ') return false;
    s = s[1 .. $];
    return true;
  }
}
  275. alias acceptT!(true)/*.acceptT*/ accept;
  276. alias acceptT!(false)/*.acceptT*/ accept_mt;
  277. bool hadABracket(string s) {
  278. auto s2 = (s.ptr - 1)[0..s.length + 1];
  279. if (s2.accept("}")) return true;
  280. return false;
  281. }
  282. // statement terminator.
  283. // multiple semicolons can be substituted with a single one
  284. // and "}" counts as "};"
  285. bool acceptTerminatorSoft(ref string s) {
  286. if (!s.ptr) return true; // yeagh. Just assume it worked and leave me alone.
  287. if (s.accept(";")) return true;
  288. auto s2 = (s.ptr - 1)[0..s.length + 1];
  289. if (s2.accept(";") || s2.accept("}")) return true;
  290. return false;
  291. }
  292. bool mustAcceptTerminatorSoft(ref string s, lazy string err) {
  293. string s2 = s;
  294. if (!acceptTerminatorSoft(s2)) s.failparse(err());
  295. s = s2;
  296. return true;
  297. }
  298. bool mustAccept(ref string s, string t, lazy string err) {
  299. if (s.accept(t)) return true;
  300. s.failparse(err());
  301. }
  302. bool bjoin(ref string s, lazy bool c1, lazy bool c2, void delegate() dg,
  303. bool allowEmpty = true) {
  304. auto s2 = s;
  305. if (!c1) { s = s2; return allowEmpty; }
  306. dg();
  307. while (true) {
  308. s2 = s;
  309. if (!c2) { s = s2; return true; }
  310. s2 = s;
  311. if (!c1) { s = s2; return false; }
  312. dg();
  313. }
  314. }
  315. // while expr
  316. bool many(ref string s, lazy bool b, void delegate() dg = null, string abort = null) {
  317. while (true) {
  318. auto s2 = s, s3 = s2;
  319. if (abort && s3.accept(abort)
  320. ||
  321. !b()
  322. ) {
  323. s = s2;
  324. break;
  325. }
  326. if (dg) dg();
  327. }
  328. return true;
  329. }
  330. import std.utf;
version(Windows) { } else pragma(set_attribute, gotIdentifier, optimize("-fomit-frame-pointer"));
// Reads an identifier from the front of `text`, after skipping comments.
// Identifier characters are those accepted by isNormal, '-' (not as the first character)
// and, when `acceptDots` is set, '.'. A leading digit is rejected unless `acceptNumbers`.
// On success `ident` holds the identifier and `text` is advanced past it.
bool gotIdentifier(ref string text, out string ident, bool acceptDots = false, bool acceptNumbers = false) {
  auto t2 = text;
  t2.eatComments();
  bool isValid(dchar d, bool first = false) {
    return isNormal(d) || (!first && d == '-') || (acceptDots && d == '.');
  }
  // array length special handling
  // NOTE(review): `text` is set to t2 without dropping the '$', so the '$' is not consumed here — confirm intended
  if (t2.length && t2[0] == '$') { text = t2; ident = "$"; return true; }
  if (!acceptNumbers && t2.length && t2[0] >= '0' && t2[0] <= '9') { return false; /* identifiers must not start with numbers */ }
  size_t idx = 0;
  // decode advances idx past the first character
  if (!t2.length || !isValid(t2.decode(idx), true)) return false;
  size_t prev_idx = 0;
  dchar cur;
  // advance one code point at a time; prev_idx trails idx by one character
  do {
    prev_idx = idx;
    if (idx == t2.length) break;
    cur = t2.decode(idx);
    // } while (isValid(cur));
  } while (isNormal(cur) || (cur == '-') || (acceptDots && cur == '.'));
  // prev_idx now is the start of the first invalid character
  /*if (ident in reserved) {
    // logln("reject ", ident);
    return false;
  }*/
  ident = t2[0 .. prev_idx];
  // a lone lambda sign is never an identifier
  if (ident == "λ") return false;
  text = t2[prev_idx .. $];
  return true;
}
  361. bool isCNormal(char ch) {
  362. return ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9';
  363. }
  364. bool gotCIdentifier(ref string text, out string ident) {
  365. auto t2 = text;
  366. t2.eatComments();
  367. if (t2.length && t2[0] >= '0' && t2[0] <= '9') { return false; /* identifiers must not start with numbers */ }
  368. size_t idx = 0;
  369. if (!t2.length || !isCNormal(t2[idx++])) return false;
  370. size_t prev_idx = 0;
  371. dchar cur;
  372. do {
  373. prev_idx = idx;
  374. if (idx == t2.length) break;
  375. cur = t2[idx++];
  376. } while (isCNormal(cur));
  377. ident = t2[0 .. prev_idx];
  378. text = t2[prev_idx .. $];
  379. return true;
  380. }
  381. bool[string] reserved, reservedPropertyNames;
  382. static this() {
  383. reserved["auto"] = true;
  384. reserved["return"] = true;
  385. reserved["function"] = true;
  386. reserved["delegate"] = true;
  387. reserved["type-of"] = true;
  388. reserved["string-of"] = true;
  389. reservedPropertyNames["eval"] = true;
  390. reservedPropertyNames["iterator"] = true;
  391. }
  392. // This isn't a symbol! Maybe I was wrong about the dash ..
  393. // Remove the last dash part from "id" that was taken from "text"
  394. // and re-add it to "text" via dark pointer magic.
  395. bool eatDash(ref string text, ref string id) {
  396. auto dp = id.rfind("-");
  397. if (dp == -1) return false;
  398. auto removed = id.length - dp;
  399. id = id[0 .. dp];
  400. // give back
  401. text = (text.ptr - removed)[0 .. text.length + removed];
  402. return true;
  403. }
  404. // see above
  405. bool eatDot(ref string text, ref string id) {
  406. auto dp = id.rfind(".");
  407. if (dp == -1) return false;
  408. auto removed = id.length - dp;
  409. id = id[0 .. dp];
  410. // give back also
  411. text = (text.ptr - removed)[0 .. text.length + removed];
  412. return true;
  413. }
  414. bool ckbranch(ref string s, bool delegate()[] dgs...) {
  415. auto s2 = s;
  416. foreach (dg; dgs) {
  417. if (dg()) return true;
  418. s = s2;
  419. }
  420. return false;
  421. }
  422. bool verboseParser = false;
  423. string[bool delegate(string)] condInfo;
  424. bool sectionStartsWith(string section, string rule) {
  425. if (section.length == rule.length && section.faststreq_samelen_nonz(rule)) return true;
  426. if (section.length < rule.length) return false;
  427. if (!section[0..rule.length].faststreq_samelen_nonz(rule)) return false;
  428. if (section.length == rule.length) return true;
  429. // only count hits that match a complete section
  430. return section[rule.length] == '.';
  431. }
// Compile-time counterpart of matchrule: turns a space-separated rule list into the source
// text of a `delegate bool(... string text)` literal, intended for use with mixin.
// Each rule may carry one prefix character: '<' '>' '=' '^' '_' ','; no prefix means "<" plus "=".
string matchrule_static(string rules) {
  // string res = "false /apply/ delegate bool(ref bool hit, string text) {";
  string res;
  int i;
  // number of rules that need a state flag bound via /apply/
  int falses;
  // extra leading delegate parameters, one per stateful rule
  string preparams;
  while (rules.length) {
    string passlabel = "pass"~ctToString(i);
    string flagname = "flag"~ctToString(i);
    i++;
    auto rule = ctSlice(rules, " ");
    auto first = rule[0], rest = rule[1 .. $];
    bool smaller, greater, equal, before, after, after_incl;
    switch (first) {
      case '<': smaller = true; rule = rest; break;
      case '>': greater = true; rule = rest; break;
      case '=': equal = true; rule = rest; break;
      case '^': before = true; preparams ~= "bool "~flagname~", "; falses ++; rule = rest; break;
      case '_': after = true; preparams ~= "bool "~flagname~", "; falses ++; rule = rest; break;
      case ',': after_incl = true; preparams ~= "bool "~flagname~", "; falses ++; rule = rest; break;
      default: break;
    }
    if (!smaller && !greater && !equal && !before && !after && !after_incl)
      smaller = equal = true; // default (see below)
    // Each emitted test jumps to this rule's pass label when satisfied; otherwise the
    // generated code falls into "return false".
    if (smaller) res ~= "if (text.sectionStartsWith(\""~rule~"\")) goto "~passlabel~";\n";
    if (equal) res ~= "if (text == \""~rule~"\") goto "~passlabel~";\n";
    if (greater) res ~= "if (!text.sectionStartsWith(\""~rule~"\")) goto "~passlabel~";\n";
    // NOTE(review): the three stateful variants below emit code that refers to `hit`, but the
    // parameter declared above is named flagN (and is not `ref`) — looks inconsistent; verify.
    if (before) res ~= "if (text.sectionStartsWith(\""~rule~"\")) hit = true; if (!hit) goto "~passlabel~";\n";
    if (after) res ~= "if (text.sectionStartsWith(\""~rule~"\")) hit = true; else if (hit) goto "~passlabel~";\n";
    if (after_incl)res~="if (text.sectionStartsWith(\""~rule~"\")) hit = true; if (hit) goto "~passlabel~";\n";
    res ~= "return false; "~passlabel~": \n";
  }
  // prefix that binds the initial `false` flag value(s) to the delegate's leading parameters
  string falsestr;
  if (falses == 1) falsestr = "false /apply/ ";
  else if (falses > 1) {
    falsestr = "false";
    for (int k = 1; k < falses; ++k) falsestr ~= ", false";
    falsestr = "stuple("~falsestr~") /apply/ ";
  }
  return falsestr ~ "delegate bool("~preparams~"string text) { \n" ~ res ~ "return true; \n}";
}
// Cache of compiled rule strings -> matcher delegates (reset by resort()).
bool delegate(string)[string] rulefuns;
// Compiles a space-separated rule list into a predicate over parser ids.
// Every rule may be prefixed with any of '<' (id equals or lies below the rule), '>' (id does
// not lie below it), '=' (exact match), '^', '_' and ',' (stateful before/after variants that
// remember whether the rule has been seen); no prefix means "<" plus "=".
// The predicate accepts an id only if all rules accept it. Results are memoised in rulefuns.
bool delegate(string) matchrule(string rules) {
  if (auto p = rules in rulefuns) return *p;
  bool delegate(string) res;
  auto rules_backup = rules;
  while (rules.length) {
    auto rule = rules.slice(" ");
    bool smaller, greater, equal, before, after, after_incl;
    assert(rule.length);
    // strip prefix characters one at a time, setting the matching flag for each
    restartRule:
    auto first = rule[0], rest = rule[1 .. $];
    switch (first) {
      case '<': smaller = true; rule = rest; goto restartRule;
      case '>': greater = true; rule = rest; goto restartRule;
      case '=': equal = true; rule = rest; goto restartRule;
      case '^': before = true; rule = rest; goto restartRule;
      case '_': after = true; rule = rest; goto restartRule;
      case ',': after_incl = true; rule = rest; goto restartRule;
      default: break;
    }
    if (!smaller && !greater && !equal && !before && !after && !after_incl)
      smaller = equal = true; // default
    // different modes
    assert((smaller || greater || equal) ^ before ^ after ^ after_incl);
    // Bind the flags, the rule text, the previously built matcher and a fresh `hit` state
    // to the leading parameters; what remains is a bool delegate(string).
    res = stuple(smaller, greater, equal, before, after, after_incl, rule, res, false) /apply/
    (bool smaller, bool greater, bool equal, bool before, bool after, bool after_incl, string rule, bool delegate(string) prev, ref bool hit, string text) {
      // logln(smaller, " ", greater, " ", equal, " ", before, " ", after, " ", after_incl, " ", hit, " and ", rule, " onto ", text, ":", text.sectionStartsWith(rule));
      // earlier rules in the list must accept first
      if (prev && !prev(text)) return false;
      if (equal && text == rule) return true;
      bool startswith = text.sectionStartsWith(rule);
      if (smaller && startswith) return true; // all "below" in the tree
      if (greater && !startswith) return true; // arguable
      if (before) {
        // accept ids until the rule is first seen
        if (startswith)
          hit = true;
        return !hit;
      }
      if (after) {
        // accept ids after the rule has been seen; ids matching the rule itself fall
        // through to the final return false
        if (startswith)
          hit = true;
        else return hit;
      }
      if (after_incl) {
        // accept ids from the rule's first appearance on, including that id
        if (startswith)
          hit = true;
        return hit;
      }
      return false;
    };
  }
  rulefuns[rules_backup] = res;
  return res;
}
  526. import tools.functional;
  527. struct ParseCb {
  528. Object delegate(ref string text,
  529. bool delegate(string)
  530. ) dg;
  531. bool delegate(string) cur;
  532. Object opCall(T...)(ref string text, T t) {
  533. bool delegate(string) matchdg;
  534. static if (T.length && is(T[0]: char[])) {
  535. alias T[1..$] Rest1;
  536. matchdg = matchrule(t[0]);
  537. auto rest1 = t[1..$];
  538. } else static if (T.length && is(T[0] == bool delegate(string))) {
  539. alias T[1..$] Rest1;
  540. matchdg = t[1];
  541. auto rest1 = t[1..$];
  542. } else {
  543. matchdg = cur;
  544. alias T Rest1;
  545. alias t rest1;
  546. }
  547. static if (Rest1.length == 1 && is(typeof(*rest1[0])) && !is(MustType))
  548. alias typeof(*rest1[0]) MustType;
  549. static if (Rest1.length == 1 && is(Rest1[0] == delegate)) {
  550. alias Params!(Rest1[0])[0] MustType;
  551. auto callback = rest1[0];
  552. }
  553. static if (Rest1.length == 1 && is(typeof(*rest1[0])) || is(typeof(callback))) {
  554. auto backup = text;
  555. static if (is(typeof(callback))) {
  556. // if the type doesn't match, error?
  557. auto res = dg(text, matchdg);
  558. if (!res) text = backup;
  559. else {
  560. auto t = fastcast!(MustType) (res);
  561. if (!t) backup.failparse("Type (", MustType.stringof, ") not matched: ", res);
  562. callback(t);
  563. }
  564. return fastcast!(Object) (res);
  565. } else {
  566. *rest1[0] = fastcast!(typeof(*rest1[0])) (dg(text, matchdg));
  567. if (!*rest1[0]) text = backup;
  568. return fastcast!(Object) (*rest1[0]);
  569. }
  570. } else {
  571. static assert(!Rest1.length, "Left: "~Rest1.stringof~" of "~T.stringof);
  572. return dg(text, matchdg);
  573. }
  574. }
  575. }
// used to be class, flattened for speed
// One registered grammar rule.
struct Parser {
  // key: literal the input must start with for this parser to be tried (may be null);
  // id: dotted name that rule matchers and precedence ordering operate on
  string key, id;
  // The parse function: returns the parsed object or null.
  // `cont` and `rest` are the callbacks _parse.parse hands to every parser.
  Object delegate
    (ref string text,
    ParseCb cont,
    ParseCb rest) match;
}
  584. // stuff that it's unsafe to memoize due to side effects
  585. bool delegate(string)[] globalStateMatchers;
  586. int cachedepth;
  587. int[] cachecount;
  588. static this() { cachecount = new int[8]; }
  589. void delegate() pushCache() {
  590. cachedepth ++;
  591. if (cachecount.length <= cachedepth) cachecount.length = cachedepth + 1;
  592. cachecount[cachedepth] ++;
  593. return { cachedepth --; };
  594. }
  595. struct Stash(T) {
  596. const StaticSize = 4;
  597. T[StaticSize] static_backing_array;
  598. int[StaticSize] static_backing_lastcount;
  599. T[] backing_array;
  600. int[] backing_lastcount;
  601. T* getPointerInternal(ref int* countp) {
  602. int i = cachedepth;
  603. if (i < StaticSize) {
  604. countp = &static_backing_lastcount[i];
  605. return &static_backing_array[i];
  606. }
  607. i -= StaticSize;
  608. if (!backing_array.length) {
  609. backing_array .length = 1;
  610. backing_lastcount.length = 1;
  611. }
  612. while (i >= backing_array.length) {
  613. backing_array .length = backing_array .length * 2;
  614. backing_lastcount.length = backing_lastcount.length * 2;
  615. }
  616. countp = &backing_lastcount[i];
  617. return &backing_array[i];
  618. }
  619. T* getPointer() {
  620. int* countp;
  621. auto p = getPointerInternal(countp);
  622. int cmp = cachecount[cachedepth];
  623. if (*countp != cmp) {
  624. *countp = cmp;
  625. *p = Init!(T);
  626. }
  627. return p;
  628. }
  629. }
  630. bool[string] unreserved;
  631. static this() {
  632. unreserved["enum"] = true;
  633. unreserved["sum"] = true;
  634. unreserved["prefix"] = true;
  635. unreserved["suffix"] = true;
  636. unreserved["vec"] = true;
  637. unreserved["context"] = true;
  638. unreserved["do"] = true;
  639. }
  640. void reserve(string key) {
  641. if (key in unreserved) return;
  642. reserved[key] = true;
  643. }
  644. template DefaultParserImpl(alias Fn, string Id, bool Memoize, string Key, bool MemoizeForever) {
  645. final class DefaultParserImpl {
  646. Object info;
  647. bool dontMemoMe;
  648. static this() {
  649. static if (Key) reserve(Key);
  650. }
  651. this(Object obj = null) {
  652. info = obj;
  653. foreach (dg; globalStateMatchers)
  654. if (dg(Id)) { dontMemoMe = true; break; }
  655. }
  656. Parser genParser() {
  657. Parser res;
  658. res.key = Key;
  659. res.id = Id;
  660. res.match = &match;
  661. return res;
  662. }
  663. Object fnredir(ref string text, ParseCb cont, ParseCb rest) {
  664. static if (is(typeof((&Fn)(info, text, cont, rest))))
  665. return Fn(info, text, cont, rest);
  666. else static if (is(typeof((&Fn)(info, text, cont, rest))))
  667. return Fn(info, text, cont, rest);
  668. else static if (is(typeof((&Fn)(text, cont, rest))))
  669. return Fn(text, cont, rest);
  670. else
  671. return Fn(text, cont, rest);
  672. }
  673. static if (!Memoize) {
  674. Object match(ref string text, ParseCb cont, ParseCb rest) {
  675. return fnredir(text, cont, rest);
  676. }
  677. } else {
  678. // Stuple!(Object, char*)[char*] cache;
  679. static if (MemoizeForever) {
  680. Stash!(Stuple!(Object, char*)[char*]) cachestash;
  681. } else {
  682. Stash!(SpeedCache) cachestash;
  683. }
  684. Object match(ref string text, ParseCb cont, ParseCb rest) {
  685. auto t2 = text;
  686. if (.accept(t2, "]")) return null; // never a valid start
  687. if (dontMemoMe) {
  688. static if (Key) if (!.accept(t2, Key)) return null;
  689. auto res = fnredir(t2, cont, rest);
  690. if (res) text = t2;
  691. return res;
  692. }
  693. auto ptr = t2.ptr;
  694. auto cache = cachestash.getPointer();
  695. if (auto p = ptr in *cache) {
  696. if (!p._1) text = null;
  697. else text = p._1[0 .. t2.ptr + t2.length - p._1];
  698. return p._0;
  699. }
  700. static if (Key) if (!.accept(t2, Key)) return null;
  701. auto res = fnredir(t2, cont, rest);
  702. (*cache)[ptr] = stuple(res, t2.ptr);
  703. if (res) text = t2;
  704. return res;
  705. }
  706. }
  707. }
  708. }
  709. import tools.threads, tools.compat: rfind;
// Module constructor: sets up `sync`, the object the synchronized blocks in
// addPrecedence/lookupPrecedence lock on.
// NOTE(review): New is defined outside this file; presumably it allocates the object — confirm.
static this() { New(sync); }
  711. template DefaultParser(alias Fn, string Id, string Prec = null, string Key = null, bool Memoize = true, bool MemoizeForever = false) {
  712. static this() {
  713. static if (Prec) addParser((new DefaultParserImpl!(Fn, Id, Memoize, Key, MemoizeForever)).genParser(), Prec);
  714. else addParser((new DefaultParserImpl!(Fn, Id, Memoize, Key, MemoizeForever)).genParser());
  715. }
  716. }
  717. import tools.log;
  718. struct SplitIter(T) {
  719. T data, sep;
  720. T front, frontIncl, all;
  721. T pop() {
  722. for (int i = 0; i <= cast(int) data.length - cast(int) sep.length; ++i) {
  723. if (data[i .. i + sep.length] == sep) {
  724. auto res = data[0 .. i];
  725. data = data[i + sep.length .. $];
  726. front = all[0 .. $ - data.length - sep.length - res.length];
  727. frontIncl = all[0 .. front.length + res.length];
  728. return res;
  729. }
  730. }
  731. auto res = data;
  732. data = null;
  733. front = null;
  734. frontIncl = all;
  735. return res;
  736. }
  737. }
  738. SplitIter!(T) splitIter(T)(T d, T s) {
  739. SplitIter!(T) res;
  740. res.data = d; res.sep = s;
  741. res.all = res.data;
  742. return res;
  743. }
  744. void delegate(string) justAcceptedCallback;
  745. int[string] idepth;
  746. Parser[] parsers;
  747. Parser[][bool delegate(string)] prefiltered_parsers;
  748. string[string] prec; // precedence mapping
  749. Object sync;
  750. void addPrecedence(string id, string val) { synchronized(sync) { prec[id] = val; } }
  751. string lookupPrecedence(string id) {
  752. synchronized(sync)
  753. if (auto p = id in prec) return *p;
  754. return null;
  755. }
  756. import tools.compat: split, join;
// Renders the (sorted) parser list as text, one id per line. Leading id components that
// repeat the previous line's components are blanked out, and ids with a registered
// precedence get ":<precedence>" appended in an aligned column.
string dumpInfo() {
  if (listModified) resort;
  string res;
  // length of the longest id, for column alignment
  int maxlen;
  foreach (parser; parsers) {
    auto id = parser.id;
    if (id.length > maxlen) maxlen = id.length;
  }
  // column width (note: this local shadows the module-level `reserved` table)
  auto reserved = maxlen + 2;
  string[] prevId;
  foreach (parser; parsers) {
    auto id = parser.id;
    // split a private copy so the blanking below does not modify parser.id
    auto n = id.dup.split(".");
    foreach (i, str; n[0 .. min(n.length, prevId.length)]) {
      // `str` aliases the storage of n[i], so overwriting its characters blanks n[i] too
      if (str == prevId[i]) foreach (ref ch; str) ch = ' ';
    }
    prevId = id.split(".");
    res ~= n.join(".");
    if (auto p = id in prec) {
      for (int i = 0; i < reserved - id.length; ++i)
        res ~= " ";
      res ~= ":" ~ *p;;
    }
    res ~= "\n";
  }
  return res;
}
// Ordering predicate used by resort(): true when parser `pa` must come before `pb`.
// The dotted ids are compared component by component; at the first differing component the
// precedence strings registered for the two id prefixes decide.
// Throws when only one side has precedence info or when both precedences are identical.
bool idSmaller(Parser pa, Parser pb) {
  auto a = splitIter(pa.id, "."), b = splitIter(pb.id, ".");
  string ap, bp;
  while (true) {
    ap = a.pop(); bp = b.pop();
    if (!ap && !bp) return false; // equal
    if (ap && !bp) return true; // longer before shorter
    if (bp && !ap) return false;
    if (ap == bp) continue; // no information here
    // frontIncl is the id prefix up to and including the differing component
    auto aprec = lookupPrecedence(a.frontIncl), bprec = lookupPrecedence(b.frontIncl);
    if (!aprec && bprec)
      throw new Exception("Patterns "~a.frontIncl~" vs. "~b.frontIncl~": first is missing precedence info! ");
    if (!bprec && aprec)
      throw new Exception("Patterns "~a.frontIncl~" vs. "~b.frontIncl~": second is missing precedence info! ");
    if (!aprec && !bprec) return ap < bp; // lol
    if (aprec == bprec) throw new Exception("Error: patterns '"~a.frontIncl~"' and '"~b.frontIncl~"' have the same precedence! ");
    for (int i = 0; i < min(aprec.length, bprec.length); ++i) {
      // precedence needs to be _inverted_, ie. lower-precedence rules must come first
      // this is because "higher-precedence" means it binds tighter.
      // if (aprec[i] > bprec[i]) return true;
      // if (aprec[i] < bprec[i]) return false;
      if (aprec[i] < bprec[i]) return true;
      if (aprec[i] > bprec[i]) return false;
    }
    // One precedence string is a proper prefix of the other (equal strings threw above).
    bool flip;
    // this gets a bit hairy
    // 50 before 5, 509 before 5, but 51 after 5.
    // After the swap, aprec is the longer string; its first extra character decides.
    if (aprec.length < bprec.length) { swap(aprec, bprec); flip = true; }
    if (aprec[bprec.length] != '0') return flip;
    return !flip;
  }
}
  816. bool listModified;
  817. void addParser(Parser p) {
  818. parsers ~= p;
  819. listModified = true;
  820. }
  821. void addParser(Parser p, string pred) {
  822. addParser(p);
  823. addPrecedence(p.id, pred);
  824. }
  825. import quicksort: qsort_ = qsort;
  826. import tools.time: sec, µsec;
  827. void resort() {
  828. parsers.qsort_(&idSmaller);
  829. prefiltered_parsers = null; // invalid; regenerate
  830. rulefuns = null; // also reset this to regenerate the closures
  831. listModified = false;
  832. }
  833. Object parse(ref string text, bool delegate(string) cond,
  834. int offs = 0)
  835. {
  836. if (verboseParser) return _parse!(true).parse(text, cond, offs);
  837. else return _parse!(false).parse(text, cond, offs);
  838. }
  839. string condStr;
  840. Object parse(ref string text, string cond) {
  841. condStr = cond;
  842. scope(exit) condStr = null;
  843. try return parse(text, matchrule(cond));
  844. catch (ParseEx pe) { pe.addRule(cond); throw pe; }
  845. catch (Exception ex) throw new Exception(Format("Matching rule '"~cond~"': ", ex));
  846. }
// Core parser driver, instantiated once with and once without trace logging.
template _parse(bool Verbose) {
  // Tries, in list order starting at index `offs`, every registered parser whose id is
  // accepted by `cond`, and returns the first non-null result (null if none matches or the
  // input is empty). `text` is advanced by the successful parser and restored after each
  // failed attempt.
  Object parse(ref string text, bool delegate(string) cond,
    int offs = 0) {
    if (!text.length) return null;
    if (listModified) resort;
    bool matched;
    static if (Verbose)
      logln("BEGIN PARSE '", text.nextText(16), "'");
    ParseCb cont = void, rest = void;
    cont.dg = null; // needed for null check further in
    // index of the parser currently being tried, relative to offs; read by cont_dg
    int i = void;
    // "continue": re-enter the parser list just after the parser currently being tried
    Object cont_dg(ref string text, bool delegate(string) cond) {
      return parse(text, cond, offs + i + 1); // same verbosity - it's a global flag
    }
    // "restart": re-enter the parser list from the top
    Object rest_dg(ref string text, bool delegate(string) cond) {
      return parse(text, cond, 0);
    }
    // compile-time switch for the slowdown profiler below
    const ProfileMode = false;
    static if (ProfileMode) {
      auto start_time = µsec();
      auto start_text = text;
      static float min_speed = float.infinity;
      scope(exit) if (text.ptr !is start_text.ptr) {
        auto delta = (µsec() - start_time) / 1000f;
        auto speed = (text.ptr - start_text.ptr) / delta;
        if (speed < min_speed) {
          min_speed = speed;
          if (delta > 5) {
            logln("New worst slowdown: '",
              condStr, "' at '", start_text.nextText(), "'"
              ": ", speed, " characters/ms "
              "(", (text.ptr - start_text.ptr), " in ", delta, "ms). ");
          }
        }
        // min_speed *= 1.01;
      }
    }
    // the parsers accepted by `cond`, cached per matcher delegate
    Parser[] pref_parsers;
    if (auto p = cond in prefiltered_parsers) pref_parsers = *p;
    else {
      foreach (parser; parsers) if (cond(parser.id)) pref_parsers ~= parser;
      prefiltered_parsers[cond] = pref_parsers;
    }
    // comment-stripped view of the input, used only for the key pre-check below
    auto tx = text;
    tx.eatComments();
    if (!tx.length) return null;
    bool tried;
    // logln("use ", pref_parsers /map/ ex!("p -> p.id"), " [", offs, "..", pref_parsers.length, "]");
    foreach (j, ref parser; pref_parsers[offs..$]) {
      i = j;
      // auto tx = text;
      // skip early. accept is slightly faster than cond.
      // if (parser.key && !.accept(tx, parser.key)) continue;
      if (parser.key.ptr) {
        // cheap rejection: the input must literally start with the parser's key
        auto pk = parser.key;
        if (tx.length < pk.length) continue;
        if (pk.length >= 4) {
          // compare the first four bytes as one word before doing the full comparison
          if (*cast(int*) pk.ptr != *cast(int*) tx.ptr) continue;
        }
        if (tx[0..pk.length] != pk) continue;
      }
      // rulestack ~= stuple(id, text);
      // scope(exit) rulestack = rulestack[0 .. $-1];
      auto id = parser.id;
      static if (Verbose) {
        if (!(id in idepth)) idepth[id] = 0;
        idepth[id] ++;
        scope(exit) idepth[id] --;
        logln("TRY PARSER [", idepth[id], " ", id, "] for '", text.nextText(16), "'");
      }
      matched = true;
      // set up the callbacks lazily, on the first parser that is actually tried
      if (!cont.dg) {
        cont.dg = &cont_dg;
        cont.cur = cond;
        rest.dg = &rest_dg;
        rest.cur = cond;
      }
      auto backup = text;
      if (auto res = parser.match(text, cont, rest)) {
        static if (Verbose) logln(" PARSER [", idepth[id], " ", id, "] succeeded with ", res, ", left '", text.nextText(16), "'");
        if (justAcceptedCallback) justAcceptedCallback(text);
        return res;
      } else {
        static if (Verbose) logln(" PARSER [", idepth[id], " ", id, "] failed");
      }
      // failed attempt: undo whatever the parser consumed
      text = backup;
    }
    return null;
  }
  // version(Windows) { } else pragma(set_attribute, parse, optimize("-O3", "-fno-tree-vrp"));
}
  938. bool test(T)(T t) { if (t) return true; else return false; }
  939. void noMoreHeredoc(string text) {
  940. if (text.accept("<<"))
  941. text.failparse("Please switch from heredoc to {}!");
  942. }
  943. string startsWith(string text, string match)
  944. {
  945. if (text.length < match.length) return null;
  946. // if (!match.length) return text; // doesn't actually happen
  947. if (!text.ptr[0..match.length].faststreq_samelen_nonz(match)) return null;
  948. return text.ptr[match.length .. text.length];
  949. }
  950. string hex(ubyte u) {
  951. auto hs = "0123456789ABCDEF";
  952. return ""~hs[u>>8]~hs[u&0xf];
  953. }
  954. string cleanup(string s) {
  955. string res;
  956. foreach (b; cast(ubyte[]) s) {
  957. if (b >= 'a' && b <= 'z' || b >= 'A' && b <= 'Z' || b >= '0' && b <= '9' || b == '_') {
  958. res ~= b;
  959. } else {
  960. res ~= "_"~hex(b)~"_";
  961. }
  962. }
  963. return res;
  964. }
  965. bool acceptLeftArrow(ref string text) {
  966. return text.accept("<-") || text.accept("←");
  967. }
  968. string filenamepart(string path) {
  969. if (path.find("/") == -1) return path;
  970. auto rpos = path.rfind("/");
  971. return path[rpos + 1 .. $];
  972. }
  973. string dirpart(string path) {
  974. if (path.find("/") == -1) return null;
  975. auto rpos = path.rfind("/");
  976. return path[0 .. rpos];
  977. }