PageRenderTime 23ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/d/scrapple/dparser/tag/version_1/convert.d

https://github.com/alvatar/snippets
D | 637 lines | 483 code | 141 blank | 13 comment | 44 complexity | 6ccb14997ed40ad0eeb636a5d2754e76 MD5 | raw file
  1. import std.stdio;
  2. import std.string;
  3. import syntax.dparse;
  4. class data : IParser
  5. {
  6. char[] dat;
  7. uint i =0;
  8. uint pos() {return i;}
  9. void pos(uint j){ i = j;}
  10. void mark()
  11. {
  12. writef(">>\"%s\"\n", dat[i..($-i)>60? i+60: $]);
  13. }
  14. }
  15. class GrammarParser
  16. {
  17. PObject Terminal(char[] str : "NAME")(IParser p)
  18. {
  19. debug(dParse_runtime) writef("in %s\n", str);
  20. auto lex = cast(data) p;
  21. assert(lex !is null);
  22. int i = lex.i;
  23. for({} i < lex.dat.length; i++)
  24. switch(lex.dat[i])
  25. {
  26. case ' ', '\t', '\n', '\r': continue;
  27. case 'a','b','c','d','e','f','g','h','i','j','k','l','m','n',
  28. 'o','p','q','r','s','t','u','v','w','x','y','z',
  29. 'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
  30. 'O','P','Q','R','S','T','U','V','W','X','Y','Z',
  31. '_':
  32. goto more;
  33. default:
  34. debug(dParse_runtime) writef("fail at %s:%d with %x\n", str,__LINE__, lex.dat[i]);
  35. return new PObjectFail();
  36. }
  37. debug(dParse_runtime) writef("fail at %s:%d with EOF\n", str,__LINE__);
  38. return new PObjectFail();
  39. more:
  40. int start = i;
  41. i++;
  42. for({} i < lex.dat.length; i++)
  43. if(!(
  44. ('a' <= lex.dat[i] && lex.dat[i] <= 'z') ||
  45. ('A' <= lex.dat[i] && lex.dat[i] <= 'Z') ||
  46. ('0' <= lex.dat[i] && lex.dat[i] <= '9') ||
  47. (lex.dat[i] == '_')
  48. ))
  49. break;
  50. lex.i = i;
  51. assert(i <= lex.dat.length);
  52. return new PObjectBox!(char[])(lex.dat[start..i]);
  53. }
  54. PObject Terminal(char[] str : "COLLON")(IParser p)
  55. {
  56. debug(dParse_runtime) writef("in %s\n", str);
  57. auto lex = cast(data) p;
  58. assert(lex !is null);
  59. foreach(int i, char c; lex.dat[lex.i..$])
  60. switch(c)
  61. {
  62. case ':':
  63. lex.i += 1+i;
  64. return new PObjectPass();
  65. case ' ', '\t', '\n', '\r': continue;
  66. default: return new PObjectFail();
  67. }
  68. return new PObjectFail();
  69. }
  70. PObject Terminal(char[] str : "STAR")(IParser p)
  71. {
  72. debug(dParse_runtime) writef("in %s\n", str);
  73. auto lex = cast(data) p;
  74. assert(lex !is null);
  75. foreach(int i, char c; lex.dat[lex.i..$])
  76. switch(c)
  77. {
  78. case '*':
  79. lex.i += 1+i;
  80. return new PObjectPass();
  81. case ' ', '\t', '\n', '\r': continue;
  82. default: return new PObjectFail();
  83. }
  84. return new PObjectFail();
  85. }
  86. PObject Terminal(char[] str : "PLUS")(IParser p)
  87. {
  88. debug(dParse_runtime) writef("in %s\n", str);
  89. auto lex = cast(data) p;
  90. assert(lex !is null);
  91. foreach(int i, char c; lex.dat[lex.i..$])
  92. switch(c)
  93. {
  94. case '+':
  95. lex.i += 1+i;
  96. return new PObjectPass();
  97. case ' ', '\t', '\n', '\r': continue;
  98. default: return new PObjectFail();
  99. }
  100. return new PObjectFail();
  101. }
  102. PObject Terminal(char[] str : "QMARK")(IParser p)
  103. {
  104. debug(dParse_runtime) writef("in %s\n", str);
  105. auto lex = cast(data) p;
  106. assert(lex !is null);
  107. foreach(int i, char c; lex.dat[lex.i..$])
  108. switch(c)
  109. {
  110. case '?':
  111. lex.i += 1+i;
  112. return new PObjectPass();
  113. case ' ', '\t', '\n', '\r': continue;
  114. default: return new PObjectFail();
  115. }
  116. return new PObjectFail();
  117. }
  118. PObject Terminal(char[] str : "PIPE")(IParser p)
  119. {
  120. debug(dParse_runtime) writef("in %s\n", str);
  121. auto lex = cast(data) p;
  122. assert(lex !is null);
  123. foreach(int i, char c; lex.dat[lex.i..$])
  124. {
  125. switch(c)
  126. {
  127. case '|':
  128. lex.i += 1+i;
  129. return new PObjectPass();
  130. case ' ', '\t', '\n', '\r': continue;
  131. default: return new PObjectFail();
  132. }
  133. }
  134. return new PObjectFail();
  135. }
  136. PObject Terminal(char[] str : "SEMICOLLON")(IParser p)
  137. {
  138. debug(dParse_runtime) writef("in %s\n", str);
  139. auto lex = cast(data) p;
  140. assert(lex !is null);
  141. foreach(int i, char c; lex.dat[lex.i..$])
  142. {
  143. switch(c)
  144. {
  145. case ';':
  146. lex.i += 1+i;
  147. return new PObjectPass();
  148. case ' ', '\t', '\n', '\r': continue;
  149. default: return new PObjectFail();
  150. }
  151. }
  152. return new PObjectFail();
  153. }
  154. PObject Terminal(char[] str : "SLASH")(IParser p)
  155. {
  156. debug(dParse_runtime) writef("in %s\n", str);
  157. auto lex = cast(data) p;
  158. assert(lex !is null);
  159. foreach(int i, char c; lex.dat[lex.i..$])
  160. switch(c)
  161. {
  162. case '/':
  163. lex.i += 1+i;
  164. return new PObjectPass();
  165. case ' ', '\t', '\n', '\r': continue;
  166. default: return new PObjectFail();
  167. }
  168. return new PObjectFail();
  169. }
  170. template Terminal(char[] str ) { PObject Terminal(IParser i); pragma(msg, "<need type=ter>"~str~"</need>"); }
  171. template Action(char[]str) { PObject Action(PObject[] p); pragma(msg, "<need type=act>"~str~"</need>"); }
  172. PObject Action(char[]s:"pass1")(PObject[1]p){return p[0];}
  173. PObject Action(char[]s:"pass2nd")(PObject[2]p){return p[1];}
  174. PObject Action(char[]s:"formRule")(PObject[5]p)
  175. {
  176. // NAME COLLON Option ElseOption* SEMICOLLON
  177. auto name = cast(PObjectBox!(char[])) p[0];
  178. auto first = cast(PObjectBox!(Opt)) p[2];
  179. auto rest = cast(PObjectSet) p[3];
  180. assert(name !is null, p[0].BaseName ~" != " ~ typeof(name).stringof);
  181. assert(first !is null, p[2].BaseName ~" != " ~ typeof(first).stringof);
  182. assert(rest !is null, p[3].BaseName ~" != " ~ typeof(rest).stringof);
  183. GRule ret;
  184. ret.name = name.Get;
  185. ret.opts.length = 1 + rest.Count();
  186. ret.opts[0] = first.Get;
  187. foreach(int i, po; rest.get)
  188. {
  189. auto box = cast(PObjectBox!(Opt))po;
  190. assert(box !is null);
  191. ret.opts[i+1] = box.Get;
  192. }
  193. return new PObjectBox!(GRule)(ret);
  194. }
  195. PObject Action(char[]s:"formOpt")(PObject[3]p)
  196. {
  197. // NAME SLASH Parts*
  198. auto name = cast(PObjectBox!(char[])) p[0];
  199. auto rest = cast(PObjectSet) p[2];
  200. Opt ret;
  201. ret.name = name.Get;
  202. ret.parts.length = rest.Count();
  203. foreach(int i, po; rest.get)
  204. {
  205. auto box = cast(PObjectBox!(Part))po;
  206. assert(box !is null);
  207. ret.parts[i] = box.Get;
  208. }
  209. return new PObjectBox!(Opt)(ret);
  210. }
  211. PObject Action(char[]s:"any")(PObject[2]p)
  212. {
  213. auto name = cast(PObjectBox!(char[])) p[0];
  214. Part ret;
  215. ret.name = name.Get;
  216. ret.type = Part.Type.any;
  217. return new PObjectBox!(Part)(ret);
  218. }
  219. PObject Action(char[]s:"many")(PObject[2]p)
  220. {
  221. auto name = cast(PObjectBox!(char[])) p[0];
  222. Part ret;
  223. ret.name = name.Get;
  224. ret.type = Part.Type.many;
  225. return new PObjectBox!(Part)(ret);
  226. }
  227. PObject Action(char[]s:"maybe")(PObject[2]p)
  228. {
  229. auto name = cast(PObjectBox!(char[])) p[0];
  230. Part ret;
  231. ret.name = name.Get;
  232. ret.type = Part.Type.maybe;
  233. return new PObjectBox!(Part)(ret);
  234. }
  235. PObject Action(char[]s:"one")(PObject[1]p)
  236. {
  237. auto name = cast(PObjectBox!(char[])) p[0];
  238. Part ret;
  239. ret.name = name.Get;
  240. ret.type = Part.Type.one;
  241. return new PObjectBox!(Part)(ret);
  242. }
  243. static const char[] gram =
  244. "
  245. Gram : pass1 / Rule+ ;
  246. Rule : formRule / NAME COLLON Option ElseOption* RuleEnd ;
  247. RuleEnd :
  248. pass1 / PIPE |
  249. pass1 / SEMICOLLON;
  250. Option : formOpt / NAME SLASH Parts*;
  251. ElseOption : pass2nd / PIPE Option ;
  252. Parts:
  253. any / NAME STAR |
  254. many / NAME PLUS |
  255. maybe / NAME QMARK |
  256. one / NAME;
  257. ";
  258. static const char[] mix = MakeMixin!("Gram",ReduceWhite(gram));
  259. //pragma(msg,mix);
  260. mixin(mix);
  261. }
  262. struct GRule
  263. {
  264. char[] name;
  265. Opt[] opts;
  266. char[] toString()
  267. {
  268. char[] ret = "";
  269. // writef("%s > %d\n", name, opts.length);
  270. foreach(Opt p; opts)
  271. ret ~= p.toString ~ " |\n";
  272. // writef("--%s==\n\n", ret);
  273. return name ~ " : \n" ~ ret[0..$-3] ~ " ;\n\n";
  274. }
  275. }
  276. struct Opt
  277. {
  278. char[] name;
  279. Part[] parts;
  280. char[] toString()
  281. {
  282. char[] ret = "";
  283. foreach(Part p; parts)
  284. ret ~= p.toString ~ " ";
  285. return \t ~ name ~ " / " ~ ret;
  286. }
  287. }
  288. struct Part
  289. {
  290. char[] name;
  291. enum Type { any=0, many=1, maybe=2, one=3 }
  292. Type type;
  293. char[] toString() { return name ~ "*+? "[type]; }
  294. }
  295. import std.file;
  296. import std.cstream;
  297. void main(char[][] argv)
  298. {
  299. bool
  300. printOrg = false,
  301. ConvertRec = false,
  302. TestRec = false,
  303. DmpFinal = true;
  304. char[] filename = "";
  305. foreach(arg;argv[1..$])
  306. if(arg.length >= 2)
  307. switch(arg[0..2])
  308. {
  309. case "-o": printOrg = (arg[2..$] != "-"); break;
  310. case "-c": ConvertRec = (arg[2..$] != "-"); break;
  311. case "-t": TestRec = (arg[2..$] != "-"); break;
  312. case "-f": DmpFinal = (arg[2..$] != "-"); break;
  313. default: filename = arg; break;
  314. }
  315. else
  316. {filename = arg;}
  317. data d = new data;
  318. if(filename != "")
  319. d.dat = ReduceWhite(cast(char[])std.file.read(filename));
  320. else
  321. {
  322. //d.dat = ReduceWhite(GrammarParser.gram.dup);
  323. //d.dat = ReduceWhite("AttributeSpecifier: dummy / Attribute opCollin | dummy / Attribute DeclarationBlock | ;".dup);
  324. //d.dat = "Foo: A1 / Foo B | A2 / Foo C D | A3 / Foo E | A4 / F | A5 / G H ;".dup;
  325. assert(false, "no file");
  326. }
  327. GrammarParser gp = new GrammarParser;
  328. auto g = gp.Parser(d);
  329. assert(g !is null);
  330. assert(!g.fail);
  331. GRule[] inp;
  332. GRule[] outp;
  333. auto parsed = (cast(PObjectSet)g).get;
  334. inp.length = parsed.length;
  335. //writef("Parsing returned: %s\n", g.BaseName);
  336. foreach(ind, r;parsed)
  337. {
  338. //writef(" Object #%d is: %s\n", ind, r.BaseName);
  339. inp[ind] = (cast(PObjectBox!(GRule ))r).Get;
  340. }
  341. if(printOrg)
  342. {
  343. foreach(r; inp) writef(">>%s", r.toString);
  344. }
  345. bool b;
  346. if(ConvertRec)
  347. do
  348. {
  349. b = false;
  350. b |= Convert2(inp);
  351. int index = 0;
  352. foreach(r; inp) b |= Convert(r, outp, index);
  353. inp = outp[0..index];
  354. } while(b)
  355. if(TestRec)
  356. {
  357. GRule*[char[]] rules;
  358. int[GRule*] state;
  359. foreach(inout r; inp)
  360. {
  361. rules[r.name] = &r;
  362. state[&r] = 0;
  363. }
  364. bool newc = true;
  365. char[] breaker;
  366. int Walk(GRule* r)
  367. {
  368. int max = state[r];
  369. if(max != 0) return max;
  370. state[r] = -1;
  371. foreach(o; r.opts)
  372. {
  373. GRule* first;
  374. //derr.writef("%s\n", o.toString);
  375. assert(o.parts.length > 0, r.name~":"~o.name~" has no parts");
  376. if(auto s = o.parts[0].name in rules)
  377. first = *s;
  378. else
  379. continue;
  380. int step = Walk(rules[first.name]);
  381. if(step == -1)
  382. {
  383. newc = false;
  384. if(breaker == null)
  385. breaker = first.name;
  386. writef("%s<-", first.name);
  387. if(breaker == r.name)
  388. {
  389. breaker= null;
  390. writef("%s\n", r.name);
  391. continue;
  392. }
  393. return -1;
  394. }
  395. max = max > step ? max : step;
  396. }
  397. max++;
  398. state[r] = max;
  399. return max;
  400. }
  401. int max = 0;
  402. foreach(inout r; inp)
  403. {
  404. int step = Walk(&r);
  405. max = max > step ? max : step;
  406. }
  407. if(!newc)
  408. {
  409. writef("\nfound cycle\n\n");
  410. return;
  411. }
  412. }
  413. if(DmpFinal)
  414. {
  415. foreach(r; inp) writef("%s", r.toString);
  416. }
  417. }
  418. bool Convert(inout GRule gr, inout GRule[] outp, inout int index)
  419. {
  420. foreach(Opt o; gr.opts)
  421. if(o.parts.length > 0 && o.parts[0].name == gr.name)
  422. goto patch;
  423. if(outp.length <= index)
  424. outp.length = outp.length + 10;
  425. outp[index] = gr;
  426. index++;
  427. return false;
  428. patch:
  429. GRule first, rest;
  430. rest.name = "__LRT_"~gr.name;
  431. first.name = gr.name;
  432. Part p;
  433. p.name = rest.name;
  434. p.type = Part.Type.any;
  435. Opt n;
  436. foreach(Opt o; gr.opts)
  437. {
  438. //writef("%s\n", o.name);
  439. if(o.parts.length > 0 && o.parts[0].name == gr.name)
  440. { // recusive
  441. n.name = format("$(L,%d,%s)",o.parts.length-1,o.name);
  442. n.parts = o.parts[1..$];
  443. rest.opts ~= n;
  444. }
  445. else
  446. { // non recursive
  447. n.name = format("$(T,%d,%s)",o.parts.length,o.name);
  448. n.parts = o.parts;
  449. n.parts ~= p;
  450. first.opts ~= n;
  451. }
  452. }
  453. if(outp.length <= index+1)
  454. outp.length = outp.length + 10;
  455. outp[index] = first;
  456. index++;
  457. outp[index] = rest;
  458. index++;
  459. return true;
  460. }
  461. bool Convert2(GRule[] outp)
  462. {
  463. int[char[]] map;
  464. foreach(int i, GRule g; outp) map[g.name] = i;
  465. bool ret = false;
  466. outer: foreach(first; outp)
  467. {
  468. // foreach(o; first.opts) if(o.parts[0].name == first.name) continue outer; writef("%s not direclt LR\n", first.name);
  469. Opt[] add, tmp;
  470. mid: foreach(inout fo; first.opts)
  471. {
  472. if(fo.parts.length != 1) continue mid;
  473. if(!(fo.parts[0].name in map)) continue mid;
  474. auto second = &outp[map[fo.parts[0].name]];
  475. if(second.opts.length == 0) continue mid;
  476. foreach(Opt so; second.opts)
  477. if(so.parts[0].name != first.name)
  478. continue mid;
  479. fo.name = format("$(N,%d,%s,%s)",second.opts[0].parts.length ,second.opts[0].name, fo.name);
  480. fo.parts = second.opts[0].parts;
  481. tmp.length = second.opts.length - 1;
  482. foreach(i, so; second.opts[1..$])
  483. {
  484. tmp[i].name = format("$(N,%d,%s,%s)",so.parts.length ,so.name, fo.name);
  485. tmp[i].parts = so.parts;
  486. }
  487. add ~= tmp;
  488. }
  489. if(add.length > 0)
  490. {
  491. ret |= true;
  492. first.opts ~= add;
  493. }
  494. }
  495. return ret;
  496. }