PageRenderTime 71ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/lpeg.c

https://gitlab.com/g10h4ck/nmap-gsoc2015
C | 3706 lines | 2569 code | 513 blank | 624 comment | 489 complexity | 8375c196726095a13fc4ca029271b715 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-2.0, Apache-2.0, LGPL-2.0, LGPL-2.1, MIT
  1. /*
  2. ** $Id: lptypes.h,v 1.8 2013/04/12 16:26:38 roberto Exp $
  3. ** LPeg - PEG pattern matching for Lua
  4. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  5. ** written by Roberto Ierusalimschy
  6. */
  7. #if !defined(lptypes_h)
  8. #define lptypes_h
  9. #if !defined(LPEG_DEBUG)
  10. #define NDEBUG
  11. #endif
  12. #include <assert.h>
  13. #include <limits.h>
  14. #include "lua.h"
  15. #define VERSION "0.12"
  16. #define PATTERN_T "lpeg-pattern"
  17. #define MAXSTACKIDX "lpeg-maxstack"
  /*
  ** Compatibility with Lua 5.2: the Lua 5.1 API names used in this file
  ** are redefined in terms of their 5.2 counterparts.
  */
  #if (LUA_VERSION_NUM == 502)

  #undef lua_equal
  #define lua_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ)

  #undef lua_getfenv
  #define lua_getfenv     lua_getuservalue

  #undef lua_setfenv
  #define lua_setfenv     lua_setuservalue

  #undef lua_objlen
  #define lua_objlen      lua_rawlen

  /*
  ** With a NULL name the functions are registered into the table on top
  ** of the stack; otherwise a new library table is created.
  ** The body is wrapped in do/while(0) so that the expansion is a single
  ** statement: a bare '{ ... }' followed by the caller's ';' would break
  ** an enclosing unbraced if/else.
  */
  #undef luaL_register
  #define luaL_register(L,n,f) \
    do { if ((n) == NULL) luaL_setfuncs(L,f,0); else luaL_newlib(L,f); } while (0)

  #endif
  /* default maximum size for the call/backtrack stack (may be overridden) */
  #ifndef MAXBACK
  #define MAXBACK         100
  #endif

  /* maximum number of rules in a grammar */
  #define MAXRULES        200

  /* initial size for the capture list */
  #define INITCAPSIZE     32

  /* Lua stack index of the subject */
  #define SUBJIDX         2

  /* number of fixed arguments to 'match' (before capture arguments) */
  #define FIXEDARGS       3

  /*
  ** Lua stack indices, relative to 'ptop', of the capture list, the
  ** pattern's ktable, and the backtracking stack.
  */
  #define caplistidx(ptop)        ((ptop) + 2)
  #define ktableidx(ptop)         ((ptop) + 3)
  #define stackidx(ptop)          ((ptop) + 4)
  typedef unsigned char byte;

  #define BITSPERCHAR     8

  /* number of bytes needed to hold one bit per possible character */
  #define CHARSETSIZE     ((UCHAR_MAX/BITSPERCHAR) + 1)

  /* a set of characters, stored as a bitmap */
  typedef struct Charset {
    byte cs[CHARSETSIZE];
  } Charset;

  /*
  ** Runs statement 'b' once for each byte index of a charset, with the
  ** index available in a fresh int variable named 'v'. The expansion is a
  ** plain brace block, so it is a complete statement with or without a
  ** trailing semicolon.
  */
  #define loopset(v,b)    { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
  /* charset bytes stored right after tree node 't' */
  #define treebuffer(t)           ((byte *)((t) + 1))

  /* number of TTree-sized slots needed to hold 'n' bytes */
  #define bytes2slots(n)          (((n) - 1) / sizeof(TTree) + 1)

  /* turn on bit 'b' in charset 'cs' */
  #define setchar(cs,b)           ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
  /*
  ** In capture instructions the capture 'kind' and its offset share the
  ** field 'aux': the kind sits in the low 4 bits, the offset in the high
  ** 4 bits.
  */
  #define getkind(op)             ((op)->i.aux & 0xF)
  #define getoff(op)              (((op)->i.aux >> 4) & 0xF)
  #define joinkindoff(k,o)        ((k) | ((o) << 4))

  #define MAXOFF          0xF
  #define MAXAUX          0xFF

  /* maximum number of bytes to look behind */
  #define MAXBEHIND       MAXAUX

  /* maximum size (in elements) for a pattern */
  #define MAXPATTSIZE     (SHRT_MAX - 10)

  /* size (in elements) of one instruction followed by 'l' extra bytes */
  #define instsize(l)  (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)

  /* size (in elements) of an ISet instruction */
  #define CHARSETINSTSIZE         instsize(CHARSETSIZE)

  /* size (in elements) of an IFunc instruction */
  #define funcinstsize(p)         ((p)->i.aux + 2)

  /* non-zero iff bit 'c' is on in the bitmap 'st' */
  #define testchar(st,c)  (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
  85. #endif
  86. /*
  87. ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $
  88. */
  89. #if !defined(lptree_h)
  90. #define lptree_h
  91. /*
  92. ** types of trees
  93. */
  94. typedef enum TTag {
  95. TChar = 0, TSet, TAny, /* standard PEG elements */
  96. TTrue, TFalse,
  97. TRep,
  98. TSeq, TChoice,
  99. TNot, TAnd,
  100. TCall,
  101. TOpenCall,
  102. TRule, /* sib1 is rule's pattern, sib2 is 'next' rule */
  103. TGrammar, /* sib1 is initial (and first) rule */
  104. TBehind, /* match behind */
  105. TCapture, /* regular capture */
  106. TRunTime /* run-time capture */
  107. } TTag;
  108. /* number of siblings for each tree */
  109. extern const byte numsiblings[];
  110. /*
  111. ** Tree trees
  112. ** The first sibling of a tree (if there is one) is immediately after
  113. ** the tree. A reference to a second sibling (ps) is its position
  114. ** relative to the position of the tree itself. A key in ktable
  115. ** uses the (unique) address of the original tree that created that
  116. ** entry. NULL means no data.
  117. */
  118. typedef struct TTree {
  119. byte tag;
  120. byte cap; /* kind of capture (if it is a capture) */
  121. unsigned short key; /* key in ktable for Lua data (0 if no key) */
  122. union {
  123. int ps; /* occasional second sibling */
  124. int n; /* occasional counter */
  125. } u;
  126. } TTree;
  127. /*
  128. ** A complete pattern has its tree plus, if already compiled,
  129. ** its corresponding code
  130. */
  131. typedef struct Pattern {
  132. union Instruction *code;
  133. int codesize;
  134. TTree tree[1];
  135. } Pattern;
  136. /* number of siblings for each tree */
  137. extern const byte numsiblings[];
  138. /* access to siblings */
  139. #define sib1(t) ((t) + 1)
  140. #define sib2(t) ((t) + (t)->u.ps)
  141. #endif
  142. /*
  143. ** $Id: lpcap.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
  144. */
  145. #if !defined(lpcap_h)
  146. #define lpcap_h
  /* kinds of captures */
  typedef enum CapKind {
    Cclose,
    Cposition,
    Cconst,
    Cbackref,
    Carg,
    Csimple,
    Ctable,
    Cfunction,
    Cquery,
    Cstring,
    Cnum,
    Csubst,
    Cfold,
    Cruntime,
    Cgroup
  } CapKind;
  152. typedef struct Capture {
  153. const char *s; /* subject position */
  154. short idx; /* extra info about capture (group name, arg index, etc.) */
  155. byte kind; /* kind of capture */
  156. byte siz; /* size of full capture + 1 (0 = not a full capture) */
  157. } Capture;
  158. typedef struct CapState {
  159. Capture *cap; /* current capture */
  160. Capture *ocap; /* (original) capture list */
  161. lua_State *L;
  162. int ptop; /* index of last argument to 'match' */
  163. const char *s; /* original string */
  164. int valuecached; /* value stored in cache slot */
  165. } CapState;
  166. int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
  167. int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
  168. int finddyncap (Capture *cap, Capture *last);
  169. #endif
  170. /*
  171. ** $Id: lpvm.h,v 1.2 2013/04/03 20:37:18 roberto Exp $
  172. */
  173. #if !defined(lpvm_h)
  174. #define lpvm_h
  /* Instructions of the virtual machine */
  typedef enum Opcode {
    IAny,             /* if no char, fail */
    IChar,            /* if char != aux, fail */
    ISet,             /* if char not in buff, fail */
    ITestAny,         /* if no char, jump to 'offset' */
    ITestChar,        /* if char != aux, jump to 'offset' */
    ITestSet,         /* if char not in buff, jump to 'offset' */
    ISpan,            /* read a span of chars in buff */
    IBehind,          /* walk back 'aux' characters (fail if not possible) */
    IRet,             /* return from a rule */
    IEnd,             /* end of pattern */
    IChoice,          /* stack a choice; next fail will jump to 'offset' */
    IJmp,             /* jump to 'offset' */
    ICall,            /* call rule at 'offset' */
    IOpenCall,        /* call rule number 'key' (must be closed to a ICall) */
    ICommit,          /* pop choice and jump to 'offset' */
    IPartialCommit,   /* update top choice to current position and jump */
    IBackCommit,      /* "fails" but jump to its own 'offset' */
    IFailTwice,       /* pop one choice and then fail */
    IFail,            /* go back to saved state on choice and jump to saved offset */
    IGiveup,          /* internal use */
    IFullCapture,     /* complete capture of last 'off' chars */
    IOpenCapture,     /* start a capture */
    ICloseCapture,
    ICloseRunTime
  } Opcode;
  202. typedef union Instruction {
  203. struct Inst {
  204. byte code;
  205. byte aux;
  206. short key;
  207. } i;
  208. int offset;
  209. byte buff[1];
  210. } Instruction;
  211. int getposition (lua_State *L, int t, int i);
  212. void printpatt (Instruction *p, int n);
  213. const char *match (lua_State *L, const char *o, const char *s, const char *e,
  214. Instruction *op, Capture *capture, int ptop);
  215. int verify (lua_State *L, Instruction *op, const Instruction *p,
  216. Instruction *e, int postable, int rule);
  217. void checkrule (lua_State *L, Instruction *op, int from, int to,
  218. int postable, int rule);
  219. #endif
  220. /*
  221. ** $Id: lpcode.h,v 1.5 2013/04/04 21:24:45 roberto Exp $
  222. */
  223. #if !defined(lpcode_h)
  224. #define lpcode_h
  225. #include "lua.h"
  226. int tocharset (TTree *tree, Charset *cs);
  227. int checkaux (TTree *tree, int pred);
  228. int fixedlenx (TTree *tree, int count, int len);
  229. int hascaptures (TTree *tree);
  230. int lp_gc (lua_State *L);
  231. Instruction *compile (lua_State *L, Pattern *p);
  232. void reallocprog (lua_State *L, Pattern *p, int nsize);
  233. int sizei (const Instruction *i);
  234. #define PEnullable 0
  235. #define PEnofail 1
  236. #define nofail(t) checkaux(t, PEnofail)
  237. #define nullable(t) checkaux(t, PEnullable)
  238. #define fixedlen(t) fixedlenx(t, 0, 0)
  239. #endif
  240. /*
  241. ** $Id: lpprint.h,v 1.1 2013/03/21 20:25:12 roberto Exp $
  242. */
  243. #if !defined(lpprint_h)
  244. #define lpprint_h
  #ifdef LPEG_DEBUG

  /* debug builds: real printing functions */
  void printpatt (Instruction *p, int n);
  void printtree (TTree *tree, int ident);
  void printktable (lua_State *L, int idx);
  void printcharset (const byte *st);
  void printcaplist (Capture *cap, Capture *limit);

  #else

  /*
  ** Non-debug builds: each call raises a Lua error instead. Note that
  ** 'printtree' and 'printpatt' expand to code that uses a variable 'L'
  ** taken from the place where the macro is used.
  */
  #define printktable(L,idx)  \
    luaL_error(L, "function only implemented in debug mode")
  #define printtree(tree,i)  \
    luaL_error(L, "function only implemented in debug mode")
  #define printpatt(p,n)  \
    luaL_error(L, "function only implemented in debug mode")

  #endif
  259. #endif
  260. /*
  261. ** $Id: lpcap.c,v 1.4 2013/03/21 20:25:12 roberto Exp $
  262. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  263. */
  264. #include "lua.h"
  265. #include "lauxlib.h"
  /* kind of a capture entry */
  #define captype(cap)    ((cap)->kind)

  /* is the entry a close capture? */
  #define isclosecap(cap) (captype(cap) == Cclose)

  /* subject address computed from the entry's 's' and 'siz' fields */
  #define closeaddr(c)    ((c)->s + (c)->siz - 1)

  /* is the entry a full (single-entry) capture? */
  #define isfullcap(cap)  ((cap)->siz != 0)

  /* push entry 'v' of the running pattern's ktable */
  #define getfromktable(cs,v)     lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)

  /* push the ktable value referenced by the current capture */
  #define pushluaval(cs)          getfromktable(cs, (cs)->cap->idx)
  272. /*
  273. ** Put at the cache for Lua values the value indexed by 'v' in ktable
  274. ** of the running pattern (if it is not there yet); returns its index.
  275. */
  276. static int updatecache (CapState *cs, int v) {
  277. int idx = cs->ptop + 1; /* stack index of cache for Lua values */
  278. if (v != cs->valuecached) { /* not there? */
  279. getfromktable(cs, v); /* get value from 'ktable' */
  280. lua_replace(cs->L, idx); /* put it at reserved stack position */
  281. cs->valuecached = v; /* keep track of what is there */
  282. }
  283. return idx;
  284. }
  285. static int pushcapture (CapState *cs);
  286. /*
  287. ** Goes back in a list of captures looking for an open capture
  288. ** corresponding to a close
  289. */
  290. static Capture *findopen (Capture *cap) {
  291. int n = 0; /* number of closes waiting an open */
  292. for (;;) {
  293. cap--;
  294. if (isclosecap(cap)) n++; /* one more open to skip */
  295. else if (!isfullcap(cap))
  296. if (n-- == 0) return cap;
  297. }
  298. }
  299. /*
  300. ** Go to the next capture
  301. */
  302. static void nextcap (CapState *cs) {
  303. Capture *cap = cs->cap;
  304. if (!isfullcap(cap)) { /* not a single capture? */
  305. int n = 0; /* number of opens waiting a close */
  306. for (;;) { /* look for corresponding close */
  307. cap++;
  308. if (isclosecap(cap)) {
  309. if (n-- == 0) break;
  310. }
  311. else if (!isfullcap(cap)) n++;
  312. }
  313. }
  314. cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
  315. }
  316. /*
  317. ** Push on the Lua stack all values generated by nested captures inside
  318. ** the current capture. Returns number of values pushed. 'addextra'
  319. ** makes it push the entire match after all captured values. The
  320. ** entire match is pushed also if there are no other nested values,
  321. ** so the function never returns zero.
  322. */
  323. static int pushnestedvalues (CapState *cs, int addextra) {
  324. Capture *co = cs->cap;
  325. if (isfullcap(cs->cap++)) { /* no nested captures? */
  326. lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
  327. return 1; /* that is it */
  328. }
  329. else {
  330. int n = 0;
  331. while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
  332. n += pushcapture(cs);
  333. if (addextra || n == 0) { /* need extra? */
  334. lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
  335. n++;
  336. }
  337. cs->cap++; /* skip close entry */
  338. return n;
  339. }
  340. }
  341. /*
  342. ** Push only the first value generated by nested captures
  343. */
  344. static void pushonenestedvalue (CapState *cs) {
  345. int n = pushnestedvalues(cs, 0);
  346. if (n > 1)
  347. lua_pop(cs->L, n - 1); /* pop extra values */
  348. }
  349. /*
  350. ** Try to find a named group capture with the name given at the top of
  351. ** the stack; goes backward from 'cap'.
  352. */
  353. static Capture *findback (CapState *cs, Capture *cap) {
  354. lua_State *L = cs->L;
  355. while (cap-- > cs->ocap) { /* repeat until end of list */
  356. if (isclosecap(cap))
  357. cap = findopen(cap); /* skip nested captures */
  358. else if (!isfullcap(cap))
  359. continue; /* opening an enclosing capture: skip and get previous */
  360. if (captype(cap) == Cgroup) {
  361. getfromktable(cs, cap->idx); /* get group name */
  362. if (lua_equal(L, -2, -1)) { /* right group? */
  363. lua_pop(L, 2); /* remove reference name and group name */
  364. return cap;
  365. }
  366. else lua_pop(L, 1); /* remove group name */
  367. }
  368. }
  369. luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
  370. return NULL; /* to avoid warnings */
  371. }
  372. /*
  373. ** Back-reference capture. Return number of values pushed.
  374. */
  375. static int backrefcap (CapState *cs) {
  376. int n;
  377. Capture *curr = cs->cap;
  378. pushluaval(cs); /* reference name */
  379. cs->cap = findback(cs, curr); /* find corresponding group */
  380. n = pushnestedvalues(cs, 0); /* push group's values */
  381. cs->cap = curr + 1;
  382. return n;
  383. }
  384. /*
  385. ** Table capture: creates a new table and populates it with nested
  386. ** captures.
  387. */
  388. static int tablecap (CapState *cs) {
  389. lua_State *L = cs->L;
  390. int n = 0;
  391. lua_newtable(L);
  392. if (isfullcap(cs->cap++))
  393. return 1; /* table is empty */
  394. while (!isclosecap(cs->cap)) {
  395. if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
  396. pushluaval(cs); /* push group name */
  397. pushonenestedvalue(cs);
  398. lua_settable(L, -3);
  399. }
  400. else { /* not a named group */
  401. int i;
  402. int k = pushcapture(cs);
  403. for (i = k; i > 0; i--) /* store all values into table */
  404. lua_rawseti(L, -(i + 1), n + i);
  405. n += k;
  406. }
  407. }
  408. cs->cap++; /* skip close entry */
  409. return 1; /* number of values pushed (only the table) */
  410. }
  411. /*
  412. ** Table-query capture
  413. */
  414. static int querycap (CapState *cs) {
  415. int idx = cs->cap->idx;
  416. pushonenestedvalue(cs); /* get nested capture */
  417. lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
  418. if (!lua_isnil(cs->L, -1))
  419. return 1;
  420. else { /* no value */
  421. lua_pop(cs->L, 1); /* remove nil */
  422. return 0;
  423. }
  424. }
  425. /*
  426. ** Fold capture
  427. */
  428. static int foldcap (CapState *cs) {
  429. int n;
  430. lua_State *L = cs->L;
  431. int idx = cs->cap->idx;
  432. if (isfullcap(cs->cap++) || /* no nested captures? */
  433. isclosecap(cs->cap) || /* no nested captures (large subject)? */
  434. (n = pushcapture(cs)) == 0) /* nested captures with no values? */
  435. return luaL_error(L, "no initial value for fold capture");
  436. if (n > 1)
  437. lua_pop(L, n - 1); /* leave only one result for accumulator */
  438. while (!isclosecap(cs->cap)) {
  439. lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
  440. lua_insert(L, -2); /* put it before accumulator */
  441. n = pushcapture(cs); /* get next capture's values */
  442. lua_call(L, n + 1, 1); /* call folding function */
  443. }
  444. cs->cap++; /* skip close entry */
  445. return 1; /* only accumulator left on the stack */
  446. }
  447. /*
  448. ** Function capture
  449. */
  450. static int functioncap (CapState *cs) {
  451. int n;
  452. int top = lua_gettop(cs->L);
  453. pushluaval(cs); /* push function */
  454. n = pushnestedvalues(cs, 0); /* push nested captures */
  455. lua_call(cs->L, n, LUA_MULTRET); /* call function */
  456. return lua_gettop(cs->L) - top; /* return function's results */
  457. }
  458. /*
  459. ** Select capture
  460. */
  461. static int numcap (CapState *cs) {
  462. int idx = cs->cap->idx; /* value to select */
  463. if (idx == 0) { /* no values? */
  464. nextcap(cs); /* skip entire capture */
  465. return 0; /* no value produced */
  466. }
  467. else {
  468. int n = pushnestedvalues(cs, 0);
  469. if (n < idx) /* invalid index? */
  470. return luaL_error(cs->L, "no capture '%d'", idx);
  471. else {
  472. lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
  473. lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
  474. lua_pop(cs->L, n - 1); /* remove other captures */
  475. return 1;
  476. }
  477. }
  478. }
  479. /*
  480. ** Return the stack index of the first runtime capture in the given
  481. ** list of captures (or zero if no runtime captures)
  482. */
  483. int finddyncap (Capture *cap, Capture *last) {
  484. for (; cap < last; cap++) {
  485. if (cap->kind == Cruntime)
  486. return cap->idx; /* stack position of first capture */
  487. }
  488. return 0; /* no dynamic captures in this segment */
  489. }
  490. /*
  491. ** Calls a runtime capture. Returns number of captures removed by
  492. ** the call, including the initial Cgroup. (Captures to be added are
  493. ** on the Lua stack.)
  494. */
  495. int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
  496. int n, id;
  497. lua_State *L = cs->L;
  498. int otop = lua_gettop(L);
  499. Capture *open = findopen(close);
  500. assert(captype(open) == Cgroup);
  501. id = finddyncap(open, close); /* get first dynamic capture argument */
  502. close->kind = Cclose; /* closes the group */
  503. close->s = s;
  504. cs->cap = open; cs->valuecached = 0; /* prepare capture state */
  505. luaL_checkstack(L, 4, "too many runtime captures");
  506. pushluaval(cs); /* push function to be called */
  507. lua_pushvalue(L, SUBJIDX); /* push original subject */
  508. lua_pushinteger(L, s - cs->s + 1); /* push current position */
  509. n = pushnestedvalues(cs, 0); /* push nested captures */
  510. lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
  511. if (id > 0) { /* are there old dynamic captures to be removed? */
  512. int i;
  513. for (i = id; i <= otop; i++)
  514. lua_remove(L, id); /* remove old dynamic captures */
  515. *rem = otop - id + 1; /* total number of dynamic captures removed */
  516. }
  517. else
  518. *rem = 0; /* no dynamic captures removed */
  519. return close - open; /* number of captures of all kinds removed */
  520. }
  521. /*
  522. ** Auxiliary structure for substitution and string captures: keep
  523. ** information about nested captures for future use, avoiding to push
  524. ** string results into Lua
  525. */
  526. typedef struct StrAux {
  527. int isstring; /* whether capture is a string */
  528. union {
  529. Capture *cp; /* if not a string, respective capture */
  530. struct { /* if it is a string... */
  531. const char *s; /* ... starts here */
  532. const char *e; /* ... ends here */
  533. } s;
  534. } u;
  535. } StrAux;
  536. #define MAXSTRCAPS 10
  537. /*
  538. ** Collect values from current capture into array 'cps'. Current
  539. ** capture must be Cstring (first call) or Csimple (recursive calls).
  540. ** (In first call, fills %0 with whole match for Cstring.)
  541. ** Returns number of elements in the array that were filled.
  542. */
  543. static int getstrcaps (CapState *cs, StrAux *cps, int n) {
  544. int k = n++;
  545. cps[k].isstring = 1; /* get string value */
  546. cps[k].u.s.s = cs->cap->s; /* starts here */
  547. if (!isfullcap(cs->cap++)) { /* nested captures? */
  548. while (!isclosecap(cs->cap)) { /* traverse them */
  549. if (n >= MAXSTRCAPS) /* too many captures? */
  550. nextcap(cs); /* skip extra captures (will not need them) */
  551. else if (captype(cs->cap) == Csimple) /* string? */
  552. n = getstrcaps(cs, cps, n); /* put info. into array */
  553. else {
  554. cps[n].isstring = 0; /* not a string */
  555. cps[n].u.cp = cs->cap; /* keep original capture */
  556. nextcap(cs);
  557. n++;
  558. }
  559. }
  560. cs->cap++; /* skip close */
  561. }
  562. cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
  563. return n;
  564. }
  565. /*
  566. ** add next capture value (which should be a string) to buffer 'b'
  567. */
  568. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
  569. /*
  570. ** String capture: add result to buffer 'b' (instead of pushing
  571. ** it into the stack)
  572. */
  573. static void stringcap (luaL_Buffer *b, CapState *cs) {
  574. StrAux cps[MAXSTRCAPS];
  575. int n;
  576. size_t len, i;
  577. const char *fmt; /* format string */
  578. fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
  579. n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
  580. for (i = 0; i < len; i++) { /* traverse them */
  581. if (fmt[i] != '%') /* not an escape? */
  582. luaL_addchar(b, fmt[i]); /* add it to buffer */
  583. else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
  584. luaL_addchar(b, fmt[i]); /* add to buffer */
  585. else {
  586. int l = fmt[i] - '0'; /* capture index */
  587. if (l > n)
  588. luaL_error(cs->L, "invalid capture index (%d)", l);
  589. else if (cps[l].isstring)
  590. luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
  591. else {
  592. Capture *curr = cs->cap;
  593. cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
  594. if (!addonestring(b, cs, "capture"))
  595. luaL_error(cs->L, "no values in capture index %d", l);
  596. cs->cap = curr; /* continue from where it stopped */
  597. }
  598. }
  599. }
  600. }
  601. /*
  602. ** Substitution capture: add result to buffer 'b'
  603. */
  604. static void substcap (luaL_Buffer *b, CapState *cs) {
  605. const char *curr = cs->cap->s;
  606. if (isfullcap(cs->cap)) /* no nested captures? */
  607. luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
  608. else {
  609. cs->cap++; /* skip open entry */
  610. while (!isclosecap(cs->cap)) { /* traverse nested captures */
  611. const char *next = cs->cap->s;
  612. luaL_addlstring(b, curr, next - curr); /* add text up to capture */
  613. if (addonestring(b, cs, "replacement"))
  614. curr = closeaddr(cs->cap - 1); /* continue after match */
  615. else /* no capture value */
  616. curr = next; /* keep original text in final result */
  617. }
  618. luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
  619. }
  620. cs->cap++; /* go to next capture */
  621. }
  622. /*
  623. ** Evaluates a capture and adds its first value to buffer 'b'; returns
  624. ** whether there was a value
  625. */
  626. static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
  627. switch (captype(cs->cap)) {
  628. case Cstring:
  629. stringcap(b, cs); /* add capture directly to buffer */
  630. return 1;
  631. case Csubst:
  632. substcap(b, cs); /* add capture directly to buffer */
  633. return 1;
  634. default: {
  635. lua_State *L = cs->L;
  636. int n = pushcapture(cs);
  637. if (n > 0) {
  638. if (n > 1) lua_pop(L, n - 1); /* only one result */
  639. if (!lua_isstring(L, -1))
  640. luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
  641. luaL_addvalue(b);
  642. }
  643. return n;
  644. }
  645. }
  646. }
  647. /*
  648. ** Push all values of the current capture into the stack; returns
  649. ** number of values pushed
  650. */
  651. static int pushcapture (CapState *cs) {
  652. lua_State *L = cs->L;
  653. luaL_checkstack(L, 4, "too many captures");
  654. switch (captype(cs->cap)) {
  655. case Cposition: {
  656. lua_pushinteger(L, cs->cap->s - cs->s + 1);
  657. cs->cap++;
  658. return 1;
  659. }
  660. case Cconst: {
  661. pushluaval(cs);
  662. cs->cap++;
  663. return 1;
  664. }
  665. case Carg: {
  666. int arg = (cs->cap++)->idx;
  667. if (arg + FIXEDARGS > cs->ptop)
  668. return luaL_error(L, "reference to absent argument #%d", arg);
  669. lua_pushvalue(L, arg + FIXEDARGS);
  670. return 1;
  671. }
  672. case Csimple: {
  673. int k = pushnestedvalues(cs, 1);
  674. lua_insert(L, -k); /* make whole match be first result */
  675. return k;
  676. }
  677. case Cruntime: {
  678. lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
  679. return 1;
  680. }
  681. case Cstring: {
  682. luaL_Buffer b;
  683. luaL_buffinit(L, &b);
  684. stringcap(&b, cs);
  685. luaL_pushresult(&b);
  686. return 1;
  687. }
  688. case Csubst: {
  689. luaL_Buffer b;
  690. luaL_buffinit(L, &b);
  691. substcap(&b, cs);
  692. luaL_pushresult(&b);
  693. return 1;
  694. }
  695. case Cgroup: {
  696. if (cs->cap->idx == 0) /* anonymous group? */
  697. return pushnestedvalues(cs, 0); /* add all nested values */
  698. else { /* named group: add no values */
  699. nextcap(cs); /* skip capture */
  700. return 0;
  701. }
  702. }
  703. case Cbackref: return backrefcap(cs);
  704. case Ctable: return tablecap(cs);
  705. case Cfunction: return functioncap(cs);
  706. case Cnum: return numcap(cs);
  707. case Cquery: return querycap(cs);
  708. case Cfold: return foldcap(cs);
  709. default: assert(0); return 0;
  710. }
  711. }
  712. /*
  713. ** Prepare a CapState structure and traverse the entire list of
  714. ** captures in the stack pushing its results. 's' is the subject
  715. ** string, 'r' is the final position of the match, and 'ptop'
  716. ** the index in the stack where some useful values were pushed.
  717. ** Returns the number of results pushed. (If the list produces no
  718. ** results, push the final position of the match.)
  719. */
  720. int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
  721. Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
  722. int n = 0;
  723. if (!isclosecap(capture)) { /* is there any capture? */
  724. CapState cs;
  725. cs.ocap = cs.cap = capture; cs.L = L;
  726. cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
  727. do { /* collect their values */
  728. n += pushcapture(&cs);
  729. } while (!isclosecap(cs.cap));
  730. }
  731. if (n == 0) { /* no capture values? */
  732. lua_pushinteger(L, r - s + 1); /* return only end position */
  733. n = 1;
  734. }
  735. return n;
  736. }
  737. /*
  738. ** $Id: lpcode.c,v 1.18 2013/04/12 16:30:33 roberto Exp $
  739. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  740. */
  741. #include <limits.h>
  742. #include "lua.h"
  743. #include "lauxlib.h"
  744. /* signals a "no-instruction */
  745. #define NOINST -1
  746. static const Charset fullset_ =
  747. {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  748. 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  749. 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  750. 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
  751. static const Charset *fullset = &fullset_;
  752. /*
  753. ** {======================================================
  754. ** Analysis and some optimizations
  755. ** =======================================================
  756. */
  757. /*
  758. ** Check whether a charset is empty (IFail), singleton (IChar),
  759. ** full (IAny), or none of those (ISet).
  760. */
  761. static Opcode charsettype (const byte *cs, int *c) {
  762. int count = 0;
  763. int i;
  764. int candidate = -1; /* candidate position for a char */
  765. for (i = 0; i < CHARSETSIZE; i++) {
  766. int b = cs[i];
  767. if (b == 0) {
  768. if (count > 1) return ISet; /* else set is still empty */
  769. }
  770. else if (b == 0xFF) {
  771. if (count < (i * BITSPERCHAR))
  772. return ISet;
  773. else count += BITSPERCHAR; /* set is still full */
  774. }
  775. else if ((b & (b - 1)) == 0) { /* byte has only one bit? */
  776. if (count > 0)
  777. return ISet; /* set is neither full nor empty */
  778. else { /* set has only one char till now; track it */
  779. count++;
  780. candidate = i;
  781. }
  782. }
  783. else return ISet; /* byte is neither empty, full, nor singleton */
  784. }
  785. switch (count) {
  786. case 0: return IFail; /* empty set */
  787. case 1: { /* singleton; find character bit inside byte */
  788. int b = cs[candidate];
  789. *c = candidate * BITSPERCHAR;
  790. if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
  791. if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
  792. if ((b & 0x02) != 0) { *c += 1; }
  793. return IChar;
  794. }
  795. default: {
  796. assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */
  797. return IAny;
  798. }
  799. }
  800. }
  801. /*
  802. ** A few basic operations on Charsets
  803. */
  804. static void cs_complement (Charset *cs) {
  805. loopset(i, cs->cs[i] = ~cs->cs[i]);
  806. }
  807. static int cs_equal (const byte *cs1, const byte *cs2) {
  808. loopset(i, if (cs1[i] != cs2[i]) return 0);
  809. return 1;
  810. }
  811. /*
  812. ** computes whether sets cs1 and cs2 are disjoint
  813. */
  814. static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
  815. loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
  816. return 1;
  817. }
  818. /*
  819. ** Convert a 'char' pattern (TSet, TChar, TAny) to a charset
  820. */
  821. int tocharset (TTree *tree, Charset *cs) {
  822. switch (tree->tag) {
  823. case TSet: { /* copy set */
  824. loopset(i, cs->cs[i] = treebuffer(tree)[i]);
  825. return 1;
  826. }
  827. case TChar: { /* only one char */
  828. assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
  829. loopset(i, cs->cs[i] = 0); /* erase all chars */
  830. setchar(cs->cs, tree->u.n); /* add that one */
  831. return 1;
  832. }
  833. case TAny: {
  834. loopset(i, cs->cs[i] = 0xFF); /* add all to the set */
  835. return 1;
  836. }
  837. default: return 0;
  838. }
  839. }
  840. /*
  841. ** Checks whether a pattern has captures
  842. */
  843. int hascaptures (TTree *tree) {
  844. tailcall:
  845. switch (tree->tag) {
  846. case TCapture: case TRunTime:
  847. return 1;
  848. default: {
  849. switch (numsiblings[tree->tag]) {
  850. case 1: /* return hascaptures(sib1(tree)); */
  851. tree = sib1(tree); goto tailcall;
  852. case 2:
  853. if (hascaptures(sib1(tree))) return 1;
  854. /* else return hascaptures(sib2(tree)); */
  855. tree = sib2(tree); goto tailcall;
  856. default: assert(numsiblings[tree->tag] == 0); return 0;
  857. }
  858. }
  859. }
  860. }
  861. /*
  862. ** Checks how a pattern behaves regarding the empty string,
  863. ** in one of two different ways:
  864. ** A pattern is *nullable* if it can match without consuming any character;
  865. ** A pattern is *nofail* if it never fails for any string
  866. ** (including the empty string).
  867. ** The difference is only for predicates and run-time captures;
  868. ** for other patterns, the two properties are equivalent.
  869. ** (With predicates, &'a' is nullable but not nofail. Of course,
  870. ** nofail => nullable.)
  871. ** These functions are all convervative in the following way:
  872. ** p is nullable => nullable(p)
  873. ** nofail(p) => p cannot fail
  874. ** The function assumes that TOpenCall is not nullable;
  875. ** this will be checked again when the grammar is fixed.)
  876. ** Run-time captures can do whatever they want, so the result
  877. ** is conservative.
  878. */
  879. int checkaux (TTree *tree, int pred) {
  880. tailcall:
  881. switch (tree->tag) {
  882. case TChar: case TSet: case TAny:
  883. case TFalse: case TOpenCall:
  884. return 0; /* not nullable */
  885. case TRep: case TTrue:
  886. return 1; /* no fail */
  887. case TNot: case TBehind: /* can match empty, but can fail */
  888. if (pred == PEnofail) return 0;
  889. else return 1; /* PEnullable */
  890. case TAnd: /* can match empty; fail iff body does */
  891. if (pred == PEnullable) return 1;
  892. /* else return checkaux(sib1(tree), pred); */
  893. tree = sib1(tree); goto tailcall;
  894. case TRunTime: /* can fail; match empty iff body does */
  895. if (pred == PEnofail) return 0;
  896. /* else return checkaux(sib1(tree), pred); */
  897. tree = sib1(tree); goto tailcall;
  898. case TSeq:
  899. if (!checkaux(sib1(tree), pred)) return 0;
  900. /* else return checkaux(sib2(tree), pred); */
  901. tree = sib2(tree); goto tailcall;
  902. case TChoice:
  903. if (checkaux(sib2(tree), pred)) return 1;
  904. /* else return checkaux(sib1(tree), pred); */
  905. tree = sib1(tree); goto tailcall;
  906. case TCapture: case TGrammar: case TRule:
  907. /* return checkaux(sib1(tree), pred); */
  908. tree = sib1(tree); goto tailcall;
  909. case TCall: /* return checkaux(sib2(tree), pred); */
  910. tree = sib2(tree); goto tailcall;
  911. default: assert(0); return 0;
  912. };
  913. }
  914. /*
  915. ** number of characters to match a pattern (or -1 if variable)
  916. ** ('count' avoids infinite loops for grammars)
  917. */
  918. int fixedlenx (TTree *tree, int count, int len) {
  919. tailcall:
  920. switch (tree->tag) {
  921. case TChar: case TSet: case TAny:
  922. return len + 1;
  923. case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
  924. return len;
  925. case TRep: case TRunTime: case TOpenCall:
  926. return -1;
  927. case TCapture: case TRule: case TGrammar:
  928. /* return fixedlenx(sib1(tree), count); */
  929. tree = sib1(tree); goto tailcall;
  930. case TCall:
  931. if (count++ >= MAXRULES)
  932. return -1; /* may be a loop */
  933. /* else return fixedlenx(sib2(tree), count); */
  934. tree = sib2(tree); goto tailcall;
  935. case TSeq: {
  936. len = fixedlenx(sib1(tree), count, len);
  937. if (len < 0) return -1;
  938. /* else return fixedlenx(sib2(tree), count, len); */
  939. tree = sib2(tree); goto tailcall;
  940. }
  941. case TChoice: {
  942. int n1, n2;
  943. n1 = fixedlenx(sib1(tree), count, len);
  944. if (n1 < 0) return -1;
  945. n2 = fixedlenx(sib2(tree), count, len);
  946. if (n1 == n2) return n1;
  947. else return -1;
  948. }
  949. default: assert(0); return 0;
  950. };
  951. }
  952. /*
  953. ** Computes the 'first set' of a pattern.
  954. ** The result is a conservative aproximation:
  955. ** match p ax -> x' for some x ==> a in first(p).
  956. ** The set 'follow' is the first set of what follows the
  957. ** pattern (full set if nothing follows it).
  958. ** The function returns 0 when this set can be used for
  959. ** tests that avoid the pattern altogether.
  960. ** A non-zero return can happen for two reasons:
  961. ** 1) match p '' -> '' ==> returns 1.
  962. ** (tests cannot be used because they always fail for an empty input)
  963. ** 2) there is a match-time capture ==> returns 2.
  964. ** (match-time captures should not be avoided by optimizations)
  965. */
  966. static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
  967. tailcall:
  968. switch (tree->tag) {
  969. case TChar: case TSet: case TAny: {
  970. tocharset(tree, firstset);
  971. return 0;
  972. }
  973. case TTrue: {
  974. loopset(i, firstset->cs[i] = follow->cs[i]);
  975. return 1;
  976. }
  977. case TFalse: {
  978. loopset(i, firstset->cs[i] = 0);
  979. return 0;
  980. }
  981. case TChoice: {
  982. Charset csaux;
  983. int e1 = getfirst(sib1(tree), follow, firstset);
  984. int e2 = getfirst(sib2(tree), follow, &csaux);
  985. loopset(i, firstset->cs[i] |= csaux.cs[i]);
  986. return e1 | e2;
  987. }
  988. case TSeq: {
  989. if (!nullable(sib1(tree))) {
  990. /* return getfirst(sib1(tree), fullset, firstset); */
  991. tree = sib1(tree); follow = fullset; goto tailcall;
  992. }
  993. else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
  994. Charset csaux;
  995. int e2 = getfirst(sib2(tree), follow, &csaux);
  996. int e1 = getfirst(sib1(tree), &csaux, firstset);
  997. if (e1 == 0) return 0; /* 'e1' ensures that first can be used */
  998. else if ((e1 | e2) & 2) /* one of the children has a matchtime? */
  999. return 2; /* pattern has a matchtime capture */
  1000. else return e2; /* else depends on 'e2' */
  1001. }
  1002. }
  1003. case TRep: {
  1004. getfirst(sib1(tree), follow, firstset);
  1005. loopset(i, firstset->cs[i] |= follow->cs[i]);
  1006. return 1; /* accept the empty string */
  1007. }
  1008. case TCapture: case TGrammar: case TRule: {
  1009. /* return getfirst(sib1(tree), follow, firstset); */
  1010. tree = sib1(tree); goto tailcall;
  1011. }
  1012. case TRunTime: { /* function invalidates any follow info. */
  1013. int e = getfirst(sib1(tree), fullset, firstset);
  1014. if (e) return 2; /* function is not "protected"? */
  1015. else return 0; /* pattern inside capture ensures first can be used */
  1016. }
  1017. case TCall: {
  1018. /* return getfirst(sib2(tree), follow, firstset); */
  1019. tree = sib2(tree); goto tailcall;
  1020. }
  1021. case TAnd: {
  1022. int e = getfirst(sib1(tree), follow, firstset);
  1023. loopset(i, firstset->cs[i] &= follow->cs[i]);
  1024. return e;
  1025. }
  1026. case TNot: {
  1027. if (tocharset(sib1(tree), firstset)) {
  1028. cs_complement(firstset);
  1029. return 1;
  1030. }
  1031. /* else go through */
  1032. }
  1033. case TBehind: { /* instruction gives no new information */
  1034. /* call 'getfirst' to check for math-time captures */
  1035. int e = getfirst(sib1(tree), follow, firstset);
  1036. loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
  1037. return e | 1; /* always can accept the empty string */
  1038. }
  1039. default: assert(0); return 0;
  1040. }
  1041. }
  1042. /*
  1043. ** If it returns true, then pattern can fail only depending on the next
  1044. ** character of the subject
  1045. */
  1046. static int headfail (TTree *tree) {
  1047. tailcall:
  1048. switch (tree->tag) {
  1049. case TChar: case TSet: case TAny: case TFalse:
  1050. return 1;
  1051. case TTrue: case TRep: case TRunTime: case TNot:
  1052. case TBehind:
  1053. return 0;
  1054. case TCapture: case TGrammar: case TRule: case TAnd:
  1055. tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
  1056. case TCall:
  1057. tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
  1058. case TSeq:
  1059. if (!nofail(sib2(tree))) return 0;
  1060. /* else return headfail(sib1(tree)); */
  1061. tree = sib1(tree); goto tailcall;
  1062. case TChoice:
  1063. if (!headfail(sib1(tree))) return 0;
  1064. /* else return headfail(sib2(tree)); */
  1065. tree = sib2(tree); goto tailcall;
  1066. default: assert(0); return 0;
  1067. }
  1068. }
  1069. /*
  1070. ** Check whether the code generation for the given tree can benefit
  1071. ** from a follow set (to avoid computing the follow set when it is
  1072. ** not needed)
  1073. */
  1074. static int needfollow (TTree *tree) {
  1075. tailcall:
  1076. switch (tree->tag) {
  1077. case TChar: case TSet: case TAny:
  1078. case TFalse: case TTrue: case TAnd: case TNot:
  1079. case TRunTime: case TGrammar: case TCall: case TBehind:
  1080. return 0;
  1081. case TChoice: case TRep:
  1082. return 1;
  1083. case TCapture:
  1084. tree = sib1(tree); goto tailcall;
  1085. case TSeq:
  1086. tree = sib2(tree); goto tailcall;
  1087. default: assert(0); return 0;
  1088. }
  1089. }
  1090. /* }====================================================== */
  1091. /*
  1092. ** {======================================================
  1093. ** Code generation
  1094. ** =======================================================
  1095. */
  1096. /*
  1097. ** size of an instruction
  1098. */
  1099. int sizei (const Instruction *i) {
  1100. switch((Opcode)i->i.code) {
  1101. case ISet: case ISpan: return CHARSETINSTSIZE;
  1102. case ITestSet: return CHARSETINSTSIZE + 1;
  1103. case ITestChar: case ITestAny: case IChoice: case IJmp:
  1104. case ICall: case IOpenCall: case ICommit: case IPartialCommit:
  1105. case IBackCommit: return 2;
  1106. default: return 1;
  1107. }
  1108. }
  1109. /*
  1110. ** state for the compiler
  1111. */
  1112. typedef struct CompileState {
  1113. Pattern *p; /* pattern being compiled */
  1114. int ncode; /* next position in p->code to be filled */
  1115. lua_State *L;
  1116. } CompileState;
  1117. /*
  1118. ** code generation is recursive; 'opt' indicates that the code is
  1119. ** being generated under a 'IChoice' operator jumping to its end.
  1120. ** 'tt' points to a previous test protecting this code. 'fl' is
  1121. ** the follow set of the pattern.
  1122. */
  1123. static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
  1124. const Charset *fl);
  1125. void reallocprog (lua_State *L, Pattern *p, int nsize) {
  1126. void *ud;
  1127. lua_Alloc f = lua_getallocf(L, &ud);
  1128. void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
  1129. nsize * sizeof(Instruction));
  1130. if (newblock == NULL && nsize > 0)
  1131. luaL_error(L, "not enough memory");
  1132. p->code = (Instruction *)newblock;
  1133. p->codesize = nsize;
  1134. }
  1135. static int nextinstruction (CompileState *compst) {
  1136. int size = compst->p->codesize;
  1137. if (compst->ncode >= size)
  1138. reallocprog(compst->L, compst->p, size * 2);
  1139. return compst->ncode++;
  1140. }
  /* lvalue for the instruction slot at index 'pos' of the pattern being compiled */
  #define getinstr(st,pos)	((st)->p->code[pos])
  1142. static int addinstruction (CompileState *compst, Opcode op, int aux) {
  1143. int i = nextinstruction(compst);
  1144. getinstr(compst, i).i.code = op;
  1145. getinstr(compst, i).i.aux = aux;
  1146. return i;
  1147. }
  1148. static int addoffsetinst (CompileState *compst, Opcode op) {
  1149. int i = addinstruction(compst, op, 0); /* instruction */
  1150. addinstruction(compst, (Opcode)0, 0); /* open space for offset */
  1151. assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
  1152. return i;
  1153. }
  1154. static void setoffset (CompileState *compst, int instruction, int offset) {
  1155. getinstr(compst, instruction + 1).offset = offset;
  1156. }
  1157. /*
  1158. ** Add a capture instruction:
  1159. ** 'op' is the capture instruction; 'cap' the capture kind;
  1160. ** 'key' the key into ktable; 'aux' is optional offset
  1161. **
  1162. */
  1163. static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
  1164. int aux) {
  1165. int i = addinstruction(compst, op, joinkindoff(cap, aux));
  1166. getinstr(compst, i).i.key = key;
  1167. return i;
  1168. }
  /* index where the next instruction will be emitted */
  #define gethere(compst)	((compst)->ncode)
  /*
  ** absolute index reached by the jump-like instruction at index 'i':
  ** its own position plus the relative offset stored in the next slot.
  ** Both arguments are fully parenthesized so that any expression can
  ** be passed safely (e.g. a conditional expression as 'i').
  */
  #define target(code,i)	((i) + (code)[(i) + 1].offset)
  1171. static void jumptothere (CompileState *compst, int instruction, int target) {
  1172. if (instruction >= 0)
  1173. setoffset(compst, instruction, target - instruction);
  1174. }
  1175. static void jumptohere (CompileState *compst, int instruction) {
  1176. jumptothere(compst, instruction, gethere(compst));
  1177. }
  1178. /*
  1179. ** Code an IChar instruction, or IAny if there is an equivalent
  1180. ** test dominating it
  1181. */
  1182. static void codechar (CompileState *compst, int c, int tt) {
  1183. if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
  1184. getinstr(compst, tt).i.aux == c)
  1185. addinstruction(compst, IAny, 0);
  1186. else
  1187. addinstruction(compst, IChar, c);
  1188. }
  1189. /*
  1190. ** Add a charset posfix to an instruction
  1191. */
  1192. static void addcharset (CompileState *compst, const byte *cs) {
  1193. int p = gethere(compst);
  1194. int i;
  1195. for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
  1196. nextinstruction(compst); /* space for buffer */
  1197. /* fill buffer with charset */
  1198. loopset(j, getinstr(compst, p).buff[j] = cs[j]);
  1199. }
  1200. /*
  1201. ** code a char set, optimizing unit sets for IChar, "complete"
  1202. ** sets for IAny, and empty sets for IFail; also use an IAny
  1203. ** when instruction is dominated by an equivalent test.
  1204. */
  1205. static void codecharset (CompileState *compst, const byte *cs, int tt) {
  1206. int c = 0; /* (=) to avoid warnings */
  1207. Opcode op = charsettype(cs, &c);
  1208. switch (op) {
  1209. case IChar: codechar(compst, c, tt); break;
  1210. case ISet: { /* non-trivial set? */
  1211. if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
  1212. cs_equal(cs, getinstr(compst, tt + 2).buff))
  1213. addinstruction(compst, IAny, 0);
  1214. else {
  1215. addinstruction(compst, ISet, 0);
  1216. addcharset(compst, cs);
  1217. }
  1218. break;
  1219. }
  1220. default: addinstruction(compst, op, c); break;
  1221. }
  1222. }
  1223. /*
  1224. ** code a test set, optimizing unit sets for ITestChar, "complete"
  1225. ** sets for ITestAny, and empty sets for IJmp (always fails).
  1226. ** 'e' is true iff test should accept the empty string. (Test
  1227. ** instructions in the current VM never accept the empty string.)
  1228. */
  1229. static int codetestset (CompileState *compst, Charset *cs, int e) {
  1230. if (e) return NOINST; /* no test */
  1231. else {
  1232. int c = 0;
  1233. Opcode op = charsettype(cs->cs, &c);
  1234. switch (op) {
  1235. case IFail: return addoffsetinst(compst, IJmp); /* always jump */
  1236. case IAny: return addoffsetinst(compst, ITestAny);
  1237. case IChar: {
  1238. int i = addoffsetinst(compst, ITestChar);
  1239. getinstr(compst, i).i.aux = c;
  1240. return i;
  1241. }
  1242. case ISet: {
  1243. int i = addoffsetinst(compst, ITestSet);
  1244. addcharset(compst, cs->cs);
  1245. return i;
  1246. }
  1247. default: assert(0); return 0;
  1248. }
  1249. }
  1250. }
  1251. /*
  1252. ** Find the final destination of a sequence of jumps
  1253. */
  1254. static int finaltarget (Instruction *code, int i) {
  1255. while (code[i].i.code == IJmp)
  1256. i = target(code, i);
  1257. return i;
  1258. }
  1259. /*
  1260. ** final label (after traversing any jumps)
  1261. */
  1262. static int finallabel (Instruction *code, int i) {
  1263. return finaltarget(code, target(code, i));
  1264. }
  1265. /*
  1266. ** <behind(p)> == behind n; <p> (where n = fixedlen(p))
  1267. */
  1268. static void codebehind (CompileState *compst, TTree *tree) {
  1269. if (tree->u.n > 0)
  1270. addinstruction(compst, IBehind, tree->u.n);
  1271. codegen(compst, sib1(tree), 0, NOINST, fullset);
  1272. }
  1273. /*
  1274. ** Choice; optimizations:
  1275. ** - when p1 is headfail
  1276. ** - when first(p1) and first(p2) are disjoint; than
  1277. ** a character not in first(p1) cannot go to p1, and a character
  1278. ** in first(p1) cannot go to p2 (at it is not in first(p2)).
  1279. ** (The optimization is not valid if p1 accepts the empty string,
  1280. ** as then there is no character at all...)
  1281. ** - when p2 is empty and opt is true; a IPartialCommit can resuse
  1282. ** the Choice already active in the stack.
  1283. */
  1284. static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
  1285. const Charset *fl) {
  1286. int emptyp2 = (p2->tag == TTrue);
  1287. Charset cs1, cs2;
  1288. int e1 = getfirst(p1, fullset, &cs1);
  1289. if (headfail(p1) ||
  1290. (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
  1291. /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
  1292. int test = codetestset(compst, &cs1, 0);
  1293. int jmp = NOINST;
  1294. codegen(compst, p1, 0, test, fl);
  1295. if (!emptyp2)
  1296. jmp = addoffsetinst(compst, IJmp);
  1297. jumptohere(compst, test);
  1298. codegen(compst, p2, opt, NOINST, fl);
  1299. jumptohere(compst, jmp);
  1300. }
  1301. else if (opt && emptyp2) {
  1302. /* p1? == IPartialCommit; p1 */
  1303. jumptohere(compst, addoffsetinst(compst, IPartialCommit));
  1304. codegen(compst, p1, 1, NOINST, fullset);
  1305. }
  1306. else {
  1307. /* <p1 / p2> ==
  1308. test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
  1309. int pcommit;
  1310. int test = codetestset(compst, &cs1, e1);
  1311. int pchoice = addoffsetinst(compst, IChoice);
  1312. codegen(compst, p1, emptyp2, test, fullset);
  1313. pcommit = addoffsetinst(compst, ICommit);
  1314. jumptohere(compst, pchoice);
  1315. jumptohere(compst, test);
  1316. codegen(compst, p2, opt, NOINST, fl);
  1317. jumptohere(compst, pcommit);
  1318. }
  1319. }
  1320. /*
  1321. ** And predicate
  1322. ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
  1323. ** (valid only when 'p' has no captures)
  1324. */
  1325. static void codeand (CompileState *compst, TTree *tree, int tt) {
  1326. int n = fixedlen(tree);
  1327. if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
  1328. codegen(compst, tree, 0, tt, fullset);
  1329. if (n > 0)
  1330. addinstruction(compst, IBehind, n);
  1331. }
  1332. else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
  1333. int pcommit;
  1334. int pchoice = addoffsetinst(compst, IChoice);
  1335. codegen(compst, tree, 0, tt, fullset);
  1336. pcommit = addoffsetinst(compst, IBackCommit);
  1337. jumptohere(compst, pchoice);
  1338. addinstruction(compst, IFail, 0);
  1339. jumptohere(compst, pcommit);
  1340. }
  1341. }
  1342. /*
  1343. ** Captures: if pattern has fixed (and not too big) length, use
  1344. ** a single IFullCapture instruction after the match; otherwise,
  1345. ** enclose the pattern with OpenCapture - CloseCapture.
  1346. */
  1347. static void codecapture (CompileState *compst, TTree *tree, int tt,
  1348. const Charset *fl) {
  1349. int len = fixedlen(sib1(tree));
  1350. if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
  1351. codegen(compst, sib1(tree), 0, tt, fl);
  1352. addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
  1353. }
  1354. else {
  1355. addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
  1356. codegen(compst, sib1(tree), 0, tt, fl);
  1357. addinstcap(compst, ICloseCapture, Cclose, 0, 0);
  1358. }
  1359. }
  1360. static void coderuntime (CompileState *compst, TTree *tree, int tt) {
  1361. addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
  1362. codegen(compst, sib1(tree), 0, tt, fullset);
  1363. addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
  1364. }
  1365. /*
  1366. ** Repetion; optimizations:
  1367. ** When pattern is a charset, can use special instruction ISpan.
  1368. ** When pattern is head fail, or if it starts with characters that
  1369. ** are disjoint from what follows the repetions, a simple test
  1370. ** is enough (a fail inside the repetition would backtrack to fail
  1371. ** again in the following pattern, so there is no need for a choice).
  1372. ** When 'opt' is true, the repetion can reuse the Choice already
  1373. ** active in the stack.
  1374. */
  1375. static void coderep (CompileState *compst, TTree *tree, int opt,
  1376. const Charset *fl) {
  1377. Charset st;
  1378. if (tocharset(tree, &st)) {
  1379. addinstruction(compst, ISpan, 0);
  1380. addcharset(compst, st.cs);
  1381. }
  1382. else {
  1383. int e1 = getfirst(tree, fullset, &st);
  1384. if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
  1385. /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
  1386. int jmp;
  1387. int test = codetestset(compst, &st, 0);
  1388. codegen(compst, tree, opt, test, fullset);
  1389. jmp = addoffsetinst(compst, IJmp);
  1390. jumptohere(compst, test);
  1391. jumptothere(compst, jmp, test);
  1392. }
  1393. else {
  1394. /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
  1395. /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
  1396. int commit, l2;
  1397. int test = codetestset(compst, &st, e1);
  1398. int pchoice = NOINST;
  1399. if (opt)
  1400. jumptohere(compst, addoffsetinst(compst, IPartialCommit));
  1401. else
  1402. pchoice = addoffsetinst(compst, IChoice);
  1403. l2 = gethere(compst);
  1404. codegen(compst, tree, 0, NOINST, fullset);
  1405. commit = addoffsetinst(compst, IPartialCommit);
  1406. jumptothere(compst, commit, l2);
  1407. jumptohere(compst, pchoice);
  1408. jumptohere(compst, test);
  1409. }
  1410. }
  1411. }
  1412. /*
  1413. ** Not predicate; optimizations:
  1414. ** In any case, if first test fails, 'not' succeeds, so it can jump to
  1415. ** the end. If pattern is headfail, that is all (it cannot fail
  1416. ** in other parts); this case includes 'not' of simple sets. Otherwise,
  1417. ** use the default code (a choice plus a failtwice).
  1418. */
  1419. static void codenot (CompileState *compst, TTree *tree) {
  1420. Charset st;
  1421. int e = getfirst(tree, fullset, &st);
  1422. int test = codetestset(compst, &st, e);
  1423. if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
  1424. addinstruction(compst, IFail, 0);
  1425. else {
  1426. /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
  1427. int pchoice = addoffsetinst(compst, IChoice);
  1428. codegen(compst, tree, 0, NOINST, fullset);
  1429. addinstruction(compst, IFailTwice, 0);
  1430. jumptohere(compst, pchoice);
  1431. }
  1432. jumptohere(compst, test);
  1433. }
  1434. /*
  1435. ** change open calls to calls, using list 'positions' to find
  1436. ** correct offsets; also optimize tail calls
  1437. */
  1438. static void correctcalls (CompileState *compst, int *positions,
  1439. int from, int to) {
  1440. int i;
  1441. Instruction *code = compst->p->code;
  1442. for (i = from; i < to; i += sizei(&code[i])) {
  1443. if (code[i].i.code == IOpenCall) {
  1444. int n = code[i].i.key; /* rule number */
  1445. int rule = positions[n]; /* rule position */
  1446. assert(rule == from || code[rule - 1].i.code == IRet);
  1447. if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
  1448. code[i].i.code = IJmp; /* tail call */
  1449. else
  1450. code[i].i.code = ICall;
  1451. jumptothere(compst, i, rule); /* call jumps to respective rule */
  1452. }
  1453. }
  1454. assert(i == to);
  1455. }
  1456. /*
  1457. ** Code for a grammar:
  1458. ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
  1459. */
  1460. static void codegrammar (CompileState *compst, TTree *grammar) {
  1461. int positions[MAXRULES];
  1462. int rulenumber = 0;
  1463. TTree *rule;
  1464. int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
  1465. int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
  1466. int start = gethere(compst); /* here starts the initial rule */
  1467. jumptohere(compst, firstcall);
  1468. for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
  1469. positions[rulenumber++] = gethere(compst); /* save rule position */
  1470. codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
  1471. addinstruction(compst, IRet, 0);
  1472. }
  1473. assert(rule->tag == TTrue);
  1474. jumptohere(compst, jumptoend);
  1475. correctcalls(compst, positions, start, gethere(compst));
  1476. }
  1477. static void codecall (CompileState *compst, TTree *call) {
  1478. int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
  1479. getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
  1480. assert(sib2(call)->tag == TRule);
  1481. }
  1482. /*
  1483. ** Code first child of a sequence
  1484. ** (second child is called in-place to allow tail call)
  1485. ** Return 'tt' for second child
  1486. */
  1487. static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
  1488. int tt, const Charset *fl) {
  1489. if (needfollow(p1)) {
  1490. Charset fl1;
  1491. getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
  1492. codegen(compst, p1, 0, tt, &fl1);
  1493. }
  1494. else /* use 'fullset' as follow */
  1495. codegen(compst, p1, 0, tt, fullset);
  1496. if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
  1497. return NOINST; /* invalidate test */
  1498. else return tt; /* else 'tt' still protects sib2 */
  1499. }
  1500. /*
  1501. ** Main code-generation function: dispatch to auxiliar functions
  1502. ** according to kind of tree
  1503. */
  1504. static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
  1505. const Charset *fl) {
  1506. tailcall:
  1507. switch (tree->tag) {
  1508. case TChar: codechar(compst, tree->u.n, tt); break;
  1509. case TAny: addinstruction(compst, IAny, 0); break;
  1510. case TSet: codecharset(compst, treebuffer(tree), tt); break;
  1511. case TTrue: break;
  1512. case TFalse: addinstruction(compst, IFail, 0); break;
  1513. case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
  1514. case TRep: coderep(compst, sib1(tree), opt, fl); break;
  1515. case TBehind: codebehind(compst, tree); break;
  1516. case TNot: codenot(compst, sib1(tree)); break;
  1517. case TAnd: codeand(compst, sib1(tree), tt); break;
  1518. case TCapture: codecapture(compst, tree, tt, fl); break;
  1519. case TRunTime: coderuntime(compst, tree, tt); break;
  1520. case TGrammar: codegrammar(compst, tree); break;
  1521. case TCall: codecall(compst, tree); break;
  1522. case TSeq: {
  1523. tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
  1524. /* codegen(compst, p2, opt, tt, fl); */
  1525. tree = sib2(tree); goto tailcall;
  1526. }
  1527. default: assert(0);
  1528. }
  1529. }
  1530. /*
  1531. ** Optimize jumps and other jump-like instructions.
  1532. ** * Update labels of instructions with labels to their final
  1533. ** destinations (e.g., choice L1; ... L1: jmp L2: becomes
  1534. ** choice L2)
  1535. ** * Jumps to other instructions that do jumps become those
  1536. ** instructions (e.g., jump to return becomes a return; jump
  1537. ** to commit becomes a commit)
  1538. */
  1539. static void peephole (CompileState *compst) {
  1540. Instruction *code = compst->p->code;
  1541. int i;
  1542. for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
  1543. switch (code[i].i.code) {
  1544. case IChoice: case ICall: case ICommit: case IPartialCommit:
  1545. case IBackCommit: case ITestChar: case ITestSet:
  1546. case ITestAny: { /* instructions with labels */
  1547. jumptothere(compst, i, finallabel(code, i)); /* optimize label */
  1548. break;
  1549. }
  1550. case IJmp: {
  1551. int ft = finaltarget(code, i);
  1552. switch (code[ft].i.code) { /* jumping to what? */
  1553. case IRet: case IFail: case IFailTwice:
  1554. case IEnd: { /* instructions with unconditional implicit jumps */
  1555. code[i] = code[ft]; /* jump becomes that instruction */
  1556. code[i + 1].i.code = IAny; /* 'no-op' for target position */
  1557. break;
  1558. }
  1559. case ICommit: case IPartialCommit:
  1560. case IBackCommit: { /* inst. with unconditional explicit jumps */
  1561. int fft = finallabel(code, ft);
  1562. code[i] = code[ft]; /* jump becomes that instruction... */
  1563. jumptothere(compst, i, fft); /* but must correct its offset */
  1564. i--; /* reoptimize its label */
  1565. break;
  1566. }
  1567. default: {
  1568. jumptothere(compst, i, ft); /* optimize label */
  1569. break;
  1570. }
  1571. }
  1572. break;
  1573. }
  1574. default: break;
  1575. }
  1576. }
  1577. assert(code[i - 1].i.code == IEnd);
  1578. }
  1579. /*
  1580. ** Compile a pattern
  1581. */
  1582. Instruction *compile (lua_State *L, Pattern *p) {
  1583. CompileState compst;
  1584. compst.p = p; compst.ncode = 0; compst.L = L;
  1585. reallocprog(L, p, 2); /* minimum initial size */
  1586. codegen(&compst, p->tree, 0, NOINST, fullset);
  1587. addinstruction(&compst, IEnd, 0);
  1588. reallocprog(L, p, compst.ncode); /* set final size */
  1589. peephole(&compst);
  1590. return p->code;
  1591. }
  1592. /* }====================================================== */
  1593. /*
  1594. ** $Id: lpprint.c,v 1.7 2013/04/12 16:29:49 roberto Exp $
  1595. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  1596. */
  1597. #include <ctype.h>
  1598. #include <limits.h>
  1599. #include <stdio.h>
  1600. #if defined(LPEG_DEBUG)
  1601. /*
  1602. ** {======================================================
  1603. ** Printing patterns (for debugging)
  1604. ** =======================================================
  1605. */
  1606. void printcharset (const byte *st) {
  1607. int i;
  1608. printf("[");
  1609. for (i = 0; i <= UCHAR_MAX; i++) {
  1610. int first = i;
  1611. while (testchar(st, i) && i <= UCHAR_MAX) i++;
  1612. if (i - 1 == first) /* unary range? */
  1613. printf("(%02x)", first);
  1614. else if (i - 1 > first) /* non-empty range? */
  1615. printf("(%02x-%02x)", first, i - 1);
  1616. }
  1617. printf("]");
  1618. }
  /*
  ** Print the name of a capture kind. A value outside the table of
  ** known kinds is printed as "unknown(<n>)" instead of indexing past
  ** the end of the table.
  */
  static void printcapkind (int kind) {
    static const char *const modes[] = {
      "close", "position", "constant", "backref",
      "argument", "simple", "table", "function",
      "query", "string", "num", "substitution", "fold",
      "runtime", "group"};
    const int nmodes = (int)(sizeof(modes) / sizeof(modes[0]));
    if (kind >= 0 && kind < nmodes)
      printf("%s", modes[kind]);
    else
      printf("unknown(%d)", kind);
  }
  1627. static void printjmp (const Instruction *op, const Instruction *p) {
  1628. printf("-> %d", (int)(p + (p + 1)->offset - op));
  1629. }
  1630. static void printinst (const Instruction *op, const Instruction *p) {
  1631. const char *const names[] = {
  1632. "any", "char", "set",
  1633. "testany", "testchar", "testset",
  1634. "span", "behind",
  1635. "ret", "end",
  1636. "choice", "jmp", "call", "open_call",
  1637. "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
  1638. "fullcapture", "opencapture", "closecapture", "closeruntime"
  1639. };
  1640. printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
  1641. switch ((Opcode)p->i.code) {
  1642. case IChar: {
  1643. printf("'%c'", p->i.aux);
  1644. break;
  1645. }
  1646. case ITestChar: {
  1647. printf("'%c'", p->i.aux); printjmp(op, p);
  1648. break;
  1649. }
  1650. case IFullCapture: {
  1651. printcapkind(getkind(p));
  1652. printf(" (size = %d) (idx = %d)", getoff(p), p->i.key);
  1653. break;
  1654. }
  1655. case IOpenCapture: {
  1656. printcapkind(getkind(p));
  1657. printf(" (idx = %d)", p->i.key);
  1658. break;
  1659. }
  1660. case ISet: {
  1661. printcharset((p+1)->buff);
  1662. break;
  1663. }
  1664. case ITestSet: {
  1665. printcharset((p+2)->buff); printjmp(op, p);
  1666. break;
  1667. }
  1668. case ISpan: {
  1669. printcharset((p+1)->buff);
  1670. break;
  1671. }
  1672. case IOpenCall: {
  1673. printf("-> %d", (p + 1)->offset);
  1674. break;
  1675. }
  1676. case IBehind: {
  1677. printf("%d", p->i.aux);
  1678. break;
  1679. }
  1680. case IJmp: case ICall: case ICommit: case IChoice:
  1681. case IPartialCommit: case IBackCommit: case ITestAny: {
  1682. printjmp(op, p);
  1683. break;
  1684. }
  1685. default: break;
  1686. }
  1687. printf("\n");
  1688. }
  1689. void printpatt (Instruction *p, int n) {
  1690. Instruction *op = p;
  1691. while (p < op + n) {
  1692. printinst(op, p);
  1693. p += sizei(p);
  1694. }
  1695. }
  1696. #if defined(LPEG_DEBUG)
  1697. static void printcap (Capture *cap) {
  1698. printcapkind(cap->kind);
  1699. printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
  1700. }
  1701. void printcaplist (Capture *cap, Capture *limit) {
  1702. printf(">======\n");
  1703. for (; cap->s && (limit == NULL || cap < limit); cap++)
  1704. printcap(cap);
  1705. printf("=======\n");
  1706. }
  1707. #endif
  1708. /* }====================================================== */
  1709. /*
  1710. ** {======================================================
  1711. ** Printing trees (for debugging)
  1712. ** =======================================================
  1713. */
  /*
  ** Printable names for tree nodes; 'printtree' indexes this table
  ** with the tag of the node being printed.
  */
  static const char *tagnames[] = {
    "char",
    "set",
    "any",
    "true",
    "false",
    "rep",
    "seq",
    "choice",
    "not",
    "and",
    "call",
    "opencall",
    "rule",
    "grammar",
    "behind",
    "capture",
    "run-time"
  };
  1724. void printtree (TTree *tree, int ident) {
  1725. int i;
  1726. for (i = 0; i < ident; i++) printf(" ");
  1727. printf("%s", tagnames[tree->tag]);
  1728. switch (tree->tag) {
  1729. case TChar: {
  1730. int c = tree->u.n;
  1731. if (isprint(c))
  1732. printf(" '%c'\n", c);
  1733. else
  1734. printf(" (%02X)\n", c);
  1735. break;
  1736. }
  1737. case TSet: {
  1738. printcharset(treebuffer(tree));
  1739. printf("\n");
  1740. break;
  1741. }
  1742. case TOpenCall: case TCall: {
  1743. printf(" key: %d\n", tree->key);
  1744. break;
  1745. }
  1746. case TBehind: {
  1747. printf(" %d\n", tree->u.n);
  1748. printtree(sib1(tree), ident + 2);
  1749. break;
  1750. }
  1751. case TCapture: {
  1752. printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n);
  1753. printtree(sib1(tree), ident + 2);
  1754. break;
  1755. }
  1756. case TRule: {
  1757. printf(" n: %d key: %d\n", tree->cap, tree->key);
  1758. printtree(sib1(tree), ident + 2);
  1759. break; /* do not print next rule as a sibling */
  1760. }
  1761. case TGrammar: {
  1762. TTree *rule = sib1(tree);
  1763. printf(" %d\n", tree->u.n); /* number of rules */
  1764. for (i = 0; i < tree->u.n; i++) {
  1765. printtree(rule, ident + 2);
  1766. rule = sib2(rule);
  1767. }
  1768. assert(rule->tag == TTrue); /* sentinel */
  1769. break;
  1770. }
  1771. default: {
  1772. int sibs = numsiblings[tree->tag];
  1773. printf("\n");
  1774. if (sibs >= 1) {
  1775. printtree(sib1(tree), ident + 2);
  1776. if (sibs >= 2)
  1777. printtree(sib2(tree), ident + 2);
  1778. }
  1779. break;
  1780. }
  1781. }
  1782. }
  1783. void printktable (lua_State *L, int idx) {
  1784. int n, i;
  1785. lua_getfenv(L, idx);
  1786. if (lua_isnil(L, -1)) /* no ktable? */
  1787. return;
  1788. n = lua_objlen(L, -1);
  1789. printf("[");
  1790. for (i = 1; i <= n; i++) {
  1791. printf("%d = ", i);
  1792. lua_rawgeti(L, -1, i);
  1793. if (lua_isstring(L, -1))
  1794. printf("%s ", lua_tostring(L, -1));
  1795. else
  1796. printf("%s ", lua_typename(L, lua_type(L, -1)));
  1797. lua_pop(L, 1);
  1798. }
  1799. printf("]\n");
  1800. /* leave ktable at the stack */
  1801. }
  1802. /* }====================================================== */
  1803. #endif
  1804. /*
  1805. ** $Id: lptree.c,v 1.10 2013/04/12 16:30:33 roberto Exp $
  1806. ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  1807. */
  1808. #include <ctype.h>
  1809. #include <limits.h>
  1810. #include <string.h>
  1811. #include "lua.h"
  1812. #include "lauxlib.h"
  1813. /* number of siblings for each tree */
  1814. const byte numsiblings[] = {
  1815. 0, 0, 0, /* char, set, any */
  1816. 0, 0, /* true, false */
  1817. 1, /* rep */
  1818. 2, 2, /* seq, choice */
  1819. 1, 1, /* not, and */
  1820. 0, 0, 2, 1, /* call, opencall, rule, grammar */
  1821. 1, /* behind */
  1822. 1, 1 /* capture, runtime capture */
  1823. };
  1824. static TTree *newgrammar (lua_State *L, int arg);
  1825. /*
  1826. ** returns a reasonable name for value at index 'idx' on the stack
  1827. */
  1828. static const char *val2str (lua_State *L, int idx) {
  1829. const char *k = lua_tostring(L, idx);
  1830. if (k != NULL)
  1831. return lua_pushfstring(L, "%s", k);
  1832. else
  1833. return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
  1834. }
  1835. /*
  1836. ** Fix a TOpenCall into a TCall node, using table 'postable' to
  1837. ** translate a key to its rule address in the tree. Raises an
  1838. ** error if key does not exist.
  1839. */
  1840. static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
  1841. int n;
  1842. lua_rawgeti(L, -1, t->key); /* get rule's name */
  1843. lua_gettable(L, postable); /* query name in position table */
  1844. n = lua_tonumber(L, -1); /* get (absolute) position */
  1845. lua_pop(L, 1); /* remove position */
  1846. if (n == 0) { /* no position? */
  1847. lua_rawgeti(L, -1, t->key); /* get rule's name again */
  1848. luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
  1849. }
  1850. t->tag = TCall;
  1851. t->u.ps = n - (t - g); /* position relative to node */
  1852. assert(sib2(t)->tag == TRule);
  1853. sib2(t)->key = t->key;
  1854. }
  1855. /*
  1856. ** Transform left associative constructions into right
  1857. ** associative ones, for sequence and choice; that is:
  1858. ** (t11 + t12) + t2 => t11 + (t12 + t2)
  1859. ** (t11 * t12) * t2 => t11 * (t12 * t2)
  1860. ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
  1861. */
  1862. static void correctassociativity (TTree *tree) {
  1863. TTree *t1 = sib1(tree);
  1864. assert(tree->tag == TChoice || tree->tag == TSeq);
  1865. while (t1->tag == tree->tag) {
  1866. int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */
  1867. int n11size = t1->u.ps - 1;
  1868. int n12size = n1size - n11size - 1;
  1869. memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */
  1870. tree->u.ps = n11size + 1;
  1871. sib2(tree)->tag = tree->tag;
  1872. sib2(tree)->u.ps = n12size + 1;
  1873. }
  1874. }
  1875. /*
  1876. ** Make final adjustments in a tree. Fix open calls in tree 't',
  1877. ** making them refer to their respective rules or raising appropriate
  1878. ** errors (if not inside a grammar). Correct associativity of associative
  1879. ** constructions (making them right associative). Assume that tree's
  1880. ** ktable is at the top of the stack (for error messages).
  1881. */
  1882. static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
  1883. tailcall:
  1884. switch (t->tag) {
  1885. case TGrammar: /* subgrammars were already fixed */
  1886. return;
  1887. case TOpenCall: {
  1888. if (g != NULL) /* inside a grammar? */
  1889. fixonecall(L, postable, g, t);
  1890. else { /* open call outside grammar */
  1891. lua_rawgeti(L, -1, t->key);
  1892. luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
  1893. }
  1894. break;
  1895. }
  1896. case TSeq: case TChoice:
  1897. correctassociativity(t);
  1898. break;
  1899. }
  1900. switch (numsiblings[t->tag]) {
  1901. case 1: /* finalfix(L, postable, g, sib1(t)); */
  1902. t = sib1(t); goto tailcall;
  1903. case 2:
  1904. finalfix(L, postable, g, sib1(t));
  1905. t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */
  1906. default: assert(numsiblings[t->tag] == 0); break;
  1907. }
  1908. }
  1909. /*
  1910. ** {======================================================
  1911. ** Tree generation
  1912. ** =======================================================
  1913. */
  1914. /*
  1915. ** In 5.2, could use 'luaL_testudata'...
  1916. */
  1917. static int testpattern (lua_State *L, int idx) {
  1918. if (lua_touserdata(L, idx)) { /* value is a userdata? */
  1919. if (lua_getmetatable(L, idx)) { /* does it have a metatable? */
  1920. luaL_getmetatable(L, PATTERN_T);
  1921. if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */
  1922. lua_pop(L, 2); /* remove both metatables */
  1923. return 1;
  1924. }
  1925. }
  1926. }
  1927. return 0;
  1928. }
  1929. static Pattern *getpattern (lua_State *L, int idx) {
  1930. return (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
  1931. }
  1932. static int getsize (lua_State *L, int idx) {
  1933. return (lua_objlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
  1934. }
  1935. static TTree *gettree (lua_State *L, int idx, int *len) {
  1936. Pattern *p = getpattern(L, idx);
  1937. if (len)
  1938. *len = getsize(L, idx);
  1939. return p->tree;
  1940. }
  1941. /*
  1942. ** create a pattern
  1943. */
  1944. static TTree *newtree (lua_State *L, int len) {
  1945. size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
  1946. Pattern *p = (Pattern *)lua_newuserdata(L, size);
  1947. luaL_getmetatable(L, PATTERN_T);
  1948. lua_setmetatable(L, -2);
  1949. p->code = NULL; p->codesize = 0;
  1950. return p->tree;
  1951. }
  1952. static TTree *newleaf (lua_State *L, int tag) {
  1953. TTree *tree = newtree(L, 1);
  1954. tree->tag = tag;
  1955. return tree;
  1956. }
  1957. static TTree *newcharset (lua_State *L) {
  1958. TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
  1959. tree->tag = TSet;
  1960. loopset(i, treebuffer(tree)[i] = 0);
  1961. return tree;
  1962. }
  1963. /*
  1964. ** add to tree a sequence where first sibling is 'sib' (with size
  1965. ** 'sibsize'); returns position for second sibling
  1966. */
  1967. static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
  1968. tree->tag = TSeq; tree->u.ps = sibsize + 1;
  1969. memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
  1970. return sib2(tree);
  1971. }
  1972. /*
  1973. ** Add element 'idx' to 'ktable' of pattern at the top of the stack;
  1974. ** create new 'ktable' if necessary. Return index of new element.
  1975. */
  1976. static int addtoktable (lua_State *L, int idx) {
  1977. if (idx == 0 || lua_isnil(L, idx)) /* no actual value to insert? */
  1978. return 0;
  1979. else {
  1980. int n;
  1981. lua_getfenv(L, -1); /* get ktable from pattern */
  1982. n = lua_objlen(L, -1);
  1983. if (n == 0) { /* is it empty/non-existent? */
  1984. lua_pop(L, 1); /* remove it */
  1985. lua_createtable(L, 1, 0); /* create a fresh table */
  1986. }
  1987. lua_pushvalue(L, idx); /* element to be added */
  1988. lua_rawseti(L, -2, n + 1);
  1989. lua_setfenv(L, -2); /* set it as ktable for pattern */
  1990. return n + 1;
  1991. }
  1992. }
  1993. /*
  1994. ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
  1995. ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
  1996. ** must build a sequence of sequence of sequence...)
  1997. */
  1998. static void fillseq (TTree *tree, int tag, int n, const char *s) {
  1999. int i;
  2000. for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */
  2001. tree->tag = TSeq; tree->u.ps = 2;
  2002. sib1(tree)->tag = tag;
  2003. sib1(tree)->u.n = s ? (byte)s[i] : 0;
  2004. tree = sib2(tree);
  2005. }
  2006. tree->tag = tag; /* last one does not need TSeq */
  2007. tree->u.n = s ? (byte)s[i] : 0;
  2008. }
  2009. /*
  2010. ** Numbers as patterns:
  2011. ** 0 == true (always match); n == TAny repeated 'n' times;
  2012. ** -n == not (TAny repeated 'n' times)
  2013. */
  2014. static TTree *numtree (lua_State *L, int n) {
  2015. if (n == 0)
  2016. return newleaf(L, TTrue);
  2017. else {
  2018. TTree *tree, *nd;
  2019. if (n > 0)
  2020. tree = nd = newtree(L, 2 * n - 1);
  2021. else { /* negative: code it as !(-n) */
  2022. n = -n;
  2023. tree = newtree(L, 2 * n);
  2024. tree->tag = TNot;
  2025. nd = sib1(tree);
  2026. }
  2027. fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */
  2028. return tree;
  2029. }
  2030. }
  2031. /*
  2032. ** Convert value at index 'idx' to a pattern
  2033. */
  2034. static TTree *getpatt (lua_State *L, int idx, int *len) {
  2035. TTree *tree;
  2036. switch (lua_type(L, idx)) {
  2037. case LUA_TSTRING: {
  2038. size_t slen;
  2039. const char *s = lua_tolstring(L, idx, &slen); /* get string */
  2040. if (slen == 0) /* empty? */
  2041. tree = newleaf(L, TTrue); /* always match */
  2042. else {
  2043. tree = newtree(L, 2 * (slen - 1) + 1);
  2044. fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */
  2045. }
  2046. break;
  2047. }
  2048. case LUA_TNUMBER: {
  2049. int n = lua_tointeger(L, idx);
  2050. tree = numtree(L, n);
  2051. break;
  2052. }
  2053. case LUA_TBOOLEAN: {
  2054. tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
  2055. break;
  2056. }
  2057. case LUA_TTABLE: {
  2058. tree = newgrammar(L, idx);
  2059. break;
  2060. }
  2061. case LUA_TFUNCTION: {
  2062. tree = newtree(L, 2);
  2063. tree->tag = TRunTime;
  2064. tree->key = addtoktable(L, idx);
  2065. sib1(tree)->tag = TTrue;
  2066. break;
  2067. }
  2068. default: {
  2069. return gettree(L, idx, len);
  2070. }
  2071. }
  2072. lua_replace(L, idx); /* put new tree into 'idx' slot */
  2073. if (len)
  2074. *len = getsize(L, idx);
  2075. return tree;
  2076. }
  2077. /*
  2078. ** Return the number of elements in the ktable of pattern at 'idx'.
  2079. ** In Lua 5.2, default "environment" for patterns is nil, not
  2080. ** a table. Treat it as an empty table. In Lua 5.1, assumes that
  2081. ** the environment has no numeric indices (len == 0)
  2082. */
  2083. static int ktablelen (lua_State *L, int idx) {
  2084. if (!lua_istable(L, idx)) return 0;
  2085. else return lua_objlen(L, idx);
  2086. }
  2087. /*
  2088. ** Concatentate the contents of table 'idx1' into table 'idx2'.
  2089. ** (Assume that both indices are negative.)
  2090. ** Return the original length of table 'idx2'
  2091. */
  2092. static int concattable (lua_State *L, int idx1, int idx2) {
  2093. int i;
  2094. int n1 = ktablelen(L, idx1);
  2095. int n2 = ktablelen(L, idx2);
  2096. if (n1 == 0) return 0; /* nothing to correct */
  2097. for (i = 1; i <= n1; i++) {
  2098. lua_rawgeti(L, idx1, i);
  2099. lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
  2100. }
  2101. return n2;
  2102. }
  2103. /*
  2104. ** Make a merge of ktables from p1 and p2 the ktable for the new
  2105. ** pattern at the top of the stack.
  2106. */
  2107. static int joinktables (lua_State *L, int p1, int p2) {
  2108. int n1, n2;
  2109. lua_getfenv(L, p1); /* get ktables */
  2110. lua_getfenv(L, p2);
  2111. n1 = ktablelen(L, -2);
  2112. n2 = ktablelen(L, -1);
  2113. if (n1 == 0 && n2 == 0) { /* are both tables empty? */
  2114. lua_pop(L, 2); /* nothing to be done; pop tables */
  2115. return 0; /* nothing to correct */
  2116. }
  2117. if (n2 == 0 || lua_equal(L, -2, -1)) { /* second table is empty or equal? */
  2118. lua_pop(L, 1); /* pop 2nd table */
  2119. lua_setfenv(L, -2); /* set 1st ktable into new pattern */
  2120. return 0; /* nothing to correct */
  2121. }
  2122. if (n1 == 0) { /* first table is empty? */
  2123. lua_setfenv(L, -3); /* set 2nd table into new pattern */
  2124. lua_pop(L, 1); /* pop 1st table */
  2125. return 0; /* nothing to correct */
  2126. }
  2127. else {
  2128. lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
  2129. /* stack: new p; ktable p1; ktable p2; new ktable */
  2130. concattable(L, -3, -1); /* from p1 into new ktable */
  2131. concattable(L, -2, -1); /* from p2 into new ktable */
  2132. lua_setfenv(L, -4); /* new ktable becomes p env */
  2133. lua_pop(L, 2); /* pop other ktables */
  2134. return n1; /* correction for indices from p2 */
  2135. }
  2136. }
  2137. static void correctkeys (TTree *tree, int n) {
  2138. if (n == 0) return; /* no correction? */
  2139. tailcall:
  2140. switch (tree->tag) {
  2141. case TOpenCall: case TCall: case TRunTime: case TRule: {
  2142. if (tree->key > 0)
  2143. tree->key += n;
  2144. break;
  2145. }
  2146. case TCapture: {
  2147. if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
  2148. tree->key += n;
  2149. break;
  2150. }
  2151. default: break;
  2152. }
  2153. switch (numsiblings[tree->tag]) {
  2154. case 1: /* correctkeys(sib1(tree), n); */
  2155. tree = sib1(tree); goto tailcall;
  2156. case 2:
  2157. correctkeys(sib1(tree), n);
  2158. tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
  2159. default: assert(numsiblings[tree->tag] == 0); break;
  2160. }
  2161. }
  2162. /*
  2163. ** copy 'ktable' of element 'idx' to new tree (on top of stack)
  2164. */
  2165. static void copyktable (lua_State *L, int idx) {
  2166. lua_getfenv(L, idx);
  2167. lua_setfenv(L, -2);
  2168. }
  2169. /*
  2170. ** merge 'ktable' from rule at stack index 'idx' into 'ktable'
  2171. ** from tree at the top of the stack, and correct corresponding
  2172. ** tree.
  2173. */
  2174. static void mergektable (lua_State *L, int idx, TTree *rule) {
  2175. int n;
  2176. lua_getfenv(L, -1); /* get ktables */
  2177. lua_getfenv(L, idx);
  2178. n = concattable(L, -1, -2);
  2179. lua_pop(L, 2); /* remove both ktables */
  2180. correctkeys(rule, n);
  2181. }
  2182. /*
  2183. ** create a new tree, whith a new root and one sibling.
  2184. ** Sibling must be on the Lua stack, at index 1.
  2185. */
  2186. static TTree *newroot1sib (lua_State *L, int tag) {
  2187. int s1;
  2188. TTree *tree1 = getpatt(L, 1, &s1);
  2189. TTree *tree = newtree(L, 1 + s1); /* create new tree */
  2190. tree->tag = tag;
  2191. memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
  2192. copyktable(L, 1);
  2193. return tree;
  2194. }
  2195. /*
  2196. ** create a new tree, whith a new root and 2 siblings.
  2197. ** Siblings must be on the Lua stack, first one at index 1.
  2198. */
  2199. static TTree *newroot2sib (lua_State *L, int tag) {
  2200. int s1, s2;
  2201. TTree *tree1 = getpatt(L, 1, &s1);
  2202. TTree *tree2 = getpatt(L, 2, &s2);
  2203. TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */
  2204. tree->tag = tag;
  2205. tree->u.ps = 1 + s1;
  2206. memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
  2207. memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
  2208. correctkeys(sib2(tree), joinktables(L, 1, 2));
  2209. return tree;
  2210. }
  2211. static int lp_P (lua_State *L) {
  2212. luaL_checkany(L, 1);
  2213. getpatt(L, 1, NULL);
  2214. lua_settop(L, 1);
  2215. return 1;
  2216. }
  2217. /*
  2218. ** sequence operator; optimizations:
  2219. ** false x => false, x true => x, true x => x
  2220. ** (cannot do x . false => false because x may have runtime captures)
  2221. */
  2222. static int lp_seq (lua_State *L) {
  2223. TTree *tree1 = getpatt(L, 1, NULL);
  2224. TTree *tree2 = getpatt(L, 2, NULL);
  2225. if (tree1->tag == TFalse || tree2->tag == TTrue)
  2226. lua_pushvalue(L, 1); /* false . x == false, x . true = x */
  2227. else if (tree1->tag == TTrue)
  2228. lua_pushvalue(L, 2); /* true . x = x */
  2229. else
  2230. newroot2sib(L, TSeq);
  2231. return 1;
  2232. }
  2233. /*
  2234. ** choice operator; optimizations:
  2235. ** charset / charset => charset
  2236. ** true / x => true, x / false => x, false / x => x
  2237. ** (x / true is not equivalent to true)
  2238. */
  2239. static int lp_choice (lua_State *L) {
  2240. Charset st1, st2;
  2241. TTree *t1 = getpatt(L, 1, NULL);
  2242. TTree *t2 = getpatt(L, 2, NULL);
  2243. if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
  2244. TTree *t = newcharset(L);
  2245. loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
  2246. }
  2247. else if (nofail(t1) || t2->tag == TFalse)
  2248. lua_pushvalue(L, 1); /* true / x => true, x / false => x */
  2249. else if (t1->tag == TFalse)
  2250. lua_pushvalue(L, 2); /* false / x => x */
  2251. else
  2252. newroot2sib(L, TChoice);
  2253. return 1;
  2254. }
  2255. /*
  2256. ** p^n
  2257. */
  2258. static int lp_star (lua_State *L) {
  2259. int size1;
  2260. int n = luaL_checkint(L, 2);
  2261. TTree *tree1 = gettree(L, 1, &size1);
  2262. if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
  2263. TTree *tree = newtree(L, (n + 1) * (size1 + 1));
  2264. if (nullable(tree1))
  2265. luaL_error(L, "loop body may accept empty string");
  2266. while (n--) /* repeat 'n' times */
  2267. tree = seqaux(tree, tree1, size1);
  2268. tree->tag = TRep;
  2269. memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
  2270. }
  2271. else { /* choice (seq tree1 ... choice tree1 true ...) true */
  2272. TTree *tree;
  2273. n = -n;
  2274. /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
  2275. tree = newtree(L, n * (size1 + 3) - 1);
  2276. for (; n > 1; n--) { /* repeat (n - 1) times */
  2277. tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
  2278. sib2(tree)->tag = TTrue;
  2279. tree = sib1(tree);
  2280. tree = seqaux(tree, tree1, size1);
  2281. }
  2282. tree->tag = TChoice; tree->u.ps = size1 + 1;
  2283. sib2(tree)->tag = TTrue;
  2284. memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
  2285. }
  2286. copyktable(L, 1);
  2287. return 1;
  2288. }
  2289. /*
  2290. ** #p == &p
  2291. */
  2292. static int lp_and (lua_State *L) {
  2293. newroot1sib(L, TAnd);
  2294. return 1;
  2295. }
  2296. /*
  2297. ** -p == !p
  2298. */
  2299. static int lp_not (lua_State *L) {
  2300. newroot1sib(L, TNot);
  2301. return 1;
  2302. }
  2303. /*
  2304. ** [t1 - t2] == Seq (Not t2) t1
  2305. ** If t1 and t2 are charsets, make their difference.
  2306. */
  2307. static int lp_sub (lua_State *L) {
  2308. Charset st1, st2;
  2309. int s1, s2;
  2310. TTree *t1 = getpatt(L, 1, &s1);
  2311. TTree *t2 = getpatt(L, 2, &s2);
  2312. if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
  2313. TTree *t = newcharset(L);
  2314. loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
  2315. }
  2316. else {
  2317. TTree *tree = newtree(L, 2 + s1 + s2);
  2318. tree->tag = TSeq; /* sequence of... */
  2319. tree->u.ps = 2 + s2;
  2320. sib1(tree)->tag = TNot; /* ...not... */
  2321. memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */
  2322. memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */
  2323. correctkeys(sib1(tree), joinktables(L, 1, 2));
  2324. }
  2325. return 1;
  2326. }
  2327. static int lp_set (lua_State *L) {
  2328. size_t l;
  2329. const char *s = luaL_checklstring(L, 1, &l);
  2330. TTree *tree = newcharset(L);
  2331. while (l--) {
  2332. setchar(treebuffer(tree), (byte)(*s));
  2333. s++;
  2334. }
  2335. return 1;
  2336. }
  2337. static int lp_range (lua_State *L) {
  2338. int arg;
  2339. int top = lua_gettop(L);
  2340. TTree *tree = newcharset(L);
  2341. for (arg = 1; arg <= top; arg++) {
  2342. int c;
  2343. size_t l;
  2344. const char *r = luaL_checklstring(L, arg, &l);
  2345. luaL_argcheck(L, l == 2, arg, "range must have two characters");
  2346. for (c = (byte)r[0]; c <= (byte)r[1]; c++)
  2347. setchar(treebuffer(tree), c);
  2348. }
  2349. return 1;
  2350. }
  2351. /*
  2352. ** Look-behind predicate
  2353. */
  2354. static int lp_behind (lua_State *L) {
  2355. TTree *tree;
  2356. TTree *tree1 = getpatt(L, 1, NULL);
  2357. int n = fixedlen(tree1);
  2358. luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
  2359. luaL_argcheck(L, n > 0, 1, "pattern may not have fixed length");
  2360. luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
  2361. tree = newroot1sib(L, TBehind);
  2362. tree->u.n = n;
  2363. return 1;
  2364. }
  2365. /*
  2366. ** Create a non-terminal
  2367. */
  2368. static int lp_V (lua_State *L) {
  2369. TTree *tree = newleaf(L, TOpenCall);
  2370. luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
  2371. tree->key = addtoktable(L, 1);
  2372. return 1;
  2373. }
  2374. /*
  2375. ** Create a tree for a non-empty capture, with a body and
  2376. ** optionally with an associated Lua value (at index 'labelidx' in the
  2377. ** stack)
  2378. */
  2379. static int capture_aux (lua_State *L, int cap, int labelidx) {
  2380. TTree *tree = newroot1sib(L, TCapture);
  2381. tree->cap = cap;
  2382. tree->key = addtoktable(L, labelidx);
  2383. return 1;
  2384. }
  2385. /*
  2386. ** Fill a tree with an empty capture, using an empty (TTrue) sibling.
  2387. */
  2388. static TTree *auxemptycap (lua_State *L, TTree *tree, int cap, int idx) {
  2389. tree->tag = TCapture;
  2390. tree->cap = cap;
  2391. tree->key = addtoktable(L, idx);
  2392. sib1(tree)->tag = TTrue;
  2393. return tree;
  2394. }
  2395. /*
  2396. ** Create a tree for an empty capture
  2397. */
  2398. static TTree *newemptycap (lua_State *L, int cap, int idx) {
  2399. return auxemptycap(L, newtree(L, 2), cap, idx);
  2400. }
  2401. /*
  2402. ** Captures with syntax p / v
  2403. ** (function capture, query capture, string capture, or number capture)
  2404. */
  2405. static int lp_divcapture (lua_State *L) {
  2406. switch (lua_type(L, 2)) {
  2407. case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
  2408. case LUA_TTABLE: return capture_aux(L, Cquery, 2);
  2409. case LUA_TSTRING: return capture_aux(L, Cstring, 2);
  2410. case LUA_TNUMBER: {
  2411. int n = lua_tointeger(L, 2);
  2412. TTree *tree = newroot1sib(L, TCapture);
  2413. luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number");
  2414. tree->cap = Cnum;
  2415. tree->key = n;
  2416. return 1;
  2417. }
  2418. default: return luaL_argerror(L, 2, "invalid replacement value");
  2419. }
  2420. }
  2421. static int lp_substcapture (lua_State *L) {
  2422. return capture_aux(L, Csubst, 0);
  2423. }
  2424. static int lp_tablecapture (lua_State *L) {
  2425. return capture_aux(L, Ctable, 0);
  2426. }
  2427. static int lp_groupcapture (lua_State *L) {
  2428. if (lua_isnoneornil(L, 2))
  2429. return capture_aux(L, Cgroup, 0);
  2430. else {
  2431. luaL_checkstring(L, 2);
  2432. return capture_aux(L, Cgroup, 2);
  2433. }
  2434. }
  2435. static int lp_foldcapture (lua_State *L) {
  2436. luaL_checktype(L, 2, LUA_TFUNCTION);
  2437. return capture_aux(L, Cfold, 2);
  2438. }
  2439. static int lp_simplecapture (lua_State *L) {
  2440. return capture_aux(L, Csimple, 0);
  2441. }
  2442. static int lp_poscapture (lua_State *L) {
  2443. newemptycap(L, Cposition, 0);
  2444. return 1;
  2445. }
  2446. static int lp_argcapture (lua_State *L) {
  2447. int n = luaL_checkint(L, 1);
  2448. TTree *tree = newemptycap(L, Carg, 0);
  2449. tree->key = n;
  2450. luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
  2451. return 1;
  2452. }
  2453. static int lp_backref (lua_State *L) {
  2454. luaL_checkstring(L, 1);
  2455. newemptycap(L, Cbackref, 1);
  2456. return 1;
  2457. }
  2458. /*
  2459. ** Constant capture
  2460. */
  2461. static int lp_constcapture (lua_State *L) {
  2462. int i;
  2463. int n = lua_gettop(L); /* number of values */
  2464. if (n == 0) /* no values? */
  2465. newleaf(L, TTrue); /* no capture */
  2466. else if (n == 1)
  2467. newemptycap(L, Cconst, 1); /* single constant capture */
  2468. else { /* create a group capture with all values */
  2469. TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
  2470. tree->tag = TCapture;
  2471. tree->cap = Cgroup;
  2472. tree->key = 0;
  2473. tree = sib1(tree);
  2474. for (i = 1; i <= n - 1; i++) {
  2475. tree->tag = TSeq;
  2476. tree->u.ps = 3; /* skip TCapture and its sibling */
  2477. auxemptycap(L, sib1(tree), Cconst, i);
  2478. tree = sib2(tree);
  2479. }
  2480. auxemptycap(L, tree, Cconst, i);
  2481. }
  2482. return 1;
  2483. }
  2484. static int lp_matchtime (lua_State *L) {
  2485. TTree *tree;
  2486. luaL_checktype(L, 2, LUA_TFUNCTION);
  2487. tree = newroot1sib(L, TRunTime);
  2488. tree->key = addtoktable(L, 2);
  2489. return 1;
  2490. }
  2491. /* }====================================================== */
  2492. /*
  2493. ** {======================================================
  2494. ** Grammar - Tree generation
  2495. ** =======================================================
  2496. */
  2497. /*
  2498. ** push on the stack the index and the pattern for the
  2499. ** initial rule of grammar at index 'arg' in the stack;
  2500. ** also add that index into position table.
  2501. */
  2502. static void getfirstrule (lua_State *L, int arg, int postab) {
  2503. lua_rawgeti(L, arg, 1); /* access first element */
  2504. if (lua_isstring(L, -1)) { /* is it the name of initial rule? */
  2505. lua_pushvalue(L, -1); /* duplicate it to use as key */
  2506. lua_gettable(L, arg); /* get associated rule */
  2507. }
  2508. else {
  2509. lua_pushinteger(L, 1); /* key for initial rule */
  2510. lua_insert(L, -2); /* put it before rule */
  2511. }
  2512. if (!testpattern(L, -1)) { /* initial rule not a pattern? */
  2513. if (lua_isnil(L, -1))
  2514. luaL_error(L, "grammar has no initial rule");
  2515. else
  2516. luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
  2517. }
  2518. lua_pushvalue(L, -2); /* push key */
  2519. lua_pushinteger(L, 1); /* push rule position (after TGrammar) */
  2520. lua_settable(L, postab); /* insert pair at position table */
  2521. }
  2522. /*
  2523. ** traverse grammar at index 'arg', pushing all its keys and patterns
  2524. ** into the stack. Create a new table (before all pairs key-pattern) to
  2525. ** collect all keys and their associated positions in the final tree
  2526. ** (the "position table").
  2527. ** Return the number of rules and (in 'totalsize') the total size
  2528. ** for the new tree.
  2529. */
  2530. static int collectrules (lua_State *L, int arg, int *totalsize) {
  2531. int n = 1; /* to count number of rules */
  2532. int postab = lua_gettop(L) + 1; /* index of position table */
  2533. int size; /* accumulator for total size */
  2534. lua_newtable(L); /* create position table */
  2535. getfirstrule(L, arg, postab);
  2536. size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */
  2537. lua_pushnil(L); /* prepare to traverse grammar table */
  2538. while (lua_next(L, arg) != 0) {
  2539. if (lua_tonumber(L, -2) == 1 ||
  2540. lua_equal(L, -2, postab + 1)) { /* initial rule? */
  2541. lua_pop(L, 1); /* remove value (keep key for lua_next) */
  2542. continue;
  2543. }
  2544. if (!testpattern(L, -1)) /* value is not a pattern? */
  2545. luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
  2546. luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
  2547. lua_pushvalue(L, -2); /* push key (to insert into position table) */
  2548. lua_pushinteger(L, size);
  2549. lua_settable(L, postab);
  2550. size += 1 + getsize(L, -1); /* update size */
  2551. lua_pushvalue(L, -2); /* push key (for next lua_next) */
  2552. n++;
  2553. }
  2554. *totalsize = size + 1; /* TTrue to finish list of rules */
  2555. return n;
  2556. }
  2557. static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
  2558. int i;
  2559. TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */
  2560. for (i = 0; i < n; i++) { /* add each rule into new tree */
  2561. int ridx = frule + 2*i + 1; /* index of i-th rule */
  2562. int rulesize;
  2563. TTree *rn = gettree(L, ridx, &rulesize);
  2564. nd->tag = TRule;
  2565. nd->key = 0;
  2566. nd->cap = i; /* rule number */
  2567. nd->u.ps = rulesize + 1; /* point to next rule */
  2568. memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
  2569. mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
  2570. nd = sib2(nd); /* move to next rule */
  2571. }
  2572. nd->tag = TTrue; /* finish list of rules */
  2573. }
  2574. /*
  2575. ** Check whether a tree has potential infinite loops
  2576. */
  2577. static int checkloops (TTree *tree) {
  2578. tailcall:
  2579. if (tree->tag == TRep && nullable(sib1(tree)))
  2580. return 1;
  2581. else if (tree->tag == TGrammar)
  2582. return 0; /* sub-grammars already checked */
  2583. else {
  2584. switch (numsiblings[tree->tag]) {
  2585. case 1: /* return checkloops(sib1(tree)); */
  2586. tree = sib1(tree); goto tailcall;
  2587. case 2:
  2588. if (checkloops(sib1(tree))) return 1;
  2589. /* else return checkloops(sib2(tree)); */
  2590. tree = sib2(tree); goto tailcall;
  2591. default: assert(numsiblings[tree->tag] == 0); return 0;
  2592. }
  2593. }
  2594. }
  2595. static int verifyerror (lua_State *L, int *passed, int npassed) {
  2596. int i, j;
  2597. for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
  2598. for (j = i - 1; j >= 0; j--) {
  2599. if (passed[i] == passed[j]) {
  2600. lua_rawgeti(L, -1, passed[i]); /* get rule's key */
  2601. return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
  2602. }
  2603. }
  2604. }
  2605. return luaL_error(L, "too many left calls in grammar");
  2606. }
  2607. /*
  2608. ** Check whether a rule can be left recursive; raise an error in that
  2609. ** case; otherwise return 1 iff pattern is nullable. Assume ktable at
  2610. ** the top of the stack.
  2611. */
  2612. static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
  2613. int nullable) {
  2614. tailcall:
  2615. switch (tree->tag) {
  2616. case TChar: case TSet: case TAny:
  2617. case TFalse:
  2618. return nullable; /* cannot pass from here */
  2619. case TTrue:
  2620. case TBehind: /* look-behind cannot have calls */
  2621. return 1;
  2622. case TNot: case TAnd: case TRep:
  2623. /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
  2624. tree = sib1(tree); nullable = 1; goto tailcall;
  2625. case TCapture: case TRunTime:
  2626. /* return verifyrule(L, sib1(tree), passed, npassed); */
  2627. tree = sib1(tree); goto tailcall;
  2628. case TCall:
  2629. /* return verifyrule(L, sib2(tree), passed, npassed); */
  2630. tree = sib2(tree); goto tailcall;
  2631. case TSeq: /* only check 2nd child if first is nullable */
  2632. if (!verifyrule(L, sib1(tree), passed, npassed, 0))
  2633. return nullable;
  2634. /* else return verifyrule(L, sib2(tree), passed, npassed); */
  2635. tree = sib2(tree); goto tailcall;
  2636. case TChoice: /* must check both children */
  2637. nullable = verifyrule(L, sib1(tree), passed, npassed, nullable);
  2638. /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */
  2639. tree = sib2(tree); goto tailcall;
  2640. case TRule:
  2641. if (npassed >= MAXRULES)
  2642. return verifyerror(L, passed, npassed);
  2643. else {
  2644. passed[npassed++] = tree->key;
  2645. /* return verifyrule(L, sib1(tree), passed, npassed); */
  2646. tree = sib1(tree); goto tailcall;
  2647. }
  2648. case TGrammar:
  2649. return nullable(tree); /* sub-grammar cannot be left recursive */
  2650. default: assert(0); return 0;
  2651. }
  2652. }
  2653. static void verifygrammar (lua_State *L, TTree *grammar) {
  2654. int passed[MAXRULES];
  2655. TTree *rule;
  2656. /* check left-recursive rules */
  2657. for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
  2658. if (rule->key == 0) continue; /* unused rule */
  2659. verifyrule(L, sib1(rule), passed, 0, 0);
  2660. }
  2661. assert(rule->tag == TTrue);
  2662. /* check infinite loops inside rules */
  2663. for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
  2664. if (rule->key == 0) continue; /* unused rule */
  2665. if (checkloops(sib1(rule))) {
  2666. lua_rawgeti(L, -1, rule->key); /* get rule's key */
  2667. luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
  2668. }
  2669. }
  2670. assert(rule->tag == TTrue);
  2671. }
  2672. /*
  2673. ** Give a name for the initial rule if it is not referenced
  2674. */
  2675. static void initialrulename (lua_State *L, TTree *grammar, int frule) {
  2676. if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */
  2677. int n = lua_objlen(L, -1) + 1; /* index for name */
  2678. lua_pushvalue(L, frule); /* rule's name */
  2679. lua_rawseti(L, -2, n); /* ktable was on the top of the stack */
  2680. sib1(grammar)->key = n;
  2681. }
  2682. }
  2683. static TTree *newgrammar (lua_State *L, int arg) {
  2684. int treesize;
  2685. int frule = lua_gettop(L) + 2; /* position of first rule's key */
  2686. int n = collectrules(L, arg, &treesize);
  2687. TTree *g = newtree(L, treesize);
  2688. luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
  2689. g->tag = TGrammar; g->u.n = n;
  2690. lua_newtable(L); /* create 'ktable' */
  2691. lua_setfenv(L, -2);
  2692. buildgrammar(L, g, frule, n);
  2693. lua_getfenv(L, -1); /* get 'ktable' for new tree */
  2694. finalfix(L, frule - 1, g, sib1(g));
  2695. initialrulename(L, g, frule);
  2696. verifygrammar(L, g);
  2697. lua_pop(L, 1); /* remove 'ktable' */
  2698. lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */
  2699. lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */
  2700. return g; /* new table at the top of the stack */
  2701. }
  2702. /* }====================================================== */
  2703. static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
  2704. lua_getfenv(L, idx); /* push 'ktable' (may be used by 'finalfix') */
  2705. finalfix(L, 0, NULL, p->tree);
  2706. lua_pop(L, 1); /* remove 'ktable' */
  2707. return compile(L, p);
  2708. }
  2709. static int lp_printtree (lua_State *L) {
  2710. TTree *tree = getpatt(L, 1, NULL);
  2711. int c = lua_toboolean(L, 2);
  2712. if (c) {
  2713. lua_getfenv(L, 1); /* push 'ktable' (may be used by 'finalfix') */
  2714. finalfix(L, 0, NULL, tree);
  2715. lua_pop(L, 1); /* remove 'ktable' */
  2716. }
  2717. printktable(L, 1);
  2718. printtree(tree, 0);
  2719. return 0;
  2720. }
  2721. static int lp_printcode (lua_State *L) {
  2722. Pattern *p = getpattern(L, 1);
  2723. printktable(L, 1);
  2724. if (p->code == NULL) /* not compiled yet? */
  2725. prepcompile(L, p, 1);
  2726. printpatt(p->code, p->codesize);
  2727. return 0;
  2728. }
  2729. /*
  2730. ** Get the initial position for the match, interpreting negative
  2731. ** values from the end of the subject
  2732. */
  2733. static size_t initposition (lua_State *L, size_t len) {
  2734. lua_Integer ii = luaL_optinteger(L, 3, 1);
  2735. if (ii > 0) { /* positive index? */
  2736. if ((size_t)ii <= len) /* inside the string? */
  2737. return (size_t)ii - 1; /* return it (corrected to 0-base) */
  2738. else return len; /* crop at the end */
  2739. }
  2740. else { /* negative index */
  2741. if ((size_t)(-ii) <= len) /* inside the string? */
  2742. return len - ((size_t)(-ii)); /* return position from the end */
  2743. else return 0; /* crop at the beginning */
  2744. }
  2745. }
  2746. /*
  2747. ** Main match function
  2748. */
  2749. static int lp_match (lua_State *L) {
  2750. Capture capture[INITCAPSIZE];
  2751. const char *r;
  2752. size_t l;
  2753. Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
  2754. Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
  2755. const char *s = luaL_checklstring(L, SUBJIDX, &l);
  2756. size_t i = initposition(L, l);
  2757. int ptop = lua_gettop(L);
  2758. lua_pushnil(L); /* initialize subscache */
  2759. lua_pushlightuserdata(L, capture); /* initialize caplistidx */
  2760. lua_getfenv(L, 1); /* initialize penvidx */
  2761. r = match(L, s, s + i, s + l, code, capture, ptop);
  2762. if (r == NULL) {
  2763. lua_pushnil(L);
  2764. return 1;
  2765. }
  2766. return getcaptures(L, s, r, ptop);
  2767. }
  2768. /*
  2769. ** {======================================================
  2770. ** Library creation and functions not related to matching
  2771. ** =======================================================
  2772. */
  2773. static int lp_setmax (lua_State *L) {
  2774. luaL_optinteger(L, 1, -1);
  2775. lua_settop(L, 1);
  2776. lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
  2777. return 0;
  2778. }
  2779. static int lp_version (lua_State *L) {
  2780. lua_pushstring(L, VERSION);
  2781. return 1;
  2782. }
  2783. static int lp_type (lua_State *L) {
  2784. if (testpattern(L, 1))
  2785. lua_pushliteral(L, "pattern");
  2786. else
  2787. lua_pushnil(L);
  2788. return 1;
  2789. }
  2790. int lp_gc (lua_State *L) {
  2791. Pattern *p = getpattern(L, 1);
  2792. if (p->codesize > 0)
  2793. reallocprog(L, p, 0);
  2794. return 0;
  2795. }
  2796. static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
  2797. TTree *t = newcharset(L);
  2798. int i;
  2799. for (i = 0; i <= UCHAR_MAX; i++)
  2800. if (catf(i)) setchar(treebuffer(t), i);
  2801. lua_setfield(L, -2, catname);
  2802. }
  2803. static int lp_locale (lua_State *L) {
  2804. if (lua_isnoneornil(L, 1)) {
  2805. lua_settop(L, 0);
  2806. lua_createtable(L, 0, 12);
  2807. }
  2808. else {
  2809. luaL_checktype(L, 1, LUA_TTABLE);
  2810. lua_settop(L, 1);
  2811. }
  2812. createcat(L, "alnum", isalnum);
  2813. createcat(L, "alpha", isalpha);
  2814. createcat(L, "cntrl", iscntrl);
  2815. createcat(L, "digit", isdigit);
  2816. createcat(L, "graph", isgraph);
  2817. createcat(L, "lower", islower);
  2818. createcat(L, "print", isprint);
  2819. createcat(L, "punct", ispunct);
  2820. createcat(L, "space", isspace);
  2821. createcat(L, "upper", isupper);
  2822. createcat(L, "xdigit", isxdigit);
  2823. return 1;
  2824. }
  2825. static struct luaL_Reg pattreg[] = {
  2826. {"ptree", lp_printtree},
  2827. {"pcode", lp_printcode},
  2828. {"match", lp_match},
  2829. {"B", lp_behind},
  2830. {"V", lp_V},
  2831. {"C", lp_simplecapture},
  2832. {"Cc", lp_constcapture},
  2833. {"Cmt", lp_matchtime},
  2834. {"Cb", lp_backref},
  2835. {"Carg", lp_argcapture},
  2836. {"Cp", lp_poscapture},
  2837. {"Cs", lp_substcapture},
  2838. {"Ct", lp_tablecapture},
  2839. {"Cf", lp_foldcapture},
  2840. {"Cg", lp_groupcapture},
  2841. {"P", lp_P},
  2842. {"S", lp_set},
  2843. {"R", lp_range},
  2844. {"locale", lp_locale},
  2845. {"version", lp_version},
  2846. {"setmaxstack", lp_setmax},
  2847. {"type", lp_type},
  2848. {NULL, NULL}
  2849. };
  2850. static struct luaL_Reg metareg[] = {
  2851. {"__mul", lp_seq},
  2852. {"__add", lp_choice},
  2853. {"__pow", lp_star},
  2854. {"__gc", lp_gc},
  2855. {"__len", lp_and},
  2856. {"__div", lp_divcapture},
  2857. {"__unm", lp_not},
  2858. {"__sub", lp_sub},
  2859. {NULL, NULL}
  2860. };
  2861. int luaopen_lpeg (lua_State *L);
  2862. int luaopen_lpeg (lua_State *L) {
  2863. luaL_newmetatable(L, PATTERN_T);
  2864. lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
  2865. lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
  2866. luaL_register(L, NULL, metareg);
  2867. luaL_register(L, "lpeg", pattreg);
  2868. lua_pushvalue(L, -1);
  2869. lua_setfield(L, -3, "__index");
  2870. return 1;
  2871. }
  2872. /* }====================================================== */
  2873. /*
  2874. ** $Id: lpvm.c,v 1.5 2013/04/12 16:29:49 roberto Exp $
  2875. ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
  2876. */
  2877. #include <limits.h>
  2878. #include <string.h>
  2879. #include "lua.h"
  2880. #include "lauxlib.h"
  2881. /* initial size for call/backtrack stack */
  2882. #if !defined(INITBACK)
  2883. #define INITBACK 100
  2884. #endif
  2885. #define getoffset(p) (((p) + 1)->offset)
  2886. static const Instruction giveup = {{IGiveup, 0, 0}};
  2887. /*
  2888. ** {======================================================
  2889. ** Virtual Machine
  2890. ** =======================================================
  2891. */
  2892. typedef struct Stack {
  2893. const char *s; /* saved position (or NULL for calls) */
  2894. const Instruction *p; /* next instruction */
  2895. int caplevel;
  2896. } Stack;
  2897. #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
  2898. /*
  2899. ** Double the size of the array of captures
  2900. */
  2901. static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
  2902. Capture *newc;
  2903. if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
  2904. luaL_error(L, "too many captures");
  2905. newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
  2906. memcpy(newc, cap, captop * sizeof(Capture));
  2907. lua_replace(L, caplistidx(ptop));
  2908. return newc;
  2909. }
  2910. /*
  2911. ** Double the size of the stack
  2912. */
  2913. static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
  2914. Stack *stack = getstackbase(L, ptop);
  2915. Stack *newstack;
  2916. int n = *stacklimit - stack; /* current stack size */
  2917. int max, newn;
  2918. lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
  2919. max = lua_tointeger(L, -1); /* maximum allowed size */
  2920. lua_pop(L, 1);
  2921. if (n >= max) /* already at maximum size? */
  2922. luaL_error(L, "too many pending calls/choices");
  2923. newn = 2 * n; /* new size */
  2924. if (newn > max) newn = max;
  2925. newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
  2926. memcpy(newstack, stack, n * sizeof(Stack));
  2927. lua_replace(L, stackidx(ptop));
  2928. *stacklimit = newstack + newn;
  2929. return newstack + n; /* return next position */
  2930. }
  2931. /*
  2932. ** Interpret the result of a dynamic capture: false -> fail;
  2933. ** true -> keep current position; number -> next position.
  2934. ** Return new subject position. 'fr' is stack index where
  2935. ** is the result; 'curr' is current subject position; 'limit'
  2936. ** is subject's size.
  2937. */
  2938. static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
  2939. lua_Integer res;
  2940. if (!lua_toboolean(L, fr)) { /* false value? */
  2941. lua_settop(L, fr - 1); /* remove results */
  2942. return -1; /* and fail */
  2943. }
  2944. else if (lua_isboolean(L, fr)) /* true? */
  2945. res = curr; /* keep current position */
  2946. else {
  2947. res = lua_tointeger(L, fr) - 1; /* new position */
  2948. if (res < curr || res > limit)
  2949. luaL_error(L, "invalid position returned by match-time capture");
  2950. }
  2951. lua_remove(L, fr); /* remove first result (offset) */
  2952. return res;
  2953. }
  2954. /*
  2955. ** Add capture values returned by a dynamic capture to the capture list
  2956. ** 'base', nested inside a group capture. 'fd' indexes the first capture
  2957. ** value, 'n' is the number of values (at least 1).
  2958. */
  2959. static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
  2960. int i;
  2961. /* Cgroup capture is already there */
  2962. assert(base[0].kind == Cgroup && base[0].siz == 0);
  2963. base[0].idx = 0; /* make it an anonymous group */
  2964. for (i = 1; i <= n; i++) { /* add runtime captures */
  2965. base[i].kind = Cruntime;
  2966. base[i].siz = 1; /* mark it as closed */
  2967. base[i].idx = fd + i - 1; /* stack index of capture value */
  2968. base[i].s = s;
  2969. }
  2970. base[i].kind = Cclose; /* close group */
  2971. base[i].siz = 1;
  2972. base[i].s = s;
  2973. }
  2974. /*
  2975. ** Remove dynamic captures from the Lua stack (called in case of failure)
  2976. */
  2977. static int removedyncap (lua_State *L, Capture *capture,
  2978. int level, int last) {
  2979. int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
  2980. int top = lua_gettop(L);
  2981. if (id == 0) return 0; /* no dynamic captures? */
  2982. lua_settop(L, id - 1); /* remove captures */
  2983. return top - id + 1; /* number of values removed */
  2984. }
  2985. /*
  2986. ** Opcode interpreter
  2987. */
  2988. const char *match (lua_State *L, const char *o, const char *s, const char *e,
  2989. Instruction *op, Capture *capture, int ptop) {
  2990. Stack stackbase[INITBACK];
  2991. Stack *stacklimit = stackbase + INITBACK;
  2992. Stack *stack = stackbase; /* point to first empty slot in stack */
  2993. int capsize = INITCAPSIZE;
  2994. int captop = 0; /* point to first empty slot in captures */
  2995. int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
  2996. const Instruction *p = op; /* current instruction */
  2997. stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
  2998. lua_pushlightuserdata(L, stackbase);
  2999. for (;;) {
  3000. #if defined(DEBUG)
  3001. printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
  3002. s, stack - getstackbase(L, ptop), ndyncap, captop);
  3003. printinst(op, p);
  3004. printcaplist(capture, capture + captop);
  3005. #endif
  3006. assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
  3007. switch ((Opcode)p->i.code) {
  3008. case IEnd: {
  3009. assert(stack == getstackbase(L, ptop) + 1);
  3010. capture[captop].kind = Cclose;
  3011. capture[captop].s = NULL;
  3012. return s;
  3013. }
  3014. case IGiveup: {
  3015. assert(stack == getstackbase(L, ptop));
  3016. return NULL;
  3017. }
  3018. case IRet: {
  3019. assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
  3020. p = (--stack)->p;
  3021. continue;
  3022. }
  3023. case IAny: {
  3024. if (s < e) { p++; s++; }
  3025. else goto fail;
  3026. continue;
  3027. }
  3028. case ITestAny: {
  3029. if (s < e) p += 2;
  3030. else p += getoffset(p);
  3031. continue;
  3032. }
  3033. case IChar: {
  3034. if ((byte)*s == p->i.aux && s < e) { p++; s++; }
  3035. else goto fail;
  3036. continue;
  3037. }
  3038. case ITestChar: {
  3039. if ((byte)*s == p->i.aux && s < e) p += 2;
  3040. else p += getoffset(p);
  3041. continue;
  3042. }
  3043. case ISet: {
  3044. int c = (byte)*s;
  3045. if (testchar((p+1)->buff, c) && s < e)
  3046. { p += CHARSETINSTSIZE; s++; }
  3047. else goto fail;
  3048. continue;
  3049. }
  3050. case ITestSet: {
  3051. int c = (byte)*s;
  3052. if (testchar((p + 2)->buff, c) && s < e)
  3053. p += 1 + CHARSETINSTSIZE;
  3054. else p += getoffset(p);
  3055. continue;
  3056. }
  3057. case IBehind: {
  3058. int n = p->i.aux;
  3059. if (n > s - o) goto fail;
  3060. s -= n; p++;
  3061. continue;
  3062. }
  3063. case ISpan: {
  3064. for (; s < e; s++) {
  3065. int c = (byte)*s;
  3066. if (!testchar((p+1)->buff, c)) break;
  3067. }
  3068. p += CHARSETINSTSIZE;
  3069. continue;
  3070. }
  3071. case IJmp: {
  3072. p += getoffset(p);
  3073. continue;
  3074. }
  3075. case IChoice: {
  3076. if (stack == stacklimit)
  3077. stack = doublestack(L, &stacklimit, ptop);
  3078. stack->p = p + getoffset(p);
  3079. stack->s = s;
  3080. stack->caplevel = captop;
  3081. stack++;
  3082. p += 2;
  3083. continue;
  3084. }
  3085. case ICall: {
  3086. if (stack == stacklimit)
  3087. stack = doublestack(L, &stacklimit, ptop);
  3088. stack->s = NULL;
  3089. stack->p = p + 2; /* save return address */
  3090. stack++;
  3091. p += getoffset(p);
  3092. continue;
  3093. }
  3094. case ICommit: {
  3095. assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
  3096. stack--;
  3097. p += getoffset(p);
  3098. continue;
  3099. }
  3100. case IPartialCommit: {
  3101. assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
  3102. (stack - 1)->s = s;
  3103. (stack - 1)->caplevel = captop;
  3104. p += getoffset(p);
  3105. continue;
  3106. }
  3107. case IBackCommit: {
  3108. assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
  3109. s = (--stack)->s;
  3110. captop = stack->caplevel;
  3111. p += getoffset(p);
  3112. continue;
  3113. }
  3114. case IFailTwice:
  3115. assert(stack > getstackbase(L, ptop));
  3116. stack--;
  3117. /* go through */
  3118. case IFail:
  3119. fail: { /* pattern failed: try to backtrack */
  3120. do { /* remove pending calls */
  3121. assert(stack > getstackbase(L, ptop));
  3122. s = (--stack)->s;
  3123. } while (s == NULL);
  3124. if (ndyncap > 0) /* is there matchtime captures? */
  3125. ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
  3126. captop = stack->caplevel;
  3127. p = stack->p;
  3128. continue;
  3129. }
  3130. case ICloseRunTime: {
  3131. CapState cs;
  3132. int rem, res, n;
  3133. int fr = lua_gettop(L) + 1; /* stack index of first result */
  3134. cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
  3135. n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
  3136. captop -= n; /* remove nested captures */
  3137. fr -= rem; /* 'rem' items were popped from Lua stack */
  3138. res = resdyncaptures(L, fr, s - o, e - o); /* get result */
  3139. if (res == -1) /* fail? */
  3140. goto fail;
  3141. s = o + res; /* else update current position */
  3142. n = lua_gettop(L) - fr + 1; /* number of new captures */
  3143. ndyncap += n - rem; /* update number of dynamic captures */
  3144. if (n > 0) { /* any new capture? */
  3145. if ((captop += n + 2) >= capsize) {
  3146. capture = doublecap(L, capture, captop, ptop);
  3147. capsize = 2 * captop;
  3148. }
  3149. /* add new captures to 'capture' list */
  3150. adddyncaptures(s, capture + captop - n - 2, n, fr);
  3151. }
  3152. p++;
  3153. continue;
  3154. }
  3155. case ICloseCapture: {
  3156. const char *s1 = s;
  3157. assert(captop > 0);
  3158. /* if possible, turn capture into a full capture */
  3159. if (capture[captop - 1].siz == 0 &&
  3160. s1 - capture[captop - 1].s < UCHAR_MAX) {
  3161. capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
  3162. p++;
  3163. continue;
  3164. }
  3165. else {
  3166. capture[captop].siz = 1; /* mark entry as closed */
  3167. capture[captop].s = s;
  3168. goto pushcapture;
  3169. }
  3170. }
  3171. case IOpenCapture:
  3172. capture[captop].siz = 0; /* mark entry as open */
  3173. capture[captop].s = s;
  3174. goto pushcapture;
  3175. case IFullCapture:
  3176. capture[captop].siz = getoff(p) + 1; /* save capture size */
  3177. capture[captop].s = s - getoff(p);
  3178. /* goto pushcapture; */
  3179. pushcapture: {
  3180. capture[captop].idx = p->i.key;
  3181. capture[captop].kind = getkind(p);
  3182. if (++captop >= capsize) {
  3183. capture = doublecap(L, capture, captop, ptop);
  3184. capsize = 2 * captop;
  3185. }
  3186. p++;
  3187. continue;
  3188. }
  3189. default: assert(0); return NULL;
  3190. }
  3191. }
  3192. }
  3193. /* }====================================================== */