/regcomp.c

https://github.com/diabolo/ruby · C · 6314 lines · 5385 code · 864 blank · 65 comment · 1386 complexity · 7830f555aaa385edcde24e93f132e8e9 MD5 · raw file

Large files are truncated click here to view the full file

  1. /**********************************************************************
  2. regcomp.c - Oniguruma (regular expression library)
  3. **********************************************************************/
  4. /*-
  5. * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  6. * All rights reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. * 1. Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27. * SUCH DAMAGE.
  28. */
  29. #include "regparse.h"
  /* Default case-fold flag; read and written through the
     onig_get/onig_set_default_case_fold_flag() accessors below. */
  30. OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
  31. extern OnigCaseFoldType
  32. onig_get_default_case_fold_flag(void)
  33. {
  34. return OnigDefaultCaseFoldFlag;
  35. }
  36. extern int
  37. onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
  38. {
  39. OnigDefaultCaseFoldFlag = case_fold_flag;
  40. return 0;
  41. }
  42. #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
  /* Source of padding bytes emitted by add_multi_byte_cclass(); the array has
     static storage duration, so it is zero-filled. */
  43. static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
  44. #endif
  45. static UChar*
  46. str_dup(UChar* s, UChar* end)
  47. {
  48. ptrdiff_t len = end - s;
  49. if (len > 0) {
  50. UChar* r = (UChar* )xmalloc(len + 1);
  51. CHECK_NULL_RETURN(r);
  52. xmemcpy(r, s, len);
  53. r[len] = (UChar )0;
  54. return r;
  55. }
  56. else return NULL;
  57. }
  58. static void
  59. swap_node(Node* a, Node* b)
  60. {
  61. Node c;
  62. c = *a; *a = *b; *b = c;
  63. if (NTYPE(a) == NT_STR) {
  64. StrNode* sn = NSTR(a);
  65. if (sn->capa == 0) {
  66. size_t len = sn->end - sn->s;
  67. sn->s = sn->buf;
  68. sn->end = sn->s + len;
  69. }
  70. }
  71. if (NTYPE(b) == NT_STR) {
  72. StrNode* sn = NSTR(b);
  73. if (sn->capa == 0) {
  74. size_t len = sn->end - sn->s;
  75. sn->s = sn->buf;
  76. sn->end = sn->s + len;
  77. }
  78. }
  79. }
  80. static OnigDistance
  81. distance_add(OnigDistance d1, OnigDistance d2)
  82. {
  83. if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
  84. return ONIG_INFINITE_DISTANCE;
  85. else {
  86. if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
  87. else return ONIG_INFINITE_DISTANCE;
  88. }
  89. }
  90. static OnigDistance
  91. distance_multiply(OnigDistance d, int m)
  92. {
  93. if (m == 0) return 0;
  94. if (d < ONIG_INFINITE_DISTANCE / m)
  95. return d * m;
  96. else
  97. return ONIG_INFINITE_DISTANCE;
  98. }
  99. static int
  100. bitset_is_empty(BitSetRef bs)
  101. {
  102. int i;
  103. for (i = 0; i < (int )BITSET_SIZE; i++) {
  104. if (bs[i] != 0) return 0;
  105. }
  106. return 1;
  107. }
  108. #ifdef ONIG_DEBUG
  109. static int
  110. bitset_on_num(BitSetRef bs)
  111. {
  112. int i, n;
  113. n = 0;
  114. for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
  115. if (BITSET_AT(bs, i)) n++;
  116. }
  117. return n;
  118. }
  119. #endif
  120. extern int
  121. onig_bbuf_init(BBuf* buf, int size)
  122. {
  123. if (size <= 0) {
  124. size = 0;
  125. buf->p = NULL;
  126. }
  127. else {
  128. buf->p = (UChar* )xmalloc(size);
  129. if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
  130. }
  131. buf->alloc = size;
  132. buf->used = 0;
  133. return 0;
  134. }
  135. #ifdef USE_SUBEXP_CALL
  136. static int
  137. unset_addr_list_init(UnsetAddrList* uslist, int size)
  138. {
  139. UnsetAddr* p;
  140. p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
  141. CHECK_NULL_RETURN_MEMERR(p);
  142. uslist->num = 0;
  143. uslist->alloc = size;
  144. uslist->us = p;
  145. return 0;
  146. }
  147. static void
  148. unset_addr_list_end(UnsetAddrList* uslist)
  149. {
  150. if (IS_NOT_NULL(uslist->us))
  151. xfree(uslist->us);
  152. }
  153. static int
  154. unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
  155. {
  156. UnsetAddr* p;
  157. int size;
  158. if (uslist->num >= uslist->alloc) {
  159. size = uslist->alloc * 2;
  160. p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
  161. CHECK_NULL_RETURN_MEMERR(p);
  162. uslist->alloc = size;
  163. uslist->us = p;
  164. }
  165. uslist->us[uslist->num].offset = offset;
  166. uslist->us[uslist->num].target = node;
  167. uslist->num++;
  168. return 0;
  169. }
  170. #endif /* USE_SUBEXP_CALL */
  171. static int
  172. add_opcode(regex_t* reg, int opcode)
  173. {
  174. BBUF_ADD1(reg, opcode);
  175. return 0;
  176. }
  177. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  178. static int
  179. add_state_check_num(regex_t* reg, int num)
  180. {
  181. StateCheckNumType n = (StateCheckNumType )num;
  182. BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
  183. return 0;
  184. }
  185. #endif
  186. static int
  187. add_rel_addr(regex_t* reg, int addr)
  188. {
  189. RelAddrType ra = (RelAddrType )addr;
  190. BBUF_ADD(reg, &ra, SIZE_RELADDR);
  191. return 0;
  192. }
  193. static int
  194. add_abs_addr(regex_t* reg, int addr)
  195. {
  196. AbsAddrType ra = (AbsAddrType )addr;
  197. BBUF_ADD(reg, &ra, SIZE_ABSADDR);
  198. return 0;
  199. }
  200. static int
  201. add_length(regex_t* reg, int len)
  202. {
  203. LengthType l = (LengthType )len;
  204. BBUF_ADD(reg, &l, SIZE_LENGTH);
  205. return 0;
  206. }
  207. static int
  208. add_mem_num(regex_t* reg, int num)
  209. {
  210. MemNumType n = (MemNumType )num;
  211. BBUF_ADD(reg, &n, SIZE_MEMNUM);
  212. return 0;
  213. }
  214. static int
  215. add_pointer(regex_t* reg, void* addr)
  216. {
  217. PointerType ptr = (PointerType )addr;
  218. BBUF_ADD(reg, &ptr, SIZE_POINTER);
  219. return 0;
  220. }
  221. static int
  222. add_option(regex_t* reg, OnigOptionType option)
  223. {
  224. BBUF_ADD(reg, &option, SIZE_OPTION);
  225. return 0;
  226. }
  227. static int
  228. add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
  229. {
  230. int r;
  231. r = add_opcode(reg, opcode);
  232. if (r) return r;
  233. r = add_rel_addr(reg, addr);
  234. return r;
  235. }
  236. static int
  237. add_bytes(regex_t* reg, UChar* bytes, int len)
  238. {
  239. BBUF_ADD(reg, bytes, len);
  240. return 0;
  241. }
  242. static int
  243. add_bitset(regex_t* reg, BitSetRef bs)
  244. {
  245. BBUF_ADD(reg, bs, SIZE_BITSET);
  246. return 0;
  247. }
  248. static int
  249. add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
  250. {
  251. int r;
  252. r = add_opcode(reg, opcode);
  253. if (r) return r;
  254. r = add_option(reg, option);
  255. return r;
  256. }
  /* Forward declarations: the helpers below call these two functions before
     their definitions appear. */
  257. static int compile_length_tree(Node* node, regex_t* reg);
  258. static int compile_tree(Node* node, regex_t* reg);
  /* True for the exact-string opcodes for which add_compile_string() emits an
     explicit length operand (and add_compile_string_length() counts one). */
  259. #define IS_NEED_STR_LEN_OP_EXACT(op) \
  260. ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
  261. (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
  262. static int
  263. select_str_opcode(int mb_len, int str_len, int ignore_case)
  264. {
  265. int op;
  266. if (ignore_case) {
  267. switch (str_len) {
  268. case 1: op = OP_EXACT1_IC; break;
  269. default: op = OP_EXACTN_IC; break;
  270. }
  271. }
  272. else {
  273. switch (mb_len) {
  274. case 1:
  275. switch (str_len) {
  276. case 1: op = OP_EXACT1; break;
  277. case 2: op = OP_EXACT2; break;
  278. case 3: op = OP_EXACT3; break;
  279. case 4: op = OP_EXACT4; break;
  280. case 5: op = OP_EXACT5; break;
  281. default: op = OP_EXACTN; break;
  282. }
  283. break;
  284. case 2:
  285. switch (str_len) {
  286. case 1: op = OP_EXACTMB2N1; break;
  287. case 2: op = OP_EXACTMB2N2; break;
  288. case 3: op = OP_EXACTMB2N3; break;
  289. default: op = OP_EXACTMB2N; break;
  290. }
  291. break;
  292. case 3:
  293. op = OP_EXACTMB3N;
  294. break;
  295. default:
  296. op = OP_EXACTMBN;
  297. break;
  298. }
  299. }
  300. return op;
  301. }
  302. static int
  303. compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
  304. {
  305. int r;
  306. int saved_num_null_check = reg->num_null_check;
  307. if (empty_info != 0) {
  308. r = add_opcode(reg, OP_NULL_CHECK_START);
  309. if (r) return r;
  310. r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
  311. if (r) return r;
  312. reg->num_null_check++;
  313. }
  314. r = compile_tree(node, reg);
  315. if (r) return r;
  316. if (empty_info != 0) {
  317. if (empty_info == NQ_TARGET_IS_EMPTY)
  318. r = add_opcode(reg, OP_NULL_CHECK_END);
  319. else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
  320. r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
  321. else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
  322. r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
  323. if (r) return r;
  324. r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
  325. }
  326. return r;
  327. }
  328. #ifdef USE_SUBEXP_CALL
  329. static int
  330. compile_call(CallNode* node, regex_t* reg)
  331. {
  332. int r;
  333. r = add_opcode(reg, OP_CALL);
  334. if (r) return r;
  335. r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
  336. node->target);
  337. if (r) return r;
  338. r = add_abs_addr(reg, 0 /*dummy addr.*/);
  339. return r;
  340. }
  341. #endif
  342. static int
  343. compile_tree_n_times(Node* node, int n, regex_t* reg)
  344. {
  345. int i, r;
  346. for (i = 0; i < n; i++) {
  347. r = compile_tree(node, reg);
  348. if (r) return r;
  349. }
  350. return 0;
  351. }
  352. static int
  353. add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
  354. regex_t* reg ARG_UNUSED, int ignore_case)
  355. {
  356. int len;
  357. int op = select_str_opcode(mb_len, str_len, ignore_case);
  358. len = SIZE_OPCODE;
  359. if (op == OP_EXACTMBN) len += SIZE_LENGTH;
  360. if (IS_NEED_STR_LEN_OP_EXACT(op))
  361. len += SIZE_LENGTH;
  362. len += mb_len * str_len;
  363. return len;
  364. }
  365. static int
  366. add_compile_string(UChar* s, int mb_len, int str_len,
  367. regex_t* reg, int ignore_case)
  368. {
  369. int op = select_str_opcode(mb_len, str_len, ignore_case);
  370. add_opcode(reg, op);
  371. if (op == OP_EXACTMBN)
  372. add_length(reg, mb_len);
  373. if (IS_NEED_STR_LEN_OP_EXACT(op)) {
  374. if (op == OP_EXACTN_IC)
  375. add_length(reg, mb_len * str_len);
  376. else
  377. add_length(reg, str_len);
  378. }
  379. add_bytes(reg, s, mb_len * str_len);
  380. return 0;
  381. }
  382. static int
  383. compile_length_string_node(Node* node, regex_t* reg)
  384. {
  385. int rlen, r, len, prev_len, slen, ambig;
  386. OnigEncoding enc = reg->enc;
  387. UChar *p, *prev;
  388. StrNode* sn;
  389. sn = NSTR(node);
  390. if (sn->end <= sn->s)
  391. return 0;
  392. ambig = NSTRING_IS_AMBIG(node);
  393. p = prev = sn->s;
  394. prev_len = enclen(enc, p, sn->end);
  395. p += prev_len;
  396. slen = 1;
  397. rlen = 0;
  398. for (; p < sn->end; ) {
  399. len = enclen(enc, p, sn->end);
  400. if (len == prev_len) {
  401. slen++;
  402. }
  403. else {
  404. r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
  405. rlen += r;
  406. prev = p;
  407. slen = 1;
  408. prev_len = len;
  409. }
  410. p += len;
  411. }
  412. r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
  413. rlen += r;
  414. return rlen;
  415. }
  416. static int
  417. compile_length_string_raw_node(StrNode* sn, regex_t* reg)
  418. {
  419. if (sn->end <= sn->s)
  420. return 0;
  421. return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
  422. }
  423. static int
  424. compile_string_node(Node* node, regex_t* reg)
  425. {
  426. int r, len, prev_len, slen, ambig;
  427. OnigEncoding enc = reg->enc;
  428. UChar *p, *prev, *end;
  429. StrNode* sn;
  430. sn = NSTR(node);
  431. if (sn->end <= sn->s)
  432. return 0;
  433. end = sn->end;
  434. ambig = NSTRING_IS_AMBIG(node);
  435. p = prev = sn->s;
  436. prev_len = enclen(enc, p, end);
  437. p += prev_len;
  438. slen = 1;
  439. for (; p < end; ) {
  440. len = enclen(enc, p, end);
  441. if (len == prev_len) {
  442. slen++;
  443. }
  444. else {
  445. r = add_compile_string(prev, prev_len, slen, reg, ambig);
  446. if (r) return r;
  447. prev = p;
  448. slen = 1;
  449. prev_len = len;
  450. }
  451. p += len;
  452. }
  453. return add_compile_string(prev, prev_len, slen, reg, ambig);
  454. }
  455. static int
  456. compile_string_raw_node(StrNode* sn, regex_t* reg)
  457. {
  458. if (sn->end <= sn->s)
  459. return 0;
  460. return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
  461. }
  462. static int
  463. add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
  464. {
  465. #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  466. add_length(reg, mbuf->used);
  467. return add_bytes(reg, mbuf->p, mbuf->used);
  468. #else
  469. int r, pad_size;
  470. UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
  471. GET_ALIGNMENT_PAD_SIZE(p, pad_size);
  472. add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
  473. if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
  474. r = add_bytes(reg, mbuf->p, mbuf->used);
  475. /* padding for return value from compile_length_cclass_node() to be fix. */
  476. pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
  477. if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
  478. return r;
  479. #endif
  480. }
  481. static int
  482. compile_length_cclass_node(CClassNode* cc, regex_t* reg)
  483. {
  484. int len;
  485. if (IS_NCCLASS_SHARE(cc)) {
  486. len = SIZE_OPCODE + SIZE_POINTER;
  487. return len;
  488. }
  489. if (IS_NULL(cc->mbuf)) {
  490. len = SIZE_OPCODE + SIZE_BITSET;
  491. }
  492. else {
  493. if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
  494. len = SIZE_OPCODE;
  495. }
  496. else {
  497. len = SIZE_OPCODE + SIZE_BITSET;
  498. }
  499. #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  500. len += SIZE_LENGTH + cc->mbuf->used;
  501. #else
  502. len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
  503. #endif
  504. }
  505. return len;
  506. }
  507. static int
  508. compile_cclass_node(CClassNode* cc, regex_t* reg)
  509. {
  510. int r;
  511. if (IS_NCCLASS_SHARE(cc)) {
  512. add_opcode(reg, OP_CCLASS_NODE);
  513. r = add_pointer(reg, cc);
  514. return r;
  515. }
  516. if (IS_NULL(cc->mbuf)) {
  517. if (IS_NCCLASS_NOT(cc))
  518. add_opcode(reg, OP_CCLASS_NOT);
  519. else
  520. add_opcode(reg, OP_CCLASS);
  521. r = add_bitset(reg, cc->bs);
  522. }
  523. else {
  524. if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
  525. if (IS_NCCLASS_NOT(cc))
  526. add_opcode(reg, OP_CCLASS_MB_NOT);
  527. else
  528. add_opcode(reg, OP_CCLASS_MB);
  529. r = add_multi_byte_cclass(cc->mbuf, reg);
  530. }
  531. else {
  532. if (IS_NCCLASS_NOT(cc))
  533. add_opcode(reg, OP_CCLASS_MIX_NOT);
  534. else
  535. add_opcode(reg, OP_CCLASS_MIX);
  536. r = add_bitset(reg, cc->bs);
  537. if (r) return r;
  538. r = add_multi_byte_cclass(cc->mbuf, reg);
  539. }
  540. }
  541. return r;
  542. }
  543. static int
  544. entry_repeat_range(regex_t* reg, int id, int lower, int upper)
  545. {
  546. #define REPEAT_RANGE_ALLOC 4
  547. OnigRepeatRange* p;
  548. if (reg->repeat_range_alloc == 0) {
  549. p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
  550. CHECK_NULL_RETURN_MEMERR(p);
  551. reg->repeat_range = p;
  552. reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
  553. }
  554. else if (reg->repeat_range_alloc <= id) {
  555. int n;
  556. n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
  557. p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
  558. sizeof(OnigRepeatRange) * n);
  559. CHECK_NULL_RETURN_MEMERR(p);
  560. reg->repeat_range = p;
  561. reg->repeat_range_alloc = n;
  562. }
  563. else {
  564. p = reg->repeat_range;
  565. }
  566. p[id].lower = lower;
  567. p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
  568. return 0;
  569. }
  570. static int
  571. compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
  572. regex_t* reg)
  573. {
  574. int r;
  575. int num_repeat = reg->num_repeat;
  576. r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
  577. if (r) return r;
  578. r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  579. reg->num_repeat++;
  580. if (r) return r;
  581. r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
  582. if (r) return r;
  583. r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
  584. if (r) return r;
  585. r = compile_tree_empty_check(qn->target, reg, empty_info);
  586. if (r) return r;
  587. if (
  588. #ifdef USE_SUBEXP_CALL
  589. reg->num_call > 0 ||
  590. #endif
  591. IS_QUANTIFIER_IN_REPEAT(qn)) {
  592. r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
  593. }
  594. else {
  595. r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
  596. }
  597. if (r) return r;
  598. r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  599. return r;
  600. }
  601. static int
  602. is_anychar_star_quantifier(QtfrNode* qn)
  603. {
  604. if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
  605. NTYPE(qn->target) == NT_CANY)
  606. return 1;
  607. else
  608. return 0;
  609. }
  /* Size threshold compared against target sizes by the quantifier compilers
     of the build without USE_COMBINATION_EXPLOSION_CHECK. */
  610. #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
  /* True when the local variable `ckn` of the function using it is positive. */
  611. #define CKN_ON (ckn > 0)
  612. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  613. static int
  614. compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
  615. {
  616. int len, mod_tlen, cklen;
  617. int ckn;
  618. int infinite = IS_REPEAT_INFINITE(qn->upper);
  619. int empty_info = qn->target_empty_info;
  620. int tlen = compile_length_tree(qn->target, reg);
  621. if (tlen < 0) return tlen;
  622. ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
  623. cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
  624. /* anychar repeat */
  625. if (NTYPE(qn->target) == NT_CANY) {
  626. if (qn->greedy && infinite) {
  627. if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
  628. return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
  629. else
  630. return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
  631. }
  632. }
  633. if (empty_info != 0)
  634. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  635. else
  636. mod_tlen = tlen;
  637. if (infinite && qn->lower <= 1) {
  638. if (qn->greedy) {
  639. if (qn->lower == 1)
  640. len = SIZE_OP_JUMP;
  641. else
  642. len = 0;
  643. len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
  644. }
  645. else {
  646. if (qn->lower == 0)
  647. len = SIZE_OP_JUMP;
  648. else
  649. len = 0;
  650. len += mod_tlen + SIZE_OP_PUSH + cklen;
  651. }
  652. }
  653. else if (qn->upper == 0) {
  654. if (qn->is_refered != 0) /* /(?<n>..){0}/ */
  655. len = SIZE_OP_JUMP + tlen;
  656. else
  657. len = 0;
  658. }
  659. else if (qn->upper == 1 && qn->greedy) {
  660. if (qn->lower == 0) {
  661. if (CKN_ON) {
  662. len = SIZE_OP_STATE_CHECK_PUSH + tlen;
  663. }
  664. else {
  665. len = SIZE_OP_PUSH + tlen;
  666. }
  667. }
  668. else {
  669. len = tlen;
  670. }
  671. }
  672. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  673. len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
  674. }
  675. else {
  676. len = SIZE_OP_REPEAT_INC
  677. + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
  678. if (CKN_ON)
  679. len += SIZE_OP_STATE_CHECK;
  680. }
  681. return len;
  682. }
  683. static int
  684. compile_quantifier_node(QtfrNode* qn, regex_t* reg)
  685. {
  686. int r, mod_tlen;
  687. int ckn;
  688. int infinite = IS_REPEAT_INFINITE(qn->upper);
  689. int empty_info = qn->target_empty_info;
  690. int tlen = compile_length_tree(qn->target, reg);
  691. if (tlen < 0) return tlen;
  692. ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
  693. if (is_anychar_star_quantifier(qn)) {
  694. r = compile_tree_n_times(qn->target, qn->lower, reg);
  695. if (r) return r;
  696. if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
  697. if (IS_MULTILINE(reg->options))
  698. r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
  699. else
  700. r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
  701. if (r) return r;
  702. if (CKN_ON) {
  703. r = add_state_check_num(reg, ckn);
  704. if (r) return r;
  705. }
  706. return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  707. }
  708. else {
  709. if (IS_MULTILINE(reg->options)) {
  710. r = add_opcode(reg, (CKN_ON ?
  711. OP_STATE_CHECK_ANYCHAR_ML_STAR
  712. : OP_ANYCHAR_ML_STAR));
  713. }
  714. else {
  715. r = add_opcode(reg, (CKN_ON ?
  716. OP_STATE_CHECK_ANYCHAR_STAR
  717. : OP_ANYCHAR_STAR));
  718. }
  719. if (r) return r;
  720. if (CKN_ON)
  721. r = add_state_check_num(reg, ckn);
  722. return r;
  723. }
  724. }
  725. if (empty_info != 0)
  726. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  727. else
  728. mod_tlen = tlen;
  729. if (infinite && qn->lower <= 1) {
  730. if (qn->greedy) {
  731. if (qn->lower == 1) {
  732. r = add_opcode_rel_addr(reg, OP_JUMP,
  733. (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
  734. if (r) return r;
  735. }
  736. if (CKN_ON) {
  737. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  738. if (r) return r;
  739. r = add_state_check_num(reg, ckn);
  740. if (r) return r;
  741. r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
  742. }
  743. else {
  744. r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
  745. }
  746. if (r) return r;
  747. r = compile_tree_empty_check(qn->target, reg, empty_info);
  748. if (r) return r;
  749. r = add_opcode_rel_addr(reg, OP_JUMP,
  750. -(mod_tlen + (int )SIZE_OP_JUMP
  751. + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
  752. }
  753. else {
  754. if (qn->lower == 0) {
  755. r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
  756. if (r) return r;
  757. }
  758. r = compile_tree_empty_check(qn->target, reg, empty_info);
  759. if (r) return r;
  760. if (CKN_ON) {
  761. r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
  762. if (r) return r;
  763. r = add_state_check_num(reg, ckn);
  764. if (r) return r;
  765. r = add_rel_addr(reg,
  766. -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
  767. }
  768. else
  769. r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
  770. }
  771. }
  772. else if (qn->upper == 0) {
  773. if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
  774. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  775. if (r) return r;
  776. r = compile_tree(qn->target, reg);
  777. }
  778. else
  779. r = 0;
  780. }
  781. else if (qn->upper == 1 && qn->greedy) {
  782. if (qn->lower == 0) {
  783. if (CKN_ON) {
  784. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  785. if (r) return r;
  786. r = add_state_check_num(reg, ckn);
  787. if (r) return r;
  788. r = add_rel_addr(reg, tlen);
  789. }
  790. else {
  791. r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
  792. }
  793. if (r) return r;
  794. }
  795. r = compile_tree(qn->target, reg);
  796. }
  797. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  798. if (CKN_ON) {
  799. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  800. if (r) return r;
  801. r = add_state_check_num(reg, ckn);
  802. if (r) return r;
  803. r = add_rel_addr(reg, SIZE_OP_JUMP);
  804. }
  805. else {
  806. r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
  807. }
  808. if (r) return r;
  809. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  810. if (r) return r;
  811. r = compile_tree(qn->target, reg);
  812. }
  813. else {
  814. r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
  815. if (CKN_ON) {
  816. if (r) return r;
  817. r = add_opcode(reg, OP_STATE_CHECK);
  818. if (r) return r;
  819. r = add_state_check_num(reg, ckn);
  820. }
  821. }
  822. return r;
  823. }
  824. #else /* USE_COMBINATION_EXPLOSION_CHECK */
  825. static int
  826. compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
  827. {
  828. int len, mod_tlen;
  829. int infinite = IS_REPEAT_INFINITE(qn->upper);
  830. int empty_info = qn->target_empty_info;
  831. int tlen = compile_length_tree(qn->target, reg);
  832. if (tlen < 0) return tlen;
  833. /* anychar repeat */
  834. if (NTYPE(qn->target) == NT_CANY) {
  835. if (qn->greedy && infinite) {
  836. if (IS_NOT_NULL(qn->next_head_exact))
  837. return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
  838. else
  839. return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
  840. }
  841. }
  842. if (empty_info != 0)
  843. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  844. else
  845. mod_tlen = tlen;
  846. if (infinite &&
  847. (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  848. if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
  849. len = SIZE_OP_JUMP;
  850. }
  851. else {
  852. len = tlen * qn->lower;
  853. }
  854. if (qn->greedy) {
  855. if (IS_NOT_NULL(qn->head_exact))
  856. len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
  857. else if (IS_NOT_NULL(qn->next_head_exact))
  858. len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
  859. else
  860. len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
  861. }
  862. else
  863. len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
  864. }
  865. else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
  866. len = SIZE_OP_JUMP + tlen;
  867. }
  868. else if (!infinite && qn->greedy &&
  869. (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
  870. <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  871. len = tlen * qn->lower;
  872. len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
  873. }
  874. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  875. len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
  876. }
  877. else {
  878. len = SIZE_OP_REPEAT_INC
  879. + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
  880. }
  881. return len;
  882. }
  883. static int
  884. compile_quantifier_node(QtfrNode* qn, regex_t* reg)
  885. {
  886. int i, r, mod_tlen;
  887. int infinite = IS_REPEAT_INFINITE(qn->upper);
  888. int empty_info = qn->target_empty_info;
  889. int tlen = compile_length_tree(qn->target, reg);
  890. if (tlen < 0) return tlen;
  891. if (is_anychar_star_quantifier(qn)) {
  892. r = compile_tree_n_times(qn->target, qn->lower, reg);
  893. if (r) return r;
  894. if (IS_NOT_NULL(qn->next_head_exact)) {
  895. if (IS_MULTILINE(reg->options))
  896. r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
  897. else
  898. r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
  899. if (r) return r;
  900. return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  901. }
  902. else {
  903. if (IS_MULTILINE(reg->options))
  904. return add_opcode(reg, OP_ANYCHAR_ML_STAR);
  905. else
  906. return add_opcode(reg, OP_ANYCHAR_STAR);
  907. }
  908. }
  909. if (empty_info != 0)
  910. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  911. else
  912. mod_tlen = tlen;
  913. if (infinite &&
  914. (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  915. if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
  916. if (qn->greedy) {
  917. if (IS_NOT_NULL(qn->head_exact))
  918. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
  919. else if (IS_NOT_NULL(qn->next_head_exact))
  920. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
  921. else
  922. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
  923. }
  924. else {
  925. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
  926. }
  927. if (r) return r;
  928. }
  929. else {
  930. r = compile_tree_n_times(qn->target, qn->lower, reg);
  931. if (r) return r;
  932. }
  933. if (qn->greedy) {
  934. if (IS_NOT_NULL(qn->head_exact)) {
  935. r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
  936. mod_tlen + SIZE_OP_JUMP);
  937. if (r) return r;
  938. add_bytes(reg, NSTR(qn->head_exact)->s, 1);
  939. r = compile_tree_empty_check(qn->target, reg, empty_info);
  940. if (r) return r;
  941. r = add_opcode_rel_addr(reg, OP_JUMP,
  942. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
  943. }
  944. else if (IS_NOT_NULL(qn->next_head_exact)) {
  945. r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
  946. mod_tlen + SIZE_OP_JUMP);
  947. if (r) return r;
  948. add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  949. r = compile_tree_empty_check(qn->target, reg, empty_info);
  950. if (r) return r;
  951. r = add_opcode_rel_addr(reg, OP_JUMP,
  952. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
  953. }
  954. else {
  955. r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
  956. if (r) return r;
  957. r = compile_tree_empty_check(qn->target, reg, empty_info);
  958. if (r) return r;
  959. r = add_opcode_rel_addr(reg, OP_JUMP,
  960. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
  961. }
  962. }
  963. else {
  964. r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
  965. if (r) return r;
  966. r = compile_tree_empty_check(qn->target, reg, empty_info);
  967. if (r) return r;
  968. r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
  969. }
  970. }
  971. else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
  972. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  973. if (r) return r;
  974. r = compile_tree(qn->target, reg);
  975. }
  976. else if (!infinite && qn->greedy &&
  977. (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
  978. <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  979. int n = qn->upper - qn->lower;
  980. r = compile_tree_n_times(qn->target, qn->lower, reg);
  981. if (r) return r;
  982. for (i = 0; i < n; i++) {
  983. r = add_opcode_rel_addr(reg, OP_PUSH,
  984. (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
  985. if (r) return r;
  986. r = compile_tree(qn->target, reg);
  987. if (r) return r;
  988. }
  989. }
  990. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  991. r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
  992. if (r) return r;
  993. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  994. if (r) return r;
  995. r = compile_tree(qn->target, reg);
  996. }
  997. else {
  998. r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
  999. }
  1000. return r;
  1001. }
  1002. #endif /* USE_COMBINATION_EXPLOSION_CHECK */
  1003. static int
  1004. compile_length_option_node(EncloseNode* node, regex_t* reg)
  1005. {
  1006. int tlen;
  1007. OnigOptionType prev = reg->options;
  1008. reg->options = node->option;
  1009. tlen = compile_length_tree(node->target, reg);
  1010. reg->options = prev;
  1011. if (tlen < 0) return tlen;
  1012. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1013. return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
  1014. + tlen + SIZE_OP_SET_OPTION;
  1015. }
  1016. else
  1017. return tlen;
  1018. }
  1019. static int
  1020. compile_option_node(EncloseNode* node, regex_t* reg)
  1021. {
  1022. int r;
  1023. OnigOptionType prev = reg->options;
  1024. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1025. r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
  1026. if (r) return r;
  1027. r = add_opcode_option(reg, OP_SET_OPTION, prev);
  1028. if (r) return r;
  1029. r = add_opcode(reg, OP_FAIL);
  1030. if (r) return r;
  1031. }
  1032. reg->options = node->option;
  1033. r = compile_tree(node->target, reg);
  1034. reg->options = prev;
  1035. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1036. if (r) return r;
  1037. r = add_opcode_option(reg, OP_SET_OPTION, prev);
  1038. }
  1039. return r;
  1040. }
  1041. static int
  1042. compile_length_enclose_node(EncloseNode* node, regex_t* reg)
  1043. {
  1044. int len;
  1045. int tlen;
  1046. if (node->type == ENCLOSE_OPTION)
  1047. return compile_length_option_node(node, reg);
  1048. if (node->target) {
  1049. tlen = compile_length_tree(node->target, reg);
  1050. if (tlen < 0) return tlen;
  1051. }
  1052. else
  1053. tlen = 0;
  1054. switch (node->type) {
  1055. case ENCLOSE_MEMORY:
  1056. #ifdef USE_SUBEXP_CALL
  1057. if (IS_ENCLOSE_CALLED(node)) {
  1058. len = SIZE_OP_MEMORY_START_PUSH + tlen
  1059. + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
  1060. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1061. len += (IS_ENCLOSE_RECURSION(node)
  1062. ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
  1063. else
  1064. len += (IS_ENCLOSE_RECURSION(node)
  1065. ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
  1066. }
  1067. else
  1068. #endif
  1069. {
  1070. if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
  1071. len = SIZE_OP_MEMORY_START_PUSH;
  1072. else
  1073. len = SIZE_OP_MEMORY_START;
  1074. len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
  1075. ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
  1076. }
  1077. break;
  1078. case ENCLOSE_STOP_BACKTRACK:
  1079. if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
  1080. QtfrNode* qn = NQTFR(node->target);
  1081. tlen = compile_length_tree(qn->target, reg);
  1082. if (tlen < 0) return tlen;
  1083. len = tlen * qn->lower
  1084. + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
  1085. }
  1086. else {
  1087. len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
  1088. }
  1089. break;
  1090. default:
  1091. return ONIGERR_TYPE_BUG;
  1092. break;
  1093. }
  1094. return len;
  1095. }
/* Forward declaration: compile_anchor_node() calls this to obtain the
   character length of a look-behind target when node->char_len is negative. */
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
  1097. static int
  1098. compile_enclose_node(EncloseNode* node, regex_t* reg)
  1099. {
  1100. int r, len;
  1101. if (node->type == ENCLOSE_OPTION)
  1102. return compile_option_node(node, reg);
  1103. switch (node->type) {
  1104. case ENCLOSE_MEMORY:
  1105. #ifdef USE_SUBEXP_CALL
  1106. if (IS_ENCLOSE_CALLED(node)) {
  1107. r = add_opcode(reg, OP_CALL);
  1108. if (r) return r;
  1109. node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
  1110. node->state |= NST_ADDR_FIXED;
  1111. r = add_abs_addr(reg, (int )node->call_addr);
  1112. if (r) return r;
  1113. len = compile_length_tree(node->target, reg);
  1114. len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
  1115. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1116. len += (IS_ENCLOSE_RECURSION(node)
  1117. ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
  1118. else
  1119. len += (IS_ENCLOSE_RECURSION(node)
  1120. ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
  1121. r = add_opcode_rel_addr(reg, OP_JUMP, len);
  1122. if (r) return r;
  1123. }
  1124. #endif
  1125. if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
  1126. r = add_opcode(reg, OP_MEMORY_START_PUSH);
  1127. else
  1128. r = add_opcode(reg, OP_MEMORY_START);
  1129. if (r) return r;
  1130. r = add_mem_num(reg, node->regnum);
  1131. if (r) return r;
  1132. r = compile_tree(node->target, reg);
  1133. if (r) return r;
  1134. #ifdef USE_SUBEXP_CALL
  1135. if (IS_ENCLOSE_CALLED(node)) {
  1136. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1137. r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
  1138. ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
  1139. else
  1140. r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
  1141. ? OP_MEMORY_END_REC : OP_MEMORY_END));
  1142. if (r) return r;
  1143. r = add_mem_num(reg, node->regnum);
  1144. if (r) return r;
  1145. r = add_opcode(reg, OP_RETURN);
  1146. }
  1147. else
  1148. #endif
  1149. {
  1150. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1151. r = add_opcode(reg, OP_MEMORY_END_PUSH);
  1152. else
  1153. r = add_opcode(reg, OP_MEMORY_END);
  1154. if (r) return r;
  1155. r = add_mem_num(reg, node->regnum);
  1156. }
  1157. break;
  1158. case ENCLOSE_STOP_BACKTRACK:
  1159. if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
  1160. QtfrNode* qn = NQTFR(node->target);
  1161. r = compile_tree_n_times(qn->target, qn->lower, reg);
  1162. if (r) return r;
  1163. len = compile_length_tree(qn->target, reg);
  1164. if (len < 0) return len;
  1165. r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
  1166. if (r) return r;
  1167. r = compile_tree(qn->target, reg);
  1168. if (r) return r;
  1169. r = add_opcode(reg, OP_POP);
  1170. if (r) return r;
  1171. r = add_opcode_rel_addr(reg, OP_JUMP,
  1172. -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
  1173. }
  1174. else {
  1175. r = add_opcode(reg, OP_PUSH_STOP_BT);
  1176. if (r) return r;
  1177. r = compile_tree(node->target, reg);
  1178. if (r) return r;
  1179. r = add_opcode(reg, OP_POP_STOP_BT);
  1180. }
  1181. break;
  1182. default:
  1183. return ONIGERR_TYPE_BUG;
  1184. break;
  1185. }
  1186. return r;
  1187. }
  1188. static int
  1189. compile_length_anchor_node(AnchorNode* node, regex_t* reg)
  1190. {
  1191. int len;
  1192. int tlen = 0;
  1193. if (node->target) {
  1194. tlen = compile_length_tree(node->target, reg);
  1195. if (tlen < 0) return tlen;
  1196. }
  1197. switch (node->type) {
  1198. case ANCHOR_PREC_READ:
  1199. len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
  1200. break;
  1201. case ANCHOR_PREC_READ_NOT:
  1202. len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
  1203. break;
  1204. case ANCHOR_LOOK_BEHIND:
  1205. len = SIZE_OP_LOOK_BEHIND + tlen;
  1206. break;
  1207. case ANCHOR_LOOK_BEHIND_NOT:
  1208. len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
  1209. break;
  1210. default:
  1211. len = SIZE_OPCODE;
  1212. break;
  1213. }
  1214. return len;
  1215. }
  1216. static int
  1217. compile_anchor_node(AnchorNode* node, regex_t* reg)
  1218. {
  1219. int r, len;
  1220. switch (node->type) {
  1221. case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
  1222. case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
  1223. case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
  1224. case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
  1225. case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
  1226. case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
  1227. case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
  1228. case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
  1229. #ifdef USE_WORD_BEGIN_END
  1230. case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
  1231. case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
  1232. #endif
  1233. case ANCHOR_PREC_READ:
  1234. r = add_opcode(reg, OP_PUSH_POS);
  1235. if (r) return r;
  1236. r = compile_tree(node->target, reg);
  1237. if (r) return r;
  1238. r = add_opcode(reg, OP_POP_POS);
  1239. break;
  1240. case ANCHOR_PREC_READ_NOT:
  1241. len = compile_length_tree(node->target, reg);
  1242. if (len < 0) return len;
  1243. r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
  1244. if (r) return r;
  1245. r = compile_tree(node->target, reg);
  1246. if (r) return r;
  1247. r = add_opcode(reg, OP_FAIL_POS);
  1248. break;
  1249. case ANCHOR_LOOK_BEHIND:
  1250. {
  1251. int n;
  1252. r = add_opcode(reg, OP_LOOK_BEHIND);
  1253. if (r) return r;
  1254. if (node->char_len < 0) {
  1255. r = get_char_length_tree(node->target, reg, &n);
  1256. if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
  1257. }
  1258. else
  1259. n = node->char_len;
  1260. r = add_length(reg, n);
  1261. if (r) return r;
  1262. r = compile_tree(node->target, reg);
  1263. }
  1264. break;
  1265. case ANCHOR_LOOK_BEHIND_NOT:
  1266. {
  1267. int n;
  1268. len = compile_length_tree(node->target, reg);
  1269. r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
  1270. len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
  1271. if (r) return r;
  1272. if (node->char_len < 0) {
  1273. r = get_char_length_tree(node->target, reg, &n);
  1274. if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
  1275. }
  1276. else
  1277. n = node->char_len;
  1278. r = add_length(reg, n);
  1279. if (r) return r;
  1280. r = compile_tree(node->target, reg);
  1281. if (r) return r;
  1282. r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
  1283. }
  1284. break;
  1285. default:
  1286. return ONIGERR_TYPE_BUG;
  1287. break;
  1288. }
  1289. return r;
  1290. }
  1291. static int
  1292. compile_length_tree(Node* node, regex_t* reg)
  1293. {
  1294. int len, type, r;
  1295. type = NTYPE(node);
  1296. switch (type) {
  1297. case NT_LIST:
  1298. len = 0;
  1299. do {
  1300. r = compile_length_tree(NCAR(node), reg);
  1301. if (r < 0) return r;
  1302. len += r;
  1303. } while (IS_NOT_NULL(node = NCDR(node)));
  1304. r = len;
  1305. break;
  1306. case NT_ALT:
  1307. {
  1308. int n;
  1309. n = r = 0;
  1310. do {
  1311. r += compile_length_tree(NCAR(node), reg);
  1312. n++;
  1313. } while (IS_NOT_NULL(node = NCDR(node)));
  1314. r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
  1315. }
  1316. break;
  1317. case NT_STR:
  1318. if (NSTRING_IS_RAW(node))
  1319. r = compile_length_string_raw_node(NSTR(node), reg);
  1320. else
  1321. r = compile_length_string_node(node, reg);
  1322. break;
  1323. case NT_CCLASS:
  1324. r = compile_length_cclass_node(NCCLASS(node), reg);
  1325. break;
  1326. case NT_CTYPE:
  1327. case NT_CANY:
  1328. r = SIZE_OPCODE;
  1329. break;
  1330. case NT_BREF:
  1331. {
  1332. BRefNode* br = NBREF(node);
  1333. #ifdef USE_BACKREF_WITH_LEVEL
  1334. if (IS_BACKREF_NEST_LEVEL(br)) {
  1335. r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
  1336. SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
  1337. }
  1338. else
  1339. #endif
  1340. if (br->back_num == 1) {
  1341. r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
  1342. ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
  1343. }
  1344. else {
  1345. r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
  1346. }
  1347. }
  1348. break;
  1349. #ifdef USE_SUBEXP_CALL
  1350. case NT_CALL:
  1351. r = SIZE_OP_CALL;
  1352. break;
  1353. #endif
  1354. case NT_QTFR:
  1355. r = compile_length_quantifier_node(NQTFR(node), reg);
  1356. break;
  1357. case NT_ENCLOSE:
  1358. r = compile_length_enclose_node(NENCLOSE(node), reg);
  1359. break;
  1360. case NT_ANCHOR:
  1361. r = compile_length_anchor_node(NANCHOR(node), reg);
  1362. break;
  1363. default:
  1364. return ONIGERR_TYPE_BUG;
  1365. break;
  1366. }
  1367. return r;
  1368. }
  1369. static int
  1370. compile_tree(Node* node, regex_t* reg)
  1371. {
  1372. int n, type, len, pos, r = 0;
  1373. type = NTYPE(node);
  1374. switch (type) {
  1375. case NT_LIST:
  1376. do {
  1377. r = compile_tree(NCAR(node), reg);
  1378. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1379. break;
  1380. case NT_ALT:
  1381. {
  1382. Node* x = node;
  1383. len = 0;
  1384. do {
  1385. len += compile_length_tree(NCAR(x), reg);
  1386. if (NCDR(x) != NULL) {
  1387. len += SIZE_OP_PUSH + SIZE_OP_JUMP;
  1388. }
  1389. } while (IS_NOT_NULL(x = NCDR(x)));
  1390. pos = reg->used + len; /* goal position */
  1391. do {
  1392. len = compile_length_tree(NCAR(node), reg);
  1393. if (IS_NOT_NULL(NCDR(node))) {
  1394. r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
  1395. if (r) break;
  1396. }
  1397. r = compile_tree(NCAR(node), reg);
  1398. if (r) break;
  1399. if (IS_NOT_NULL(NCDR(node))) {
  1400. len = pos - (reg->used + SIZE_OP_JUMP);
  1401. r = add_opcode_rel_addr(reg, OP_JUMP, len);
  1402. if (r) break;
  1403. }
  1404. } while (IS_NOT_NULL(node = NCDR(node)));
  1405. }
  1406. break;
  1407. case NT_STR:
  1408. if (NSTRING_IS_RAW(node))
  1409. r = compile_string_raw_node(NSTR(node), reg);
  1410. else
  1411. r = compile_string_node(node, reg);
  1412. break;
  1413. case NT_CCLASS:
  1414. r = compile_cclass_node(NCCLASS(node), reg);
  1415. break;
  1416. case NT_CTYPE:
  1417. {
  1418. int op;
  1419. switch (NCTYPE(node)->ctype) {
  1420. case ONIGENC_CTYPE_WORD:
  1421. if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
  1422. else op = OP_WORD;
  1423. break;
  1424. default:
  1425. return ONIGERR_TYPE_BUG;
  1426. break;
  1427. }
  1428. r = add_opcode(reg, op);
  1429. }
  1430. break;
  1431. case NT_CANY:
  1432. if (IS_MULTILINE(reg->options))
  1433. r = add_opcode(reg, OP_ANYCHAR_ML);
  1434. else
  1435. r = add_opcode(reg, OP_ANYCHAR);
  1436. break;
  1437. case NT_BREF:
  1438. {
  1439. BRefNode* br = NBREF(node);
  1440. #ifdef USE_BACKREF_WITH_LEVEL
  1441. if (IS_BACKREF_NEST_LEVEL(br)) {
  1442. r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
  1443. if (r) return r;
  1444. r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
  1445. if (r) return r;
  1446. r = add_length(reg, br->nest_level);
  1447. if (r) return r;
  1448. goto add_bacref_mems;
  1449. }
  1450. else
  1451. #endif
  1452. if (br->back_num == 1) {
  1453. n = br->back_static[0];
  1454. if (IS_IGNORECASE(reg->options)) {
  1455. r = add_opcode(reg, OP_BACKREFN_IC);
  1456. if (r) return r;
  1457. r = add_mem_num(reg, n);
  1458. }
  1459. else {
  1460. switch (n) {
  1461. case 1: r = add_opcode(reg, OP_BACKREF1); break;
  1462. case 2: r = add_opcode(reg, OP_BACKREF2); break;
  1463. default:
  1464. r = add_opcode(reg, OP_BACKREFN);
  1465. if (r) return r;
  1466. r = add_mem_num(reg, n);
  1467. break;
  1468. }
  1469. }
  1470. }
  1471. else {
  1472. int i;
  1473. int* p;
  1474. if (IS_IGNORECASE(reg->options)) {
  1475. r = add_opcode(reg, OP_BACKREF_MULTI_IC);
  1476. }
  1477. else {
  1478. r = add_opcode(reg, OP_BACKREF_MULTI);
  1479. }
  1480. if (r) return r;
  1481. #ifdef USE_BACKREF_WITH_LEVEL
  1482. add_bacref_mems:
  1483. #endif
  1484. r = add_length(reg, br->back_num);
  1485. if (r) return r;
  1486. p = BACKREFS_P(br);
  1487. for (i = br->back_num - 1; i >= 0; i--) {
  1488. r = add_mem_num(reg, p[i]);
  1489. if (r) return r;
  1490. }
  1491. }
  1492. }
  1493. break;
  1494. #ifdef USE_SUBEXP_CALL
  1495. case NT_CALL:
  1496. r = compile_call(NCALL(node), reg);
  1497. break;
  1498. #endif
  1499. case NT_QTFR:
  1500. r = compile_quantifier_node(NQTFR(node), reg);
  1501. break;
  1502. case NT_ENCLOSE:
  1503. r = compile_enclose_node(NENCLOSE(node), reg);
  1504. break;
  1505. case NT_ANCHOR:
  1506. r = compile_anchor_node(NANCHOR(node), reg);
  1507. break;
  1508. default:
  1509. #ifdef ONIG_DEBUG
  1510. fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
  1511. #endif
  1512. break;
  1513. }
  1514. return r;
  1515. }
  1516. #ifdef USE_NAMED_GROUP
  1517. static int
  1518. noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
  1519. {
  1520. int r = 0;
  1521. Node* node = *plink;
  1522. switch (NTYPE(node)) {
  1523. case NT_LIST:
  1524. case NT_ALT:
  1525. do {
  1526. r = noname_disable_map(&(NCAR(node)), map, counter);
  1527. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1528. break;
  1529. case NT_QTFR:
  1530. {
  1531. Node** ptarget = &(NQTFR(node)->target);
  1532. Node* old = *ptarget;
  1533. r = noname_disable_map(ptarget, map, counter);
  1534. if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
  1535. onig_reduce_nested_quantifier(node, *ptarget);
  1536. }
  1537. }
  1538. break;
  1539. case NT_ENCLOSE:
  1540. {
  1541. EncloseNode* en = NENCLOSE(node);
  1542. if (en->type == ENCLOSE_MEMORY) {
  1543. if (IS_ENCLOSE_NAMED_GROUP(en)) {
  1544. (*counter)++;
  1545. map[en->regnum].new_val = *counter;
  1546. en->regnum = *counter;
  1547. r = noname_disable_map(&(en->target), map, counter);
  1548. }
  1549. else {
  1550. *plink = en->target;
  1551. en->target = NULL_NODE;
  1552. onig_node_free(node);
  1553. r = noname_disable_map(plink, map, counter);
  1554. }
  1555. }
  1556. else
  1557. r = noname_disable_map(&(en->target), map, counter);
  1558. }
  1559. break;
  1560. case NT_ANCHOR:
  1561. {
  1562. AnchorNode* an = NANCHOR(node);
  1563. switch (an->type) {
  1564. case ANCHOR_PREC_READ:
  1565. case ANCHOR_PREC_READ_NOT:
  1566. case ANCHOR_LOOK_BEHIND:
  1567. case ANCHOR_LOOK_BEHIND_NOT:
  1568. r = noname_disable_map(&(an->target), map, counter);
  1569. break;
  1570. }
  1571. }
  1572. break;
  1573. default:
  1574. break;
  1575. }
  1576. return r;
  1577. }
  1578. static int
  1579. renumber_node_backref(Node* node, GroupNumRemap* map)
  1580. {
  1581. int i, pos, n, old_num;
  1582. int *backs;
  1583. BRefNode* bn = NBREF(node);
  1584. if (! IS_BACKREF_NAME_REF(bn))
  1585. return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
  1586. old_num = bn->back_num;
  1587. if (IS_NULL(bn->back_dynamic))
  1588. backs = bn->back_static;
  1589. else
  1590. backs = bn->back_dynamic;
  1591. for (i = 0, pos = 0; i < old_num; i++) {
  1592. n = map[backs[i]].new_val;
  1593. if (n > 0) {
  1594. backs[pos] = n;
  1595. pos++;
  1596. }
  1597. }
  1598. bn->back_num = pos;
  1599. return 0;
  1600. }
  1601. static int
  1602. renumber_by_map(Node* node, GroupNumRemap* map)
  1603. {
  1604. int r = 0;
  1605. switch (NTYPE(node)) {
  1606. case NT_LIST:
  1607. case NT_ALT:
  1608. do {
  1609. r = renumber_by_map(NCAR(node), map);
  1610. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1611. break;
  1612. case NT_QTFR:
  1613. r = renumber_by_map(NQTFR(node)->target, map);
  1614. break;
  1615. case NT_ENCLOSE:
  1616. r = renumber_by_map(NENCLOSE(node)->target, map);
  1617. break;
  1618. case NT_BREF:
  1619. r = renumber_node_backref(node, map);
  1620. break;
  1621. case NT_ANCHOR:
  1622. {
  1623. AnchorNode* an = NANCHOR(node);
  1624. switch (an->type) {
  1625. case ANCHOR_PREC_READ:
  1626. case ANCHOR_PREC_READ_NOT:
  1627. case ANCHOR_LOOK_BEHIND:
  1628. case ANCHOR_LOOK_BEHIND_NOT:
  1629. r = renumber_by_map(an->target, map);
  1630. break;
  1631. }
  1632. }
  1633. break;
  1634. default:
  1635. break;
  1636. }
  1637. return r;
  1638. }
  1639. static int
  1640. numbered_ref_check(Node* node)
  1641. {
  1642. int r = 0;
  1643. switch (NTYPE(node)) {
  1644. case NT_LIST:
  1645. case NT_ALT:
  1646. do {
  1647. r = numbered_ref_check(NCAR(node));
  1648. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1649. break;
  1650. case NT_QTFR:
  1651. r = numbered_ref_check(NQTFR(node)->target);
  1652. break;
  1653. case NT_ENCLOSE:
  1654. r = numbered_ref_check(NENCLOSE(node)->target);
  1655. break;
  1656. case NT_BREF:
  1657. if (! IS_BACKREF_NAME_REF(NBREF(node)))
  1658. return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
  1659. break;
  1660. default:
  1661. break;
  1662. }
  1663. return r;
  1664. }
  1665. static int
  1666. disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
  1667. {
  1668. int r, i, pos, counter;
  1669. BitStatusType loc;
  1670. GroupNumRemap* map;
  1671. map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
  1672. CHECK_NULL_RETURN_MEMERR(map);
  1673. for (i = 1; i <= env->num_mem; i++) {
  1674. map[i].new_val = 0;
  1675. }
  1676. counter = 0;
  1677. r = noname_disable_map(root, map, &counter);
  1678. if (r != 0) return r;
  1679. r = renumber_by_map(*root, map);
  1680. if (r != 0) return r;
  1681. for (i = 1, pos = 1; i <= env->num_mem; i++) {
  1682. if (map[i].new_val > 0) {
  1683. SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
  1684. pos++;
  1685. }
  1686. }
  1687. loc = env->capture_history;
  1688. BIT_STATUS_CLEAR(env->capture_history);
  1689. for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
  1690. if (BIT_STATUS_AT(loc, i)) {
  1691. BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
  1692. }
  1693. }
  1694. env->num_mem = env->num_named;
  1695. reg->num_mem = env->num_named;
  1696. return onig_renumber_name_table(reg, map);
  1697. }
  1698. #endif /* USE_NAMED_GROUP */
  1699. #ifdef USE_SUBEXP_CALL
  1700. static int
  1701. unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
  1702. {
  1703. int i, offset;
  1704. EncloseNode* en;
  1705. AbsAddrType addr;
  1706. for (i = 0; i < uslist->num; i++) {
  1707. en = NENCLOSE(uslist->us[i].target);
  1708. if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
  1709. addr = en->call_addr;
  1710. offset = uslist->us[i].offset;
  1711. BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
  1712. }
  1713. return 0;
  1714. }
  1715. #endif
  1716. #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
  1717. static int
  1718. quantifiers_memory_node_info(Node* node)
  1719. {
  1720. int r = 0;
  1721. switch (NTYPE(node)) {
  1722. case NT_LIST:
  1723. case NT_ALT:
  1724. {
  1725. int v;
  1726. do {
  1727. v = quantifiers_memory_node_info(NCAR(node));
  1728. if (v > r) r = v;
  1729. } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
  1730. }
  1731. break;
  1732. #ifdef USE_SUBEXP_CALL
  1733. case NT_CALL:
  1734. if (IS_CALL_RECURSION(NCALL(node))) {
  1735. return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
  1736. }
  1737. else
  1738. r = quantifiers_memory_node_info(NCALL(node)->target);
  1739. break;
  1740. #endif
  1741. case NT_QTFR:
  1742. {
  1743. QtfrNode* qn = NQTFR(node);
  1744. if (qn->upper != 0) {
  1745. r = quantifiers_memory_node_info(qn->target);
  1746. }
  1747. }
  1748. break;
  1749. case NT_ENCLOSE:
  1750. {
  1751. EncloseNode* en = NENCLOSE(node);
  1752. switch (en->type) {
  1753. case ENCLOSE_MEMORY:
  1754. return NQ_TARGET_IS_EMPTY_MEM;
  1755. break;
  1756. case ENCLOSE_OPTION:
  1757. case ENCLOSE_STOP_BACKTRACK:
  1758. r = quantifiers_memory_node_info(en->target);
  1759. break;
  1760. default:
  1761. break;
  1762. }
  1763. }
  1764. break;
  1765. case NT_BREF:
  1766. case NT_STR:
  1767. case NT_CTYPE:
  1768. case NT_CCLASS:
  1769. case NT_CANY:
  1770. case NT_ANCHOR:
  1771. default:
  1772. break;
  1773. }
  1774. return r;
  1775. }
  1776. #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
  1777. static int
  1778. get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
  1779. {
  1780. OnigDistance tmin;
  1781. int r = 0;
  1782. *min = 0;
  1783. switch (NTYPE(node)) {
  1784. case NT_BREF:
  1785. {
  1786. int i;
  1787. int* backs;
  1788. Node** nodes = SCANENV_MEM_NODES(env);
  1789. BRefNode* br = NBREF(node);
  1790. if (br->state & NST_RECURSION) break;
  1791. backs = BACKREFS_P(br);
  1792. if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
  1793. r = get_min_match_length(nodes[backs[0]], min, env);
  1794. if (r != 0) break;
  1795. for (i = 1; i < br->back_num; i++) {
  1796. if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
  1797. r = get_min_match_length(nodes[backs[i]], &tmin, env);
  1798. if (r != 0) break;
  1799. if (*min > tmin) *min = tmin;
  1800. }
  1801. }
  1802. break;
  1803. #ifdef USE_SUBEXP_CALL
  1804. case NT_CALL:
  1805. if (IS_CALL_RECURSION(NCALL(node))) {
  1806. EncloseNode* en = NENCLOSE(NCALL(node)->target);
  1807. if (IS_ENCLOSE_MIN_FIXED(en))
  1808. *min = en->min_len;
  1809. }
  1810. else
  1811. r = get_min_match_length(NCALL(node)->target, min, env);
  1812. break;
  1813. #endif
  1814. case NT_LIST:
  1815. do {
  1816. r = get_min_match_length(NCAR(node), &tmin, env);
  1817. if (r == 0) *min += tmin;
  1818. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1819. break;
  1820. case NT_ALT:
  1821. {
  1822. Node *x, *y;
  1823. y = node;
  1824. do {
  1825. x = NCAR(y);
  1826. r = get_min_match_length(x, &tmin, env);
  1827. if (r != 0) break;
  1828. if (y == node) *min = tmin;
  1829. else if (*min > tmin) *min = tmin;
  1830. } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
  1831. }
  1832. break;
  1833. case NT_STR:
  1834. {
  1835. StrNode* sn = NSTR(node);
  1836. *min = sn->end - sn->s;
  1837. }
  1838. break;
  1839. case NT_CTYPE:
  1840. *min = 1;
  1841. break;
  1842. case NT_CCLASS:
  1843. case NT_CANY:
  1844. *min = 1;
  1845. break;
  1846. case NT_QTFR:
  1847. {
  1848. QtfrNode* qn = NQTFR(node);
  1849. if (qn->lower > 0) {
  1850. r = get_min_match_length(qn->target, min, env);
  1851. if (r == 0)
  1852. *min = distance_multiply(*min, qn->lower);
  1853. }
  1854. }
  1855. break;
  1856. case NT_ENCLOSE:
  1857. {
  1858. EncloseNode* en = NENCLOSE(node);
  1859. switch (en->type) {
  1860. case ENCLOSE_MEMORY:
  1861. #ifdef USE_SUBEXP_CALL
  1862. if (IS_ENCLOSE_MIN_FIXED(en))
  1863. *min = en->min_len;
  1864. else {
  1865. r = get_min_match_length(en->target, min, env);
  1866. if (r == 0) {
  1867. en->min_len = *min;
  1868. SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
  1869. }
  1870. }
  1871. break;
  1872. #endif
  1873. cas