PageRenderTime 82ms CodeModel.GetById 39ms RepoModel.GetById 1ms app.codeStats 1ms

/regcomp.c

https://github.com/wanabe/ruby
C | 6764 lines | 5799 code | 894 blank | 71 comment | 1548 complexity | 1db447b23f7c522e48fb2db082a273af MD5 | raw file
Possible License(s): LGPL-2.1, AGPL-3.0, 0BSD, Unlicense, GPL-2.0, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /**********************************************************************
  2. regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
  3. **********************************************************************/
  4. /*-
  5. * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
  6. * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
  7. * All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions
  11. * are met:
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright
  15. * notice, this list of conditions and the following disclaimer in the
  16. * documentation and/or other materials provided with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  22. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28. * SUCH DAMAGE.
  29. */
  30. #include "regparse.h"
  /* File-level default case-fold flag, initialised to ONIGENC_CASE_FOLD_MIN.
   * It is returned by onig_get_default_case_fold_flag() and overwritten by
   * onig_set_default_case_fold_flag(). */
  31. OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
  32. extern OnigCaseFoldType
  33. onig_get_default_case_fold_flag(void)
  34. {
  35. return OnigDefaultCaseFoldFlag;
  36. }
  37. extern int
  38. onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
  39. {
  40. OnigDefaultCaseFoldFlag = case_fold_flag;
  41. return 0;
  42. }
  /* Filler bytes written by add_multi_byte_cclass() as alignment padding.
   * Only compiled in when PLATFORM_UNALIGNED_WORD_ACCESS is not defined.
   * Being a static array without an initializer, it is zero-filled. */
  43. #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
  44. static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
  45. #endif
  46. #if 0
  47. static UChar*
  48. str_dup(UChar* s, UChar* end)
  49. {
  50. ptrdiff_t len = end - s;
  51. if (len > 0) {
  52. UChar* r = (UChar* )xmalloc(len + 1);
  53. CHECK_NULL_RETURN(r);
  54. xmemcpy(r, s, len);
  55. r[len] = (UChar )0;
  56. return r;
  57. }
  58. else return NULL;
  59. }
  60. #endif
  61. static void
  62. swap_node(Node* a, Node* b)
  63. {
  64. Node c;
  65. c = *a; *a = *b; *b = c;
  66. if (NTYPE(a) == NT_STR) {
  67. StrNode* sn = NSTR(a);
  68. if (sn->capa == 0) {
  69. size_t len = sn->end - sn->s;
  70. sn->s = sn->buf;
  71. sn->end = sn->s + len;
  72. }
  73. }
  74. if (NTYPE(b) == NT_STR) {
  75. StrNode* sn = NSTR(b);
  76. if (sn->capa == 0) {
  77. size_t len = sn->end - sn->s;
  78. sn->s = sn->buf;
  79. sn->end = sn->s + len;
  80. }
  81. }
  82. }
  83. static OnigDistance
  84. distance_add(OnigDistance d1, OnigDistance d2)
  85. {
  86. if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
  87. return ONIG_INFINITE_DISTANCE;
  88. else {
  89. if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
  90. else return ONIG_INFINITE_DISTANCE;
  91. }
  92. }
  93. static OnigDistance
  94. distance_multiply(OnigDistance d, int m)
  95. {
  96. if (m == 0) return 0;
  97. if (d < ONIG_INFINITE_DISTANCE / m)
  98. return d * m;
  99. else
  100. return ONIG_INFINITE_DISTANCE;
  101. }
  102. static int
  103. bitset_is_empty(BitSetRef bs)
  104. {
  105. int i;
  106. for (i = 0; i < BITSET_SIZE; i++) {
  107. if (bs[i] != 0) return 0;
  108. }
  109. return 1;
  110. }
  111. #ifdef ONIG_DEBUG
  112. static int
  113. bitset_on_num(BitSetRef bs)
  114. {
  115. int i, n;
  116. n = 0;
  117. for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
  118. if (BITSET_AT(bs, i)) n++;
  119. }
  120. return n;
  121. }
  122. #endif
  123. // Attempt to right size allocated buffers for a regex post compile
  124. static void
  125. onig_reg_resize(regex_t *reg)
  126. {
  127. resize:
  128. if (reg->alloc > reg->used) {
  129. unsigned char *new_ptr = xrealloc(reg->p, reg->used);
  130. // Skip the right size optimization if memory allocation fails
  131. if (new_ptr) {
  132. reg->alloc = reg->used;
  133. reg->p = new_ptr;
  134. }
  135. }
  136. if (reg->chain) {
  137. reg = reg->chain;
  138. goto resize;
  139. }
  140. }
  141. extern int
  142. onig_bbuf_init(BBuf* buf, OnigDistance size)
  143. {
  144. if (size <= 0) {
  145. size = 0;
  146. buf->p = NULL;
  147. }
  148. else {
  149. buf->p = (UChar* )xmalloc(size);
  150. if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
  151. }
  152. buf->alloc = (unsigned int )size;
  153. buf->used = 0;
  154. return 0;
  155. }
  156. #ifdef USE_SUBEXP_CALL
  157. static int
  158. unset_addr_list_init(UnsetAddrList* uslist, int size)
  159. {
  160. UnsetAddr* p;
  161. p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
  162. CHECK_NULL_RETURN_MEMERR(p);
  163. uslist->num = 0;
  164. uslist->alloc = size;
  165. uslist->us = p;
  166. return 0;
  167. }
  168. static void
  169. unset_addr_list_end(UnsetAddrList* uslist)
  170. {
  171. if (IS_NOT_NULL(uslist->us))
  172. xfree(uslist->us);
  173. }
  174. static int
  175. unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
  176. {
  177. UnsetAddr* p;
  178. int size;
  179. if (uslist->num >= uslist->alloc) {
  180. size = uslist->alloc * 2;
  181. p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
  182. CHECK_NULL_RETURN_MEMERR(p);
  183. uslist->alloc = size;
  184. uslist->us = p;
  185. }
  186. uslist->us[uslist->num].offset = offset;
  187. uslist->us[uslist->num].target = node;
  188. uslist->num++;
  189. return 0;
  190. }
  191. #endif /* USE_SUBEXP_CALL */
  192. static int
  193. add_opcode(regex_t* reg, int opcode)
  194. {
  195. BBUF_ADD1(reg, opcode);
  196. return 0;
  197. }
  198. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  199. static int
  200. add_state_check_num(regex_t* reg, int num)
  201. {
  202. StateCheckNumType n = (StateCheckNumType )num;
  203. BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
  204. return 0;
  205. }
  206. #endif
  207. static int
  208. add_rel_addr(regex_t* reg, int addr)
  209. {
  210. RelAddrType ra = (RelAddrType )addr;
  211. BBUF_ADD(reg, &ra, SIZE_RELADDR);
  212. return 0;
  213. }
  214. static int
  215. add_abs_addr(regex_t* reg, int addr)
  216. {
  217. AbsAddrType ra = (AbsAddrType )addr;
  218. BBUF_ADD(reg, &ra, SIZE_ABSADDR);
  219. return 0;
  220. }
  221. static int
  222. add_length(regex_t* reg, OnigDistance len)
  223. {
  224. LengthType l = (LengthType )len;
  225. BBUF_ADD(reg, &l, SIZE_LENGTH);
  226. return 0;
  227. }
  228. static int
  229. add_mem_num(regex_t* reg, int num)
  230. {
  231. MemNumType n = (MemNumType )num;
  232. BBUF_ADD(reg, &n, SIZE_MEMNUM);
  233. return 0;
  234. }
  235. #if 0
  236. static int
  237. add_pointer(regex_t* reg, void* addr)
  238. {
  239. PointerType ptr = (PointerType )addr;
  240. BBUF_ADD(reg, &ptr, SIZE_POINTER);
  241. return 0;
  242. }
  243. #endif
  244. static int
  245. add_option(regex_t* reg, OnigOptionType option)
  246. {
  247. BBUF_ADD(reg, &option, SIZE_OPTION);
  248. return 0;
  249. }
  250. static int
  251. add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
  252. {
  253. int r;
  254. r = add_opcode(reg, opcode);
  255. if (r) return r;
  256. r = add_rel_addr(reg, addr);
  257. return r;
  258. }
  259. static int
  260. add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
  261. {
  262. BBUF_ADD(reg, bytes, len);
  263. return 0;
  264. }
  265. static int
  266. add_bitset(regex_t* reg, BitSetRef bs)
  267. {
  268. BBUF_ADD(reg, bs, SIZE_BITSET);
  269. return 0;
  270. }
  271. static int
  272. add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
  273. {
  274. int r;
  275. r = add_opcode(reg, opcode);
  276. if (r) return r;
  277. r = add_option(reg, option);
  278. return r;
  279. }
  /* Forward declarations: the node compilers below call these two before
   * their definitions appear. */
  280. static int compile_length_tree(Node* node, regex_t* reg);
  281. static int compile_tree(Node* node, regex_t* reg);
  /* True for the exact-string opcodes that add_compile_string() and
   * add_compile_string_length() follow with an explicit length operand. */
  282. #define IS_NEED_STR_LEN_OP_EXACT(op) \
  283. ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
  284. (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
  285. static int
  286. select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
  287. {
  288. int op;
  289. OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
  290. if (ignore_case) {
  291. switch (str_len) {
  292. case 1: op = OP_EXACT1_IC; break;
  293. default: op = OP_EXACTN_IC; break;
  294. }
  295. }
  296. else {
  297. switch (mb_len) {
  298. case 1:
  299. switch (str_len) {
  300. case 1: op = OP_EXACT1; break;
  301. case 2: op = OP_EXACT2; break;
  302. case 3: op = OP_EXACT3; break;
  303. case 4: op = OP_EXACT4; break;
  304. case 5: op = OP_EXACT5; break;
  305. default: op = OP_EXACTN; break;
  306. }
  307. break;
  308. case 2:
  309. switch (str_len) {
  310. case 1: op = OP_EXACTMB2N1; break;
  311. case 2: op = OP_EXACTMB2N2; break;
  312. case 3: op = OP_EXACTMB2N3; break;
  313. default: op = OP_EXACTMB2N; break;
  314. }
  315. break;
  316. case 3:
  317. op = OP_EXACTMB3N;
  318. break;
  319. default:
  320. op = OP_EXACTMBN;
  321. break;
  322. }
  323. }
  324. return op;
  325. }
  326. static int
  327. compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
  328. {
  329. int r;
  330. int saved_num_null_check = reg->num_null_check;
  331. if (empty_info != 0) {
  332. r = add_opcode(reg, OP_NULL_CHECK_START);
  333. if (r) return r;
  334. r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
  335. if (r) return r;
  336. reg->num_null_check++;
  337. }
  338. r = compile_tree(node, reg);
  339. if (r) return r;
  340. if (empty_info != 0) {
  341. if (empty_info == NQ_TARGET_IS_EMPTY)
  342. r = add_opcode(reg, OP_NULL_CHECK_END);
  343. else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
  344. r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
  345. else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
  346. r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
  347. if (r) return r;
  348. r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
  349. }
  350. return r;
  351. }
  352. #ifdef USE_SUBEXP_CALL
  353. static int
  354. compile_call(CallNode* node, regex_t* reg)
  355. {
  356. int r;
  357. r = add_opcode(reg, OP_CALL);
  358. if (r) return r;
  359. r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
  360. node->target);
  361. if (r) return r;
  362. r = add_abs_addr(reg, 0 /*dummy addr.*/);
  363. return r;
  364. }
  365. #endif
  366. static int
  367. compile_tree_n_times(Node* node, int n, regex_t* reg)
  368. {
  369. int i, r;
  370. for (i = 0; i < n; i++) {
  371. r = compile_tree(node, reg);
  372. if (r) return r;
  373. }
  374. return 0;
  375. }
  376. static int
  377. add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
  378. regex_t* reg ARG_UNUSED, int ignore_case)
  379. {
  380. int len;
  381. int op = select_str_opcode(mb_len, byte_len, ignore_case);
  382. len = SIZE_OPCODE;
  383. if (op == OP_EXACTMBN) len += SIZE_LENGTH;
  384. if (IS_NEED_STR_LEN_OP_EXACT(op))
  385. len += SIZE_LENGTH;
  386. len += (int )byte_len;
  387. return len;
  388. }
  389. static int
  390. add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
  391. regex_t* reg, int ignore_case)
  392. {
  393. int op = select_str_opcode(mb_len, byte_len, ignore_case);
  394. add_opcode(reg, op);
  395. if (op == OP_EXACTMBN)
  396. add_length(reg, mb_len);
  397. if (IS_NEED_STR_LEN_OP_EXACT(op)) {
  398. if (op == OP_EXACTN_IC)
  399. add_length(reg, byte_len);
  400. else
  401. add_length(reg, byte_len / mb_len);
  402. }
  403. add_bytes(reg, s, byte_len);
  404. return 0;
  405. }
  406. static int
  407. compile_length_string_node(Node* node, regex_t* reg)
  408. {
  409. int rlen, r, len, prev_len, blen, ambig;
  410. OnigEncoding enc = reg->enc;
  411. UChar *p, *prev;
  412. StrNode* sn;
  413. sn = NSTR(node);
  414. if (sn->end <= sn->s)
  415. return 0;
  416. ambig = NSTRING_IS_AMBIG(node);
  417. p = prev = sn->s;
  418. prev_len = enclen(enc, p, sn->end);
  419. p += prev_len;
  420. blen = prev_len;
  421. rlen = 0;
  422. for (; p < sn->end; ) {
  423. len = enclen(enc, p, sn->end);
  424. if (len == prev_len || ambig) {
  425. blen += len;
  426. }
  427. else {
  428. r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
  429. rlen += r;
  430. prev = p;
  431. blen = len;
  432. prev_len = len;
  433. }
  434. p += len;
  435. }
  436. r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
  437. rlen += r;
  438. return rlen;
  439. }
  440. static int
  441. compile_length_string_raw_node(StrNode* sn, regex_t* reg)
  442. {
  443. if (sn->end <= sn->s)
  444. return 0;
  445. return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
  446. }
  447. static int
  448. compile_string_node(Node* node, regex_t* reg)
  449. {
  450. int r, len, prev_len, blen, ambig;
  451. OnigEncoding enc = reg->enc;
  452. UChar *p, *prev, *end;
  453. StrNode* sn;
  454. sn = NSTR(node);
  455. if (sn->end <= sn->s)
  456. return 0;
  457. end = sn->end;
  458. ambig = NSTRING_IS_AMBIG(node);
  459. p = prev = sn->s;
  460. prev_len = enclen(enc, p, end);
  461. p += prev_len;
  462. blen = prev_len;
  463. for (; p < end; ) {
  464. len = enclen(enc, p, end);
  465. if (len == prev_len || ambig) {
  466. blen += len;
  467. }
  468. else {
  469. r = add_compile_string(prev, prev_len, blen, reg, ambig);
  470. if (r) return r;
  471. prev = p;
  472. blen = len;
  473. prev_len = len;
  474. }
  475. p += len;
  476. }
  477. return add_compile_string(prev, prev_len, blen, reg, ambig);
  478. }
  479. static int
  480. compile_string_raw_node(StrNode* sn, regex_t* reg)
  481. {
  482. if (sn->end <= sn->s)
  483. return 0;
  484. return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
  485. }
  486. static int
  487. add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
  488. {
  489. #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  490. add_length(reg, mbuf->used);
  491. return add_bytes(reg, mbuf->p, mbuf->used);
  492. #else
  493. int r, pad_size;
  494. UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
  495. GET_ALIGNMENT_PAD_SIZE(p, pad_size);
  496. add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
  497. if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
  498. r = add_bytes(reg, mbuf->p, mbuf->used);
  499. /* padding for return value from compile_length_cclass_node() to be fix. */
  500. pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
  501. if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
  502. return r;
  503. #endif
  504. }
  505. static int
  506. compile_length_cclass_node(CClassNode* cc, regex_t* reg)
  507. {
  508. int len;
  509. if (IS_NULL(cc->mbuf)) {
  510. len = SIZE_OPCODE + SIZE_BITSET;
  511. }
  512. else {
  513. if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
  514. len = SIZE_OPCODE;
  515. }
  516. else {
  517. len = SIZE_OPCODE + SIZE_BITSET;
  518. }
  519. #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
  520. len += SIZE_LENGTH + cc->mbuf->used;
  521. #else
  522. len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
  523. #endif
  524. }
  525. return len;
  526. }
  527. static int
  528. compile_cclass_node(CClassNode* cc, regex_t* reg)
  529. {
  530. int r;
  531. if (IS_NULL(cc->mbuf)) {
  532. if (IS_NCCLASS_NOT(cc))
  533. add_opcode(reg, OP_CCLASS_NOT);
  534. else
  535. add_opcode(reg, OP_CCLASS);
  536. r = add_bitset(reg, cc->bs);
  537. }
  538. else {
  539. if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
  540. if (IS_NCCLASS_NOT(cc))
  541. add_opcode(reg, OP_CCLASS_MB_NOT);
  542. else
  543. add_opcode(reg, OP_CCLASS_MB);
  544. r = add_multi_byte_cclass(cc->mbuf, reg);
  545. }
  546. else {
  547. if (IS_NCCLASS_NOT(cc))
  548. add_opcode(reg, OP_CCLASS_MIX_NOT);
  549. else
  550. add_opcode(reg, OP_CCLASS_MIX);
  551. r = add_bitset(reg, cc->bs);
  552. if (r) return r;
  553. r = add_multi_byte_cclass(cc->mbuf, reg);
  554. }
  555. }
  556. return r;
  557. }
  558. static int
  559. entry_repeat_range(regex_t* reg, int id, int lower, int upper)
  560. {
  561. #define REPEAT_RANGE_ALLOC 4
  562. OnigRepeatRange* p;
  563. if (reg->repeat_range_alloc == 0) {
  564. p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
  565. CHECK_NULL_RETURN_MEMERR(p);
  566. reg->repeat_range = p;
  567. reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
  568. }
  569. else if (reg->repeat_range_alloc <= id) {
  570. int n;
  571. n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
  572. p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
  573. sizeof(OnigRepeatRange) * n);
  574. CHECK_NULL_RETURN_MEMERR(p);
  575. reg->repeat_range = p;
  576. reg->repeat_range_alloc = n;
  577. }
  578. else {
  579. p = reg->repeat_range;
  580. }
  581. p[id].lower = lower;
  582. p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
  583. return 0;
  584. }
  585. static int
  586. compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
  587. regex_t* reg)
  588. {
  589. int r;
  590. int num_repeat = reg->num_repeat;
  591. r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
  592. if (r) return r;
  593. r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  594. reg->num_repeat++;
  595. if (r) return r;
  596. r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
  597. if (r) return r;
  598. r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
  599. if (r) return r;
  600. r = compile_tree_empty_check(qn->target, reg, empty_info);
  601. if (r) return r;
  602. if (
  603. #ifdef USE_SUBEXP_CALL
  604. reg->num_call > 0 ||
  605. #endif
  606. IS_QUANTIFIER_IN_REPEAT(qn)) {
  607. r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
  608. }
  609. else {
  610. r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
  611. }
  612. if (r) return r;
  613. r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
  614. return r;
  615. }
  616. static int
  617. is_anychar_star_quantifier(QtfrNode* qn)
  618. {
  619. if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
  620. NTYPE(qn->target) == NT_CANY)
  621. return 1;
  622. else
  623. return 0;
  624. }
  /* Upper limit on the code size of an inline-expanded quantifier.  The
   * quantifier compilers compare tlen-based products against it before
   * duplicating the target instead of using repeat opcodes. */
  625. #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
  /* True when the local variable `ckn` of the enclosing function is
   * positive; usable only where a variable of that name is in scope. */
  626. #define CKN_ON (ckn > 0)
  627. #ifdef USE_COMBINATION_EXPLOSION_CHECK
  628. static int
  629. compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
  630. {
  631. int len, mod_tlen, cklen;
  632. int ckn;
  633. int infinite = IS_REPEAT_INFINITE(qn->upper);
  634. int empty_info = qn->target_empty_info;
  635. int tlen = compile_length_tree(qn->target, reg);
  636. if (tlen < 0) return tlen;
  637. ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
  638. cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
  639. /* anychar repeat */
  640. if (NTYPE(qn->target) == NT_CANY) {
  641. if (qn->greedy && infinite) {
  642. if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
  643. return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
  644. else
  645. return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
  646. }
  647. }
  648. if (empty_info != 0)
  649. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  650. else
  651. mod_tlen = tlen;
  652. if (infinite && qn->lower <= 1) {
  653. if (qn->greedy) {
  654. if (qn->lower == 1)
  655. len = SIZE_OP_JUMP;
  656. else
  657. len = 0;
  658. len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
  659. }
  660. else {
  661. if (qn->lower == 0)
  662. len = SIZE_OP_JUMP;
  663. else
  664. len = 0;
  665. len += mod_tlen + SIZE_OP_PUSH + cklen;
  666. }
  667. }
  668. else if (qn->upper == 0) {
  669. if (qn->is_referred != 0) /* /(?<n>..){0}/ */
  670. len = SIZE_OP_JUMP + tlen;
  671. else
  672. len = 0;
  673. }
  674. else if (qn->upper == 1 && qn->greedy) {
  675. if (qn->lower == 0) {
  676. if (CKN_ON) {
  677. len = SIZE_OP_STATE_CHECK_PUSH + tlen;
  678. }
  679. else {
  680. len = SIZE_OP_PUSH + tlen;
  681. }
  682. }
  683. else {
  684. len = tlen;
  685. }
  686. }
  687. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  688. len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
  689. }
  690. else {
  691. len = SIZE_OP_REPEAT_INC
  692. + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
  693. if (CKN_ON)
  694. len += SIZE_OP_STATE_CHECK;
  695. }
  696. return len;
  697. }
  698. static int
  699. compile_quantifier_node(QtfrNode* qn, regex_t* reg)
  700. {
  701. int r, mod_tlen;
  702. int ckn;
  703. int infinite = IS_REPEAT_INFINITE(qn->upper);
  704. int empty_info = qn->target_empty_info;
  705. int tlen = compile_length_tree(qn->target, reg);
  706. if (tlen < 0) return tlen;
  707. ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
  708. if (is_anychar_star_quantifier(qn)) {
  709. r = compile_tree_n_times(qn->target, qn->lower, reg);
  710. if (r) return r;
  711. if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
  712. if (IS_MULTILINE(reg->options))
  713. r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
  714. else
  715. r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
  716. if (r) return r;
  717. if (CKN_ON) {
  718. r = add_state_check_num(reg, ckn);
  719. if (r) return r;
  720. }
  721. return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  722. }
  723. else {
  724. if (IS_MULTILINE(reg->options)) {
  725. r = add_opcode(reg, (CKN_ON ?
  726. OP_STATE_CHECK_ANYCHAR_ML_STAR
  727. : OP_ANYCHAR_ML_STAR));
  728. }
  729. else {
  730. r = add_opcode(reg, (CKN_ON ?
  731. OP_STATE_CHECK_ANYCHAR_STAR
  732. : OP_ANYCHAR_STAR));
  733. }
  734. if (r) return r;
  735. if (CKN_ON)
  736. r = add_state_check_num(reg, ckn);
  737. return r;
  738. }
  739. }
  740. if (empty_info != 0)
  741. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  742. else
  743. mod_tlen = tlen;
  744. if (infinite && qn->lower <= 1) {
  745. if (qn->greedy) {
  746. if (qn->lower == 1) {
  747. r = add_opcode_rel_addr(reg, OP_JUMP,
  748. (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
  749. if (r) return r;
  750. }
  751. if (CKN_ON) {
  752. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  753. if (r) return r;
  754. r = add_state_check_num(reg, ckn);
  755. if (r) return r;
  756. r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
  757. }
  758. else {
  759. r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
  760. }
  761. if (r) return r;
  762. r = compile_tree_empty_check(qn->target, reg, empty_info);
  763. if (r) return r;
  764. r = add_opcode_rel_addr(reg, OP_JUMP,
  765. -(mod_tlen + (int )SIZE_OP_JUMP
  766. + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
  767. }
  768. else {
  769. if (qn->lower == 0) {
  770. r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
  771. if (r) return r;
  772. }
  773. r = compile_tree_empty_check(qn->target, reg, empty_info);
  774. if (r) return r;
  775. if (CKN_ON) {
  776. r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
  777. if (r) return r;
  778. r = add_state_check_num(reg, ckn);
  779. if (r) return r;
  780. r = add_rel_addr(reg,
  781. -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
  782. }
  783. else
  784. r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
  785. }
  786. }
  787. else if (qn->upper == 0) {
  788. if (qn->is_referred != 0) { /* /(?<n>..){0}/ */
  789. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  790. if (r) return r;
  791. r = compile_tree(qn->target, reg);
  792. }
  793. else
  794. r = 0;
  795. }
  796. else if (qn->upper == 1 && qn->greedy) {
  797. if (qn->lower == 0) {
  798. if (CKN_ON) {
  799. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  800. if (r) return r;
  801. r = add_state_check_num(reg, ckn);
  802. if (r) return r;
  803. r = add_rel_addr(reg, tlen);
  804. }
  805. else {
  806. r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
  807. }
  808. if (r) return r;
  809. }
  810. r = compile_tree(qn->target, reg);
  811. }
  812. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  813. if (CKN_ON) {
  814. r = add_opcode(reg, OP_STATE_CHECK_PUSH);
  815. if (r) return r;
  816. r = add_state_check_num(reg, ckn);
  817. if (r) return r;
  818. r = add_rel_addr(reg, SIZE_OP_JUMP);
  819. }
  820. else {
  821. r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
  822. }
  823. if (r) return r;
  824. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  825. if (r) return r;
  826. r = compile_tree(qn->target, reg);
  827. }
  828. else {
  829. r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
  830. if (CKN_ON) {
  831. if (r) return r;
  832. r = add_opcode(reg, OP_STATE_CHECK);
  833. if (r) return r;
  834. r = add_state_check_num(reg, ckn);
  835. }
  836. }
  837. return r;
  838. }
  839. #else /* USE_COMBINATION_EXPLOSION_CHECK */
  840. static int
  841. compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
  842. {
  843. int len, mod_tlen;
  844. int infinite = IS_REPEAT_INFINITE(qn->upper);
  845. int empty_info = qn->target_empty_info;
  846. int tlen = compile_length_tree(qn->target, reg);
  847. if (tlen < 0) return tlen;
  848. /* anychar repeat */
  849. if (NTYPE(qn->target) == NT_CANY) {
  850. if (qn->greedy && infinite) {
  851. if (IS_NOT_NULL(qn->next_head_exact))
  852. return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
  853. else
  854. return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
  855. }
  856. }
  857. if (empty_info != 0)
  858. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  859. else
  860. mod_tlen = tlen;
  861. if (infinite &&
  862. (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  863. if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
  864. len = SIZE_OP_JUMP;
  865. }
  866. else {
  867. len = tlen * qn->lower;
  868. }
  869. if (qn->greedy) {
  870. #ifdef USE_OP_PUSH_OR_JUMP_EXACT
  871. if (IS_NOT_NULL(qn->head_exact))
  872. len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
  873. else
  874. #endif
  875. if (IS_NOT_NULL(qn->next_head_exact))
  876. len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
  877. else
  878. len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
  879. }
  880. else
  881. len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
  882. }
  883. else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
  884. len = SIZE_OP_JUMP + tlen;
  885. }
  886. else if (!infinite && qn->greedy &&
  887. (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
  888. <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  889. len = tlen * qn->lower;
  890. len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
  891. }
  892. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  893. len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
  894. }
  895. else {
  896. len = SIZE_OP_REPEAT_INC
  897. + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
  898. }
  899. return len;
  900. }
  901. static int
  902. compile_quantifier_node(QtfrNode* qn, regex_t* reg)
  903. {
  904. int i, r, mod_tlen;
  905. int infinite = IS_REPEAT_INFINITE(qn->upper);
  906. int empty_info = qn->target_empty_info;
  907. int tlen = compile_length_tree(qn->target, reg);
  908. if (tlen < 0) return tlen;
  909. if (is_anychar_star_quantifier(qn)) {
  910. r = compile_tree_n_times(qn->target, qn->lower, reg);
  911. if (r) return r;
  912. if (IS_NOT_NULL(qn->next_head_exact)) {
  913. if (IS_MULTILINE(reg->options))
  914. r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
  915. else
  916. r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
  917. if (r) return r;
  918. return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  919. }
  920. else {
  921. if (IS_MULTILINE(reg->options))
  922. return add_opcode(reg, OP_ANYCHAR_ML_STAR);
  923. else
  924. return add_opcode(reg, OP_ANYCHAR_STAR);
  925. }
  926. }
  927. if (empty_info != 0)
  928. mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
  929. else
  930. mod_tlen = tlen;
  931. if (infinite &&
  932. (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  933. if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
  934. if (qn->greedy) {
  935. #ifdef USE_OP_PUSH_OR_JUMP_EXACT
  936. if (IS_NOT_NULL(qn->head_exact))
  937. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
  938. else
  939. #endif
  940. if (IS_NOT_NULL(qn->next_head_exact))
  941. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
  942. else
  943. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
  944. }
  945. else {
  946. r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
  947. }
  948. if (r) return r;
  949. }
  950. else {
  951. r = compile_tree_n_times(qn->target, qn->lower, reg);
  952. if (r) return r;
  953. }
  954. if (qn->greedy) {
  955. #ifdef USE_OP_PUSH_OR_JUMP_EXACT
  956. if (IS_NOT_NULL(qn->head_exact)) {
  957. r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
  958. mod_tlen + SIZE_OP_JUMP);
  959. if (r) return r;
  960. add_bytes(reg, NSTR(qn->head_exact)->s, 1);
  961. r = compile_tree_empty_check(qn->target, reg, empty_info);
  962. if (r) return r;
  963. r = add_opcode_rel_addr(reg, OP_JUMP,
  964. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
  965. }
  966. else
  967. #endif
  968. if (IS_NOT_NULL(qn->next_head_exact)) {
  969. r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
  970. mod_tlen + SIZE_OP_JUMP);
  971. if (r) return r;
  972. add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
  973. r = compile_tree_empty_check(qn->target, reg, empty_info);
  974. if (r) return r;
  975. r = add_opcode_rel_addr(reg, OP_JUMP,
  976. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
  977. }
  978. else {
  979. r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
  980. if (r) return r;
  981. r = compile_tree_empty_check(qn->target, reg, empty_info);
  982. if (r) return r;
  983. r = add_opcode_rel_addr(reg, OP_JUMP,
  984. -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
  985. }
  986. }
  987. else {
  988. r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
  989. if (r) return r;
  990. r = compile_tree_empty_check(qn->target, reg, empty_info);
  991. if (r) return r;
  992. r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
  993. }
  994. }
  995. else if (qn->upper == 0 && qn->is_referred != 0) { /* /(?<n>..){0}/ */
  996. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  997. if (r) return r;
  998. r = compile_tree(qn->target, reg);
  999. }
  1000. else if (!infinite && qn->greedy &&
  1001. (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
  1002. <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
  1003. int n = qn->upper - qn->lower;
  1004. r = compile_tree_n_times(qn->target, qn->lower, reg);
  1005. if (r) return r;
  1006. for (i = 0; i < n; i++) {
  1007. r = add_opcode_rel_addr(reg, OP_PUSH,
  1008. (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
  1009. if (r) return r;
  1010. r = compile_tree(qn->target, reg);
  1011. if (r) return r;
  1012. }
  1013. }
  1014. else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
  1015. r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
  1016. if (r) return r;
  1017. r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
  1018. if (r) return r;
  1019. r = compile_tree(qn->target, reg);
  1020. }
  1021. else {
  1022. r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
  1023. }
  1024. return r;
  1025. }
  1026. #endif /* USE_COMBINATION_EXPLOSION_CHECK */
  1027. static int
  1028. compile_length_option_node(EncloseNode* node, regex_t* reg)
  1029. {
  1030. int tlen;
  1031. OnigOptionType prev = reg->options;
  1032. reg->options = node->option;
  1033. tlen = compile_length_tree(node->target, reg);
  1034. reg->options = prev;
  1035. if (tlen < 0) return tlen;
  1036. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1037. return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
  1038. + tlen + SIZE_OP_SET_OPTION;
  1039. }
  1040. else
  1041. return tlen;
  1042. }
  1043. static int
  1044. compile_option_node(EncloseNode* node, regex_t* reg)
  1045. {
  1046. int r;
  1047. OnigOptionType prev = reg->options;
  1048. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1049. r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
  1050. if (r) return r;
  1051. r = add_opcode_option(reg, OP_SET_OPTION, prev);
  1052. if (r) return r;
  1053. r = add_opcode(reg, OP_FAIL);
  1054. if (r) return r;
  1055. }
  1056. reg->options = node->option;
  1057. r = compile_tree(node->target, reg);
  1058. reg->options = prev;
  1059. if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
  1060. if (r) return r;
  1061. r = add_opcode_option(reg, OP_SET_OPTION, prev);
  1062. }
  1063. return r;
  1064. }
  1065. static int
  1066. compile_length_enclose_node(EncloseNode* node, regex_t* reg)
  1067. {
  1068. int len;
  1069. int tlen;
  1070. if (node->type == ENCLOSE_OPTION)
  1071. return compile_length_option_node(node, reg);
  1072. if (node->target) {
  1073. tlen = compile_length_tree(node->target, reg);
  1074. if (tlen < 0) return tlen;
  1075. }
  1076. else
  1077. tlen = 0;
  1078. switch (node->type) {
  1079. case ENCLOSE_MEMORY:
  1080. #ifdef USE_SUBEXP_CALL
  1081. if (IS_ENCLOSE_CALLED(node)) {
  1082. len = SIZE_OP_MEMORY_START_PUSH + tlen
  1083. + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
  1084. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1085. len += (IS_ENCLOSE_RECURSION(node)
  1086. ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
  1087. else
  1088. len += (IS_ENCLOSE_RECURSION(node)
  1089. ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
  1090. }
  1091. else if (IS_ENCLOSE_RECURSION(node)) {
  1092. len = SIZE_OP_MEMORY_START_PUSH;
  1093. len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
  1094. ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
  1095. }
  1096. else
  1097. #endif
  1098. {
  1099. if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
  1100. len = SIZE_OP_MEMORY_START_PUSH;
  1101. else
  1102. len = SIZE_OP_MEMORY_START;
  1103. len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
  1104. ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
  1105. }
  1106. break;
  1107. case ENCLOSE_STOP_BACKTRACK:
  1108. if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
  1109. QtfrNode* qn = NQTFR(node->target);
  1110. tlen = compile_length_tree(qn->target, reg);
  1111. if (tlen < 0) return tlen;
  1112. len = tlen * qn->lower
  1113. + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
  1114. }
  1115. else {
  1116. len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
  1117. }
  1118. break;
  1119. case ENCLOSE_CONDITION:
  1120. len = SIZE_OP_CONDITION;
  1121. if (NTYPE(node->target) == NT_ALT) {
  1122. Node* x = node->target;
  1123. tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
  1124. if (tlen < 0) return tlen;
  1125. len += tlen + SIZE_OP_JUMP;
  1126. if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
  1127. x = NCDR(x);
  1128. tlen = compile_length_tree(NCAR(x), reg); /* no-node */
  1129. if (tlen < 0) return tlen;
  1130. len += tlen;
  1131. if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
  1132. }
  1133. else {
  1134. return ONIGERR_PARSER_BUG;
  1135. }
  1136. break;
  1137. case ENCLOSE_ABSENT:
  1138. len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
  1139. break;
  1140. default:
  1141. return ONIGERR_TYPE_BUG;
  1142. break;
  1143. }
  1144. return len;
  1145. }
  1146. static int get_char_length_tree(Node* node, regex_t* reg, int* len);
  1147. static int
  1148. compile_enclose_node(EncloseNode* node, regex_t* reg)
  1149. {
  1150. int r, len;
  1151. if (node->type == ENCLOSE_OPTION)
  1152. return compile_option_node(node, reg);
  1153. switch (node->type) {
  1154. case ENCLOSE_MEMORY:
  1155. #ifdef USE_SUBEXP_CALL
  1156. if (IS_ENCLOSE_CALLED(node)) {
  1157. r = add_opcode(reg, OP_CALL);
  1158. if (r) return r;
  1159. node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
  1160. node->state |= NST_ADDR_FIXED;
  1161. r = add_abs_addr(reg, (int )node->call_addr);
  1162. if (r) return r;
  1163. len = compile_length_tree(node->target, reg);
  1164. len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
  1165. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1166. len += (IS_ENCLOSE_RECURSION(node)
  1167. ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
  1168. else
  1169. len += (IS_ENCLOSE_RECURSION(node)
  1170. ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
  1171. r = add_opcode_rel_addr(reg, OP_JUMP, len);
  1172. if (r) return r;
  1173. }
  1174. #endif
  1175. if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
  1176. r = add_opcode(reg, OP_MEMORY_START_PUSH);
  1177. else
  1178. r = add_opcode(reg, OP_MEMORY_START);
  1179. if (r) return r;
  1180. r = add_mem_num(reg, node->regnum);
  1181. if (r) return r;
  1182. r = compile_tree(node->target, reg);
  1183. if (r) return r;
  1184. #ifdef USE_SUBEXP_CALL
  1185. if (IS_ENCLOSE_CALLED(node)) {
  1186. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1187. r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
  1188. ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
  1189. else
  1190. r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
  1191. ? OP_MEMORY_END_REC : OP_MEMORY_END));
  1192. if (r) return r;
  1193. r = add_mem_num(reg, node->regnum);
  1194. if (r) return r;
  1195. r = add_opcode(reg, OP_RETURN);
  1196. }
  1197. else if (IS_ENCLOSE_RECURSION(node)) {
  1198. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1199. r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
  1200. else
  1201. r = add_opcode(reg, OP_MEMORY_END_REC);
  1202. if (r) return r;
  1203. r = add_mem_num(reg, node->regnum);
  1204. }
  1205. else
  1206. #endif
  1207. {
  1208. if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
  1209. r = add_opcode(reg, OP_MEMORY_END_PUSH);
  1210. else
  1211. r = add_opcode(reg, OP_MEMORY_END);
  1212. if (r) return r;
  1213. r = add_mem_num(reg, node->regnum);
  1214. }
  1215. break;
  1216. case ENCLOSE_STOP_BACKTRACK:
  1217. if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
  1218. QtfrNode* qn = NQTFR(node->target);
  1219. r = compile_tree_n_times(qn->target, qn->lower, reg);
  1220. if (r) return r;
  1221. len = compile_length_tree(qn->target, reg);
  1222. if (len < 0) return len;
  1223. r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
  1224. if (r) return r;
  1225. r = compile_tree(qn->target, reg);
  1226. if (r) return r;
  1227. r = add_opcode(reg, OP_POP);
  1228. if (r) return r;
  1229. r = add_opcode_rel_addr(reg, OP_JUMP,
  1230. -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
  1231. }
  1232. else {
  1233. r = add_opcode(reg, OP_PUSH_STOP_BT);
  1234. if (r) return r;
  1235. r = compile_tree(node->target, reg);
  1236. if (r) return r;
  1237. r = add_opcode(reg, OP_POP_STOP_BT);
  1238. }
  1239. break;
  1240. case ENCLOSE_CONDITION:
  1241. r = add_opcode(reg, OP_CONDITION);
  1242. if (r) return r;
  1243. r = add_mem_num(reg, node->regnum);
  1244. if (r) return r;
  1245. if (NTYPE(node->target) == NT_ALT) {
  1246. Node* x = node->target;
  1247. int len2;
  1248. len = compile_length_tree(NCAR(x), reg); /* yes-node */
  1249. if (len < 0) return len;
  1250. if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
  1251. x = NCDR(x);
  1252. len2 = compile_length_tree(NCAR(x), reg); /* no-node */
  1253. if (len2 < 0) return len2;
  1254. if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
  1255. x = node->target;
  1256. r = add_rel_addr(reg, len + SIZE_OP_JUMP);
  1257. if (r) return r;
  1258. r = compile_tree(NCAR(x), reg); /* yes-node */
  1259. if (r) return r;
  1260. r = add_opcode_rel_addr(reg, OP_JUMP, len2);
  1261. if (r) return r;
  1262. x = NCDR(x);
  1263. r = compile_tree(NCAR(x), reg); /* no-node */
  1264. }
  1265. else {
  1266. return ONIGERR_PARSER_BUG;
  1267. }
  1268. break;
  1269. case ENCLOSE_ABSENT:
  1270. len = compile_length_tree(node->target, reg);
  1271. if (len < 0) return len;
  1272. r = add_opcode(reg, OP_PUSH_ABSENT_POS);
  1273. if (r) return r;
  1274. r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
  1275. if (r) return r;
  1276. r = compile_tree(node->target, reg);
  1277. if (r) return r;
  1278. r = add_opcode(reg, OP_ABSENT_END);
  1279. break;
  1280. default:
  1281. return ONIGERR_TYPE_BUG;
  1282. break;
  1283. }
  1284. return r;
  1285. }
  1286. static int
  1287. compile_length_anchor_node(AnchorNode* node, regex_t* reg)
  1288. {
  1289. int len;
  1290. int tlen = 0;
  1291. if (node->target) {
  1292. tlen = compile_length_tree(node->target, reg);
  1293. if (tlen < 0) return tlen;
  1294. }
  1295. switch (node->type) {
  1296. case ANCHOR_PREC_READ:
  1297. len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
  1298. break;
  1299. case ANCHOR_PREC_READ_NOT:
  1300. len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
  1301. break;
  1302. case ANCHOR_LOOK_BEHIND:
  1303. len = SIZE_OP_LOOK_BEHIND + tlen;
  1304. break;
  1305. case ANCHOR_LOOK_BEHIND_NOT:
  1306. len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
  1307. break;
  1308. default:
  1309. len = SIZE_OPCODE;
  1310. break;
  1311. }
  1312. return len;
  1313. }
  1314. static int
  1315. compile_anchor_node(AnchorNode* node, regex_t* reg)
  1316. {
  1317. int r, len;
  1318. switch (node->type) {
  1319. case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
  1320. case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
  1321. case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
  1322. case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
  1323. case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
  1324. case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
  1325. case ANCHOR_WORD_BOUND:
  1326. if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
  1327. else r = add_opcode(reg, OP_WORD_BOUND);
  1328. break;
  1329. case ANCHOR_NOT_WORD_BOUND:
  1330. if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
  1331. else r = add_opcode(reg, OP_NOT_WORD_BOUND);
  1332. break;
  1333. #ifdef USE_WORD_BEGIN_END
  1334. case ANCHOR_WORD_BEGIN:
  1335. if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
  1336. else r = add_opcode(reg, OP_WORD_BEGIN);
  1337. break;
  1338. case ANCHOR_WORD_END:
  1339. if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
  1340. else r = add_opcode(reg, OP_WORD_END);
  1341. break;
  1342. #endif
  1343. case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
  1344. case ANCHOR_PREC_READ:
  1345. r = add_opcode(reg, OP_PUSH_POS);
  1346. if (r) return r;
  1347. r = compile_tree(node->target, reg);
  1348. if (r) return r;
  1349. r = add_opcode(reg, OP_POP_POS);
  1350. break;
  1351. case ANCHOR_PREC_READ_NOT:
  1352. len = compile_length_tree(node->target, reg);
  1353. if (len < 0) return len;
  1354. r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
  1355. if (r) return r;
  1356. r = compile_tree(node->target, reg);
  1357. if (r) return r;
  1358. r = add_opcode(reg, OP_FAIL_POS);
  1359. break;
  1360. case ANCHOR_LOOK_BEHIND:
  1361. {
  1362. int n;
  1363. r = add_opcode(reg, OP_LOOK_BEHIND);
  1364. if (r) return r;
  1365. if (node->char_len < 0) {
  1366. r = get_char_length_tree(node->target, reg, &n);
  1367. if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
  1368. }
  1369. else
  1370. n = node->char_len;
  1371. r = add_length(reg, n);
  1372. if (r) return r;
  1373. r = compile_tree(node->target, reg);
  1374. }
  1375. break;
  1376. case ANCHOR_LOOK_BEHIND_NOT:
  1377. {
  1378. int n;
  1379. len = compile_length_tree(node->target, reg);
  1380. r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
  1381. len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
  1382. if (r) return r;
  1383. if (node->char_len < 0) {
  1384. r = get_char_length_tree(node->target, reg, &n);
  1385. if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
  1386. }
  1387. else
  1388. n = node->char_len;
  1389. r = add_length(reg, n);
  1390. if (r) return r;
  1391. r = compile_tree(node->target, reg);
  1392. if (r) return r;
  1393. r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
  1394. }
  1395. break;
  1396. default:
  1397. return ONIGERR_TYPE_BUG;
  1398. break;
  1399. }
  1400. return r;
  1401. }
  1402. static int
  1403. compile_length_tree(Node* node, regex_t* reg)
  1404. {
  1405. int len, type, r;
  1406. type = NTYPE(node);
  1407. switch (type) {
  1408. case NT_LIST:
  1409. len = 0;
  1410. do {
  1411. r = compile_length_tree(NCAR(node), reg);
  1412. if (r < 0) return r;
  1413. len += r;
  1414. } while (IS_NOT_NULL(node = NCDR(node)));
  1415. r = len;
  1416. break;
  1417. case NT_ALT:
  1418. {
  1419. int n = 0;
  1420. len = 0;
  1421. do {
  1422. r = compile_length_tree(NCAR(node), reg);
  1423. if (r < 0) return r;
  1424. len += r;
  1425. n++;
  1426. } while (IS_NOT_NULL(node = NCDR(node)));
  1427. r = len;
  1428. r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
  1429. }
  1430. break;
  1431. case NT_STR:
  1432. if (NSTRING_IS_RAW(node))
  1433. r = compile_length_string_raw_node(NSTR(node), reg);
  1434. else
  1435. r = compile_length_string_node(node, reg);
  1436. break;
  1437. case NT_CCLASS:
  1438. r = compile_length_cclass_node(NCCLASS(node), reg);
  1439. break;
  1440. case NT_CTYPE:
  1441. case NT_CANY:
  1442. r = SIZE_OPCODE;
  1443. break;
  1444. case NT_BREF:
  1445. {
  1446. BRefNode* br = NBREF(node);
  1447. #ifdef USE_BACKREF_WITH_LEVEL
  1448. if (IS_BACKREF_NEST_LEVEL(br)) {
  1449. r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
  1450. SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
  1451. }
  1452. else
  1453. #endif
  1454. if (br->back_num == 1) {
  1455. r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
  1456. ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
  1457. }
  1458. else {
  1459. r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
  1460. }
  1461. }
  1462. break;
  1463. #ifdef USE_SUBEXP_CALL
  1464. case NT_CALL:
  1465. r = SIZE_OP_CALL;
  1466. break;
  1467. #endif
  1468. case NT_QTFR:
  1469. r = compile_length_quantifier_node(NQTFR(node), reg);
  1470. break;
  1471. case NT_ENCLOSE:
  1472. r = compile_length_enclose_node(NENCLOSE(node), reg);
  1473. break;
  1474. case NT_ANCHOR:
  1475. r = compile_length_anchor_node(NANCHOR(node), reg);
  1476. break;
  1477. default:
  1478. return ONIGERR_TYPE_BUG;
  1479. break;
  1480. }
  1481. return r;
  1482. }
  1483. static int
  1484. compile_tree(Node* node, regex_t* reg)
  1485. {
  1486. int n, type, len, pos, r = 0;
  1487. type = NTYPE(node);
  1488. switch (type) {
  1489. case NT_LIST:
  1490. do {
  1491. r = compile_tree(NCAR(node), reg);
  1492. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1493. break;
  1494. case NT_ALT:
  1495. {
  1496. Node* x = node;
  1497. len = 0;
  1498. do {
  1499. len += compile_length_tree(NCAR(x), reg);
  1500. if (NCDR(x) != NULL) {
  1501. len += SIZE_OP_PUSH + SIZE_OP_JUMP;
  1502. }
  1503. } while (IS_NOT_NULL(x = NCDR(x)));
  1504. pos = reg->used + len; /* goal position */
  1505. do {
  1506. len = compile_length_tree(NCAR(node), reg);
  1507. if (IS_NOT_NULL(NCDR(node))) {
  1508. r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
  1509. if (r) break;
  1510. }
  1511. r = compile_tree(NCAR(node), reg);
  1512. if (r) break;
  1513. if (IS_NOT_NULL(NCDR(node))) {
  1514. len = pos - (reg->used + SIZE_OP_JUMP);
  1515. r = add_opcode_rel_addr(reg, OP_JUMP, len);
  1516. if (r) break;
  1517. }
  1518. } while (IS_NOT_NULL(node = NCDR(node)));
  1519. }
  1520. break;
  1521. case NT_STR:
  1522. if (NSTRING_IS_RAW(node))
  1523. r = compile_string_raw_node(NSTR(node), reg);
  1524. else
  1525. r = compile_string_node(node, reg);
  1526. break;
  1527. case NT_CCLASS:
  1528. r = compile_cclass_node(NCCLASS(node), reg);
  1529. break;
  1530. case NT_CTYPE:
  1531. {
  1532. int op;
  1533. switch (NCTYPE(node)->ctype) {
  1534. case ONIGENC_CTYPE_WORD:
  1535. if (NCTYPE(node)->ascii_range != 0) {
  1536. if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
  1537. else op = OP_ASCII_WORD;
  1538. }
  1539. else {
  1540. if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
  1541. else op = OP_WORD;
  1542. }
  1543. break;
  1544. default:
  1545. return ONIGERR_TYPE_BUG;
  1546. break;
  1547. }
  1548. r = add_opcode(reg, op);
  1549. }
  1550. break;
  1551. case NT_CANY:
  1552. if (IS_MULTILINE(reg->options))
  1553. r = add_opcode(reg, OP_ANYCHAR_ML);
  1554. else
  1555. r = add_opcode(reg, OP_ANYCHAR);
  1556. break;
  1557. case NT_BREF:
  1558. {
  1559. BRefNode* br = NBREF(node);
  1560. #ifdef USE_BACKREF_WITH_LEVEL
  1561. if (IS_BACKREF_NEST_LEVEL(br)) {
  1562. r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
  1563. if (r) return r;
  1564. r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
  1565. if (r) return r;
  1566. r = add_length(reg, br->nest_level);
  1567. if (r) return r;
  1568. goto add_bacref_mems;
  1569. }
  1570. else
  1571. #endif
  1572. if (br->back_num == 1) {
  1573. n = br->back_static[0];
  1574. if (IS_IGNORECASE(reg->options)) {
  1575. r = add_opcode(reg, OP_BACKREFN_IC);
  1576. if (r) return r;
  1577. r = add_mem_num(reg, n);
  1578. }
  1579. else {
  1580. switch (n) {
  1581. case 1: r = add_opcode(reg, OP_BACKREF1); break;
  1582. case 2: r = add_opcode(reg, OP_BACKREF2); break;
  1583. default:
  1584. r = add_opcode(reg, OP_BACKREFN);
  1585. if (r) return r;
  1586. r = add_mem_num(reg, n);
  1587. break;
  1588. }
  1589. }
  1590. }
  1591. else {
  1592. int i;
  1593. int* p;
  1594. if (IS_IGNORECASE(reg->options)) {
  1595. r = add_opcode(reg, OP_BACKREF_MULTI_IC);
  1596. }
  1597. else {
  1598. r = add_opcode(reg, OP_BACKREF_MULTI);
  1599. }
  1600. if (r) return r;
  1601. #ifdef USE_BACKREF_WITH_LEVEL
  1602. add_bacref_mems:
  1603. #endif
  1604. r = add_length(reg, br->back_num);
  1605. if (r) return r;
  1606. p = BACKREFS_P(br);
  1607. for (i = br->back_num - 1; i >= 0; i--) {
  1608. r = add_mem_num(reg, p[i]);
  1609. if (r) return r;
  1610. }
  1611. }
  1612. }
  1613. break;
  1614. #ifdef USE_SUBEXP_CALL
  1615. case NT_CALL:
  1616. r = compile_call(NCALL(node), reg);
  1617. break;
  1618. #endif
  1619. case NT_QTFR:
  1620. r = compile_quantifier_node(NQTFR(node), reg);
  1621. break;
  1622. case NT_ENCLOSE:
  1623. r = compile_enclose_node(NENCLOSE(node), reg);
  1624. break;
  1625. case NT_ANCHOR:
  1626. r = compile_anchor_node(NANCHOR(node), reg);
  1627. break;
  1628. default:
  1629. #ifdef ONIG_DEBUG
  1630. fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
  1631. #endif
  1632. break;
  1633. }
  1634. return r;
  1635. }
  1636. #ifdef USE_NAMED_GROUP
  1637. static int
  1638. noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
  1639. {
  1640. int r = 0;
  1641. Node* node = *plink;
  1642. switch (NTYPE(node)) {
  1643. case NT_LIST:
  1644. case NT_ALT:
  1645. do {
  1646. r = noname_disable_map(&(NCAR(node)), map, counter);
  1647. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1648. break;
  1649. case NT_QTFR:
  1650. {
  1651. Node** ptarget = &(NQTFR(node)->target);
  1652. Node* old = *ptarget;
  1653. r = noname_disable_map(ptarget, map, counter);
  1654. if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
  1655. onig_reduce_nested_quantifier(node, *ptarget);
  1656. }
  1657. }
  1658. break;
  1659. case NT_ENCLOSE:
  1660. {
  1661. EncloseNode* en = NENCLOSE(node);
  1662. if (en->type == ENCLOSE_MEMORY) {
  1663. if (IS_ENCLOSE_NAMED_GROUP(en)) {
  1664. (*counter)++;
  1665. map[en->regnum].new_val = *counter;
  1666. en->regnum = *counter;
  1667. }
  1668. else if (en->regnum != 0) {
  1669. *plink = en->target;
  1670. en->target = NULL_NODE;
  1671. onig_node_free(node);
  1672. r = noname_disable_map(plink, map, counter);
  1673. break;
  1674. }
  1675. }
  1676. r = noname_disable_map(&(en->target), map, counter);
  1677. }
  1678. break;
  1679. case NT_ANCHOR:
  1680. if (NANCHOR(node)->target)
  1681. r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
  1682. break;
  1683. default:
  1684. break;
  1685. }
  1686. return r;
  1687. }
  1688. static int
  1689. renumber_node_backref(Node* node, GroupNumRemap* map, const int num_mem)
  1690. {
  1691. int i, pos, n, old_num;
  1692. int *backs;
  1693. BRefNode* bn = NBREF(node);
  1694. if (! IS_BACKREF_NAME_REF(bn))
  1695. return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
  1696. old_num = bn->back_num;
  1697. if (IS_NULL(bn->back_dynamic))
  1698. backs = bn->back_static;
  1699. else
  1700. backs = bn->back_dynamic;
  1701. for (i = 0, pos = 0; i < old_num; i++) {
  1702. if (backs[i] > num_mem) return ONIGERR_INVALID_BACKREF;
  1703. n = map[backs[i]].new_val;
  1704. if (n > 0) {
  1705. backs[pos] = n;
  1706. pos++;
  1707. }
  1708. }
  1709. bn->back_num = pos;
  1710. return 0;
  1711. }
  1712. static int
  1713. renumber_by_map(Node* node, GroupNumRemap* map, const int num_mem)
  1714. {
  1715. int r = 0;
  1716. switch (NTYPE(node)) {
  1717. case NT_LIST:
  1718. case NT_ALT:
  1719. do {
  1720. r = renumber_by_map(NCAR(node), map, num_mem);
  1721. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1722. break;
  1723. case NT_QTFR:
  1724. r = renumber_by_map(NQTFR(node)->target, map, num_mem);
  1725. break;
  1726. case NT_ENCLOSE:
  1727. {
  1728. EncloseNode* en = NENCLOSE(node);
  1729. if (en->type == ENCLOSE_CONDITION) {
  1730. if (en->regnum > num_mem) return ONIGERR_INVALID_BACKREF;
  1731. en->regnum = map[en->regnum].new_val;
  1732. }
  1733. r = renumber_by_map(en->target, map, num_mem);
  1734. }
  1735. break;
  1736. case NT_BREF:
  1737. r = renumber_node_backref(node, map, num_mem);
  1738. break;
  1739. case NT_ANCHOR:
  1740. if (NANCHOR(node)->target)
  1741. r = renumber_by_map(NANCHOR(node)->target, map, num_mem);
  1742. break;
  1743. default:
  1744. break;
  1745. }
  1746. return r;
  1747. }
  1748. static int
  1749. numbered_ref_check(Node* node)
  1750. {
  1751. int r = 0;
  1752. switch (NTYPE(node)) {
  1753. case NT_LIST:
  1754. case NT_ALT:
  1755. do {
  1756. r = numbered_ref_check(NCAR(node));
  1757. } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
  1758. break;
  1759. case NT_QTFR:
  1760. r = numbered_ref_check(NQTFR(node)->target);
  1761. break;
  1762. case NT_ENCLOSE:
  1763. r = numbered_ref_check(NENCLOSE(node)->target);
  1764. break;
  1765. case NT_BREF:
  1766. if (! IS_BACKREF_NAME_REF(NBREF(node)))
  1767. return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
  1768. break;
  1769. case NT_ANCHOR:
  1770. if (NANCHOR(node)->target)
  1771. r = numbered_ref_check(NANCHOR(node)->target);
  1772. break;
  1773. default:
  1774. break;
  1775. }
  1776. return r;
  1777. }
  1778. static int
  1779. disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
  1780. {
  1781. int r, i, pos, counter;
  1782. BitStatusType loc;
  1783. GroupNumRemap* map;
  1784. map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
  1785. CHECK_NULL_RETURN_MEMERR(map);
  1786. for (i = 1; i <= env->num_mem; i++) {
  1787. map[i].new_val = 0;
  1788. }
  1789. counter = 0;
  1790. r = noname_disable_map(root, map, &counter);
  1791. if (r != 0) return r;
  1792. r = renumber_by_map(*root, map, env->num_mem);
  1793. if (r != 0) return r;
  1794. for (i = 1, pos = 1; i <= env->num_mem; i++) {
  1795. if (map[i].new_val > 0) {
  1796. SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
  1797. pos++;
  1798. }
  1799. }
  1800. loc = env->capture_history;
  1801. BIT_STATUS_CLEAR(env->capture_history);
  1802. for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
  1803. if (BIT_STATUS_AT(loc, i)) {
  1804. BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
  1805. }
  1806. }
  1807. env->num_mem = env->num_named;
  1808. reg->num_mem = env->num_named;
  1809. return onig_renumber_name_table(reg, map);
  1810. }
  1811. #endif /* USE_NAMED_GROUP */
  1812. #ifdef USE_SUBEXP_CALL
  1813. static int
  1814. unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
  1815. {
  1816. int i, offset;
  1817. EncloseNode* en;
  1818. AbsAddrType addr;
  1819. for (i = 0; i < u

Large files are truncated, but you can click here to view the full file