PageRenderTime 68ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/unicode_far/RegExp.cpp

https://bitbucket.org/regent/farmanager
C++ | 5054 lines | 4201 code | 724 blank | 129 comment | 755 complexity | f22272cb582641abcd3d3fd99c498d1c MD5 | raw file
  1. /*
  2. Copyright © 2000 Konstantin Stupnik
  3. Copyright © 2008 Far Group
  4. All rights reserved.
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions
  7. are met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions and the following disclaimer in the
  12. documentation and/or other materials provided with the distribution.
  13. 3. The name of the authors may not be used to endorse or promote products
  14. derived from this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16. IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. Regular expressions support library.
  26. Syntax and semantics of regexps very close to
  27. syntax and semantics of perl regexps.
  28. */
  29. #include "headers.hpp"
  30. #pragma hdrstop
  31. #include "RegExp.hpp"
  32. #ifndef RE_FAR_MODE
  33. #ifndef UNICODE
  34. #ifndef RE_EXTERNAL_CTYPE
  35. #include <ctype.h>
  36. #endif
  37. #else
  38. #ifndef __LINUX
  39. #include <windows.h>
  40. #endif
  41. #endif
  42. #ifndef RE_NO_STRING_H
  43. #include <string.h>
  44. #endif
  45. #else
  46. #define malloc xf_malloc
  47. #define free xf_free
  48. #endif
  49. #ifdef RE_DEBUG
  50. #include <stdio.h>
  51. #ifdef dpf
  52. #undef dpf
  53. #endif
  54. #define dpf(x) printf x
  55. char *ops[]=
  56. {
  57. "opNone",
  58. "opLineStart",
  59. "opLineEnd",
  60. "opDataStart",
  61. "opDataEnd",
  62. "opWordBound",
  63. "opNotWordBound",
  64. "opType",
  65. "opNotType",
  66. "opCharAny",
  67. "opCharAnyAll",
  68. "opSymbol",
  69. "opNotSymbol",
  70. "opSymbolIgnoreCase",
  71. "opNotSymbolIgnoreCase",
  72. "opSymbolClass",
  73. "opOpenBracket",
  74. "opClosingBracket",
  75. "opAlternative",
  76. "opBackRef",
  77. #ifdef NAMEDBRACKETS
  78. "opNamedBracket",
  79. "opNamedBackRef",
  80. #endif
  81. "opRangesBegin",
  82. "opRange",
  83. "opMinRange",
  84. "opSymbolRange",
  85. "opSymbolMinRange",
  86. "opNotSymbolRange",
  87. "opNotSymbolMinRange",
  88. "opAnyRange",
  89. "opAnyMinRange",
  90. "opTypeRange",
  91. "opTypeMinRange",
  92. "opNotTypeRange",
  93. "opNotTypeMinRange",
  94. "opClassRange",
  95. "opClassMinRange",
  96. "opBracketRange",
  97. "opBracketMinRange",
  98. "opBackRefRange",
  99. "opBackRefMinRange",
  100. #ifdef NAMEDBRACKETS
  101. "opNamedRefRange",
  102. "opNamedRefMinRange",
  103. #endif
  104. "opRangesEnd",
  105. "opAssertionsBegin",
  106. "opLookAhead",
  107. "opNotLookAhead",
  108. "opLookBehind",
  109. "opNotLookBehind",
  110. "opAsserionsEnd",
  111. "opNoReturn",
  112. #ifdef RELIB
  113. "opLibCall",
  114. #endif
  115. "opRegExpEnd",
  116. };
  117. #else
  118. #define dpf(x)
  119. #endif
  120. #ifndef UNICODE
  121. #ifdef RE_STATIC_LOCALE
  122. #ifdef RE_EXTERNAL_CTYPE
  123. prechar RegExp::lc;
  124. prechar RegExp::uc;
  125. prechar RegExp::chartypes;
  126. #else
  127. int RegExp::ilc[256/sizeof(int)];
  128. int RegExp::iuc[256/sizeof(int)];
  129. int RegExp::ichartypes[256/sizeof(int)];
  130. rechar* RegExp::lc=(rechar*)RegExp::ilc;
  131. rechar* RegExp::uc=(rechar*)RegExp::iuc;
  132. rechar* RegExp::chartypes=(rechar*)RegExp::ichartypes;
  133. #endif
  134. int RegExp::icharbits[256/sizeof(int)];
  135. rechar* RegExp::charbits=(rechar*)RegExp::icharbits;
  136. #endif
  137. #endif
  138. #ifdef UNICODE
  139. #ifndef __LINUX
  140. #define ISDIGIT(c) iswdigit(c)
  141. #define ISSPACE(c) iswspace(c)
  142. #define ISWORD(c) (IsCharAlphaNumeric(c) || c=='_')
  143. #define ISLOWER(c) IsCharLower(c)
  144. #define ISUPPER(c) IsCharUpper(c)
  145. #define ISALPHA(c) IsCharAlpha(c)
  146. #define TOUPPER(c) ((rechar)(DWORD_PTR)CharUpper((LPTSTR)(DWORD_PTR)c))
  147. #define TOLOWER(c) ((rechar)(DWORD_PTR)CharLower((LPTSTR)(DWORD_PTR)c))
  148. #else
  149. #define ISDIGIT(c) iswdigit(c)
  150. #define ISSPACE(c) iswspace(c)
  151. #define ISWORD(c) (iswalnum(c) || c=='_')
  152. #define ISLOWER(c) iswlower(c)
  153. #define ISUPPER(c) iswupper(c)
  154. #define ISALPHA(c) iswalpha(c)
  155. #define TOUPPER(c) towupper(c)
  156. #define TOLOWER(c) towlower(c)
  157. #endif
  158. #define ISTYPE(c,t) isType(c,t)
  159. int isType(rechar chr,int type)
  160. {
  161. switch (type)
  162. {
  163. case TYPE_DIGITCHAR:return ISDIGIT(chr);
  164. case TYPE_SPACECHAR:return ISSPACE(chr);
  165. case TYPE_WORDCHAR: return ISWORD(chr);
  166. case TYPE_LOWCASE: return ISLOWER(chr);
  167. case TYPE_UPCASE: return ISUPPER(chr);
  168. case TYPE_ALPHACHAR:return ISALPHA(chr);
  169. }
  170. return false;
  171. }
  172. int ushlen(const rechar* str)
  173. {
  174. rechar ch;
  175. int len = -1;
  176. do
  177. {
  178. ch = str[len+1];
  179. len++;
  180. }
  181. while (ch);
  182. return len;
  183. }
  184. #define strlen ushlen
  185. struct UniSet
  186. {
  187. unsigned char* high[256];
  188. char types;
  189. char nottypes;
  190. char negative;
  191. UniSet()
  192. {
  193. ClearArray(high);
  194. types=0;
  195. nottypes=0;
  196. negative=0;
  197. }
  198. UniSet(const UniSet& src)
  199. {
  200. for (int i=0; i<256; i++)
  201. {
  202. if (src.high[i])
  203. {
  204. high[i]=new unsigned char[32];
  205. memcpy(high[i],src.high[i],32);
  206. }
  207. else
  208. {
  209. high[i]=nullptr;
  210. }
  211. }
  212. types=src.types;
  213. nottypes=src.nottypes;
  214. negative=src.negative;
  215. }
  216. UniSet& operator=(const UniSet& src)
  217. {
  218. if (this != &src)
  219. {
  220. for (int i=0; i<256; i++)
  221. {
  222. if (src.high[i])
  223. {
  224. if (!high[i])high[i]=new unsigned char[32];
  225. memcpy(high[i],src.high[i],32);
  226. }
  227. else
  228. {
  229. if (high[i])delete [] high[i];
  230. high[i]=nullptr;
  231. }
  232. }
  233. types=src.types;
  234. nottypes=src.nottypes;
  235. negative=src.negative;
  236. }
  237. return (*this);
  238. }
  239. void Reset()
  240. {
  241. for (int i=0; i<256; i++)
  242. {
  243. if (high[i])
  244. {
  245. delete [] high[i];
  246. high[i]=0;
  247. }
  248. }
  249. types=0;
  250. nottypes=0;
  251. negative=0;
  252. }
  253. struct Setter
  254. {
  255. UniSet& set;
  256. rechar idx;
  257. Setter(UniSet& s,rechar chr):set(s),idx(chr)
  258. {
  259. }
  260. void operator=(int val)
  261. {
  262. if (val)set.SetBit(idx);
  263. else set.ClearBit(idx);
  264. }
  265. bool operator!()const
  266. {
  267. return !set.GetBit(idx);
  268. }
  269. };
  270. const bool operator[](rechar idx)const
  271. {
  272. return GetBit(idx);
  273. }
  274. Setter operator[](rechar idx)
  275. {
  276. return Setter(*this,idx);
  277. }
  278. ~UniSet()
  279. {
  280. for (int i=0; i<256; i++)
  281. {
  282. if (high[i])delete [] high[i];
  283. }
  284. }
  285. bool CheckType(int t, rechar chr) const
  286. {
  287. switch (t)
  288. {
  289. case TYPE_DIGITCHAR:if (ISDIGIT(chr))return true; else break;
  290. case TYPE_SPACECHAR:if (ISSPACE(chr))return true; else break;
  291. case TYPE_WORDCHAR: if (ISWORD(chr)) return true; else break;
  292. case TYPE_LOWCASE: if (ISLOWER(chr))return true; else break;
  293. case TYPE_UPCASE: if (ISUPPER(chr))return true; else break;
  294. case TYPE_ALPHACHAR:if (ISALPHA(chr))return true; else break;
  295. }
  296. return false;
  297. }
  298. bool GetBit(rechar chr) const
  299. {
  300. if (types)
  301. {
  302. int t=TYPE_ALPHACHAR;
  303. while (t)
  304. {
  305. if (types&t)
  306. {
  307. if (CheckType(t,chr))
  308. return negative?false:true;
  309. }
  310. t>>=1;
  311. }
  312. }
  313. if (nottypes)
  314. {
  315. int t=TYPE_ALPHACHAR;
  316. while (t)
  317. {
  318. if (nottypes&t)
  319. {
  320. if (!CheckType(t,chr))
  321. return negative?false:true;
  322. }
  323. t>>=1;
  324. }
  325. }
  326. unsigned char h=(chr&0xff00)>>8;
  327. if (!high[h]) return negative?true:false;
  328. if (((high[h][(chr&0xff)>>3]&(1<<(chr&7)))?1:0))
  329. {
  330. return negative?false:true;
  331. }
  332. return negative?true:false;
  333. }
  334. void SetBit(rechar chr)
  335. {
  336. unsigned char h=(chr&0xff00)>>8;
  337. if (!high[h])
  338. {
  339. high[h]=new unsigned char[32];
  340. memset(high[h],0,32);
  341. }
  342. high[h][(chr&0xff)>>3]|=1<<(chr&7);
  343. }
  344. void ClearBit(rechar chr)
  345. {
  346. unsigned char h=(chr&0xff00)>>8;
  347. if (!high[h])
  348. {
  349. high[h]=new unsigned char[32];
  350. memset(high[h],0,32);
  351. }
  352. high[h][(chr&0xff)>>3]&=~(1<<(chr&7));
  353. }
  354. };
  355. #define GetBit(cls,chr) cls->GetBit(chr)
  356. #define SetBit(cls,chr) cls->SetBit(chr)
  357. #else
  358. #define ISDIGIT(c) ((chartypes[c]&TYPE_DIGITCHAR))
  359. #define ISSPACE(c) ((chartypes[c]&TYPE_SPACECHAR))
  360. #define ISWORD(c) ((chartypes[c]&TYPE_WORDCHAR))
  361. #define ISLOWER(c) ((chartypes[c]&TYPE_LOWCASE))
  362. #define ISUPPER(c) ((chartypes[c]&TYPE_UPCASE))
  363. #define ISALPHA(c) ((chartypes[c]&TYPE_ALPHACHAR))
  364. #define TOUPPER(c) uc[c]
  365. #define TOLOWER(c) lc[c]
  366. #define ISTYPE(c,t) (chartypes[c]&t)
  367. #endif //UNICODE
  368. enum REOp
  369. {
  370. opLineStart=0x1, // ^
  371. opLineEnd, // $
  372. opDataStart, // \A and ^ in single line mode
  373. opDataEnd, // \Z and $ in signle line mode
  374. opWordBound, // \b
  375. opNotWordBound, // \B
  376. opType, // \d\s\w\l\u\e
  377. opNotType, // \D\S\W\L\U\E
  378. opCharAny, // .
  379. opCharAnyAll, // . in single line mode
  380. opSymbol, // single char
  381. opNotSymbol, // [^c] negative charclass with one char
  382. opSymbolIgnoreCase, // symbol with IGNORE_CASE turned on
  383. opNotSymbolIgnoreCase, // [^c] with ignore case set.
  384. opSymbolClass, // [chars]
  385. opOpenBracket, // (
  386. opClosingBracket, // )
  387. opAlternative, // |
  388. opBackRef, // \1
  389. #ifdef NAMEDBRACKETS
  390. opNamedBracket, // (?{name}
  391. opNamedBackRef, // \p{name}
  392. #endif
  393. opRangesBegin, // for op type check
  394. opRange, // generic range
  395. opMinRange, // generic minimizing range
  396. opSymbolRange, // quantifier applied to single char
  397. opSymbolMinRange, // minimizing quantifier
  398. opNotSymbolRange, // [^x]
  399. opNotSymbolMinRange,
  400. opAnyRange, // .
  401. opAnyMinRange,
  402. opTypeRange, // \w, \d, \s
  403. opTypeMinRange,
  404. opNotTypeRange, // \W, \D, \S
  405. opNotTypeMinRange,
  406. opClassRange, // for char classes
  407. opClassMinRange,
  408. opBracketRange, // for brackets
  409. opBracketMinRange,
  410. opBackRefRange, // for backrefs
  411. opBackRefMinRange,
  412. #ifdef NAMEDBRACKETS
  413. opNamedRefRange,
  414. opNamedRefMinRange,
  415. #endif
  416. opRangesEnd, // end of ranges
  417. opAssertionsBegin,
  418. opLookAhead,
  419. opNotLookAhead,
  420. opLookBehind,
  421. opNotLookBehind,
  422. opAsserionsEnd,
  423. opNoReturn,
  424. #ifdef RELIB
  425. opLibCall,
  426. #endif
  427. opRegExpEnd
  428. };
  429. struct REOpCode
  430. {
  431. int op;
  432. REOpCode *next,*prev;
  433. #ifdef RE_DEBUG
  434. int srcpos;
  435. #endif
  436. #ifdef RE_NO_NEWARRAY
  437. static void OnCreate(void *ptr);
  438. static void OnDelete(void *ptr);
  439. #else
  440. REOpCode()
  441. {
  442. ClearStruct(*this);
  443. }
  444. ~REOpCode();
  445. #endif
  446. struct SBracket
  447. {
  448. REOpCode* nextalt;
  449. int index;
  450. REOpCode* pairindex;
  451. };
  452. struct SRange
  453. {
  454. union
  455. {
  456. SBracket bracket;
  457. int op;
  458. rechar symbol;
  459. #ifdef UNICODE
  460. UniSet *symbolclass;
  461. #else
  462. prechar symbolclass;
  463. #endif
  464. REOpCode* nextalt;
  465. int refindex;
  466. #ifdef NAMEDBRACKETS
  467. prechar refname;
  468. #endif
  469. int type;
  470. };
  471. int min,max;
  472. };
  473. struct SNamedBracket
  474. {
  475. REOpCode* nextalt;
  476. prechar name;
  477. REOpCode* pairindex;
  478. };
  479. struct SAssert
  480. {
  481. REOpCode* nextalt;
  482. int length;
  483. REOpCode* pairindex;
  484. };
  485. struct SAlternative
  486. {
  487. REOpCode* nextalt;
  488. REOpCode* endindex;
  489. };
  490. union
  491. {
  492. SRange range;
  493. SBracket bracket;
  494. #ifdef NAMEDBRACKETS
  495. SNamedBracket nbracket;
  496. #endif
  497. SAssert assert;
  498. SAlternative alternative;
  499. rechar symbol;
  500. #ifdef UNICODE
  501. UniSet *symbolclass;
  502. #else
  503. prechar symbolclass;
  504. #endif
  505. int refindex;
  506. #ifdef NAMEDBRACKETS
  507. prechar refname;
  508. #endif
  509. #ifdef RELIB
  510. prechar rename;
  511. #endif
  512. int type;
  513. };
  514. };
  515. #ifdef RE_NO_NEWARRAY
  516. void StateStackItem::OnCreate(void *ptr)
  517. {
  518. memset(ptr,0,sizeof(StateStackItem));
  519. }
  520. void REOpCode::OnCreate(void *ptr)
  521. {
  522. memset(ptr,0,sizeof(REOpCode));
  523. }
  524. void REOpCode::OnDelete(void *ptr)
  525. {
  526. REOpCode &o=*static_cast<REOpCode*>(ptr);
  527. switch (o.op)
  528. {
  529. case opSymbolClass:
  530. if (o.symbolclass)
  531. free(o.symbolclass);
  532. break;
  533. case opClassRange:
  534. case opClassMinRange:
  535. if (o.range.symbolclass)
  536. free(o.range.symbolclass);
  537. break;
  538. #ifdef NAMEDBRACKETS
  539. case opNamedBracket:
  540. if (o.nbracket.name)
  541. free(o.nbracket.name);
  542. break;
  543. case opNamedBackRef:
  544. if (o.refname)
  545. free(o.refname);
  546. break;
  547. #endif
  548. #ifdef RELIB
  549. case opLibCall:
  550. if (o.rename)
  551. free(o.rename);
  552. break;
  553. #endif
  554. }
  555. }
  556. void *RegExp::CreateArray(const unsigned int size, const unsigned int total,
  557. ON_CREATE_FUNC Create)
  558. {
  559. if (total && size)
  560. {
  561. /* record[0] - sizeof
  562. record[1] - total
  563. record[2] - array
  564. */
  565. unsigned char *record=static_cast<unsigned char*>
  566. (malloc(sizeof(unsigned int)*2+size*total));
  567. if (record)
  568. {
  569. unsigned char *array=record+2*sizeof(unsigned int);
  570. *reinterpret_cast<int*>(record)=size;
  571. *reinterpret_cast<int*>(record+sizeof(unsigned int))=total;
  572. if (Create)
  573. for (unsigned int f=0; f<total; ++f)
  574. Create(array+size*f);
  575. return array;
  576. }
  577. }
  578. return nullptr;
  579. }
  580. void RegExp::DeleteArray(void **array, ON_DELETE_FUNC Delete)
  581. {
  582. if (array && *array)
  583. {
  584. unsigned char *record=reinterpret_cast<unsigned char*>(*array)-
  585. 2*sizeof(unsigned int);
  586. if (Delete)
  587. {
  588. unsigned char *m=static_cast<unsigned char*>(*array);
  589. unsigned int size=*reinterpret_cast<int*>(record),
  590. total=*reinterpret_cast<int*>(record+sizeof(unsigned int));
  591. for (unsigned int f=0; f<total; ++f)
  592. Delete(m+size*f);
  593. }
  594. free(record);
  595. *array=nullptr;
  596. }
  597. }
  598. #else // RE_NO_NEWARRAY
  599. REOpCode::~REOpCode()
  600. {
  601. switch (op)
  602. {
  603. #ifdef UNICODE
  604. case opSymbolClass:delete symbolclass; break;
  605. #else
  606. case opSymbolClass:delete [] symbolclass; break;
  607. #endif
  608. #ifdef UNICODE
  609. case opClassRange:
  610. case opClassMinRange:delete range.symbolclass; break;
  611. #else
  612. case opClassRange:
  613. case opClassMinRange:delete [] range.symbolclass; break;
  614. #endif
  615. #ifdef NAMEDBRACKETS
  616. case opNamedBracket:delete [] nbracket.name; break;
  617. case opNamedBackRef:delete [] refname; break;
  618. #endif
  619. #ifdef RELIB
  620. case opLibCall:delete [] rename; break;
  621. #endif
  622. }
  623. }
  624. #endif // RE_NO_NEWARRAY
  625. void RegExp::Init(const prechar expr,int options)
  626. {
  627. //ClearStruct(*this);
  628. code=nullptr;
  629. brhandler=nullptr;
  630. brhdata=nullptr;
  631. #ifndef UNICODE
  632. #ifndef RE_STATIC_LOCALE
  633. #ifndef RE_EXTERNAL_CTYPE
  634. InitLocale();
  635. #endif //RE_EXTERNAL_CTYPE
  636. #endif//RE_STATIC_LOCALE
  637. #endif //UNICODE
  638. #ifdef NAMEDBRACKETS
  639. havenamedbrackets=0;
  640. #endif
  641. stack=&initstack[0];
  642. st=&stack[0];
  643. initstackpage.stack=stack;
  644. firstpage=lastpage=&initstackpage;
  645. firstpage->next=nullptr;
  646. firstpage->prev=nullptr;
  647. #ifdef UNICODE
  648. firstptr=new UniSet();
  649. #define first (*firstptr)
  650. #endif
  651. start=nullptr;
  652. end=nullptr;
  653. trimend=nullptr;
  654. Compile((const RECHAR*)expr,options);
  655. }
  656. RegExp::RegExp():
  657. code(nullptr),
  658. #ifdef NAMEDBRACKETS
  659. havenamedbrackets(0),
  660. #endif
  661. stack(&initstack[0]),
  662. st(&stack[0]),
  663. slashChar('/'),
  664. backslashChar('\\'),
  665. firstpage(&initstackpage),
  666. lastpage(&initstackpage),
  667. #ifdef UNICODE
  668. firstptr(new UniSet()),
  669. #endif
  670. errorcode(errNotCompiled),
  671. start(nullptr),
  672. end(nullptr),
  673. trimend(nullptr),
  674. #ifdef RE_DEBUG
  675. resrc(nullptr),
  676. #endif
  677. brhandler(nullptr),
  678. brhdata(nullptr)
  679. {
  680. #ifndef UNICODE
  681. #ifndef RE_STATIC_LOCALE
  682. #ifndef RE_EXTERNAL_CTYPE
  683. InitLocale();
  684. #endif
  685. #endif
  686. #endif//UNICODE
  687. initstackpage.stack=stack;
  688. firstpage->next=nullptr;
  689. firstpage->prev=nullptr;
  690. }
  691. RegExp::RegExp(const RECHAR* expr,int options)
  692. {
  693. slashChar='/';
  694. backslashChar='\\';
  695. #ifdef RE_DEBUG
  696. resrc=nullptr;
  697. #endif
  698. Init((const prechar)expr,options);
  699. }
  700. RegExp::~RegExp()
  701. {
  702. #ifdef RE_DEBUG
  703. #ifdef RE_NO_NEWARRAY
  704. if (resrc)
  705. free(resrc);
  706. #else
  707. delete [] resrc;
  708. #endif // RE_NO_NEWARRAY
  709. #endif
  710. if (code)
  711. {
  712. #ifdef RE_NO_NEWARRAY
  713. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  714. #else
  715. delete [] code;
  716. code=nullptr;
  717. #endif
  718. }
  719. CleanStack();
  720. #ifdef UNICODE
  721. delete firstptr;
  722. #endif
  723. }
  724. #ifndef UNICODE
  725. #ifndef RE_EXTERNAL_CTYPE
  726. void RegExp::InitLocale()
  727. {
  728. for (int i=0; i<256; i++)
  729. {
  730. lc[i]=tolower(i);
  731. uc[i]=toupper(i);
  732. }
  733. for (int i=0; i<256; i++)
  734. {
  735. char res=0;
  736. if (isalnum(i) || i=='_')res|=TYPE_WORDCHAR;
  737. if (isalpha(i))res|=TYPE_ALPHACHAR;
  738. if (isdigit(i))res|=TYPE_DIGITCHAR;
  739. if (isspace(i))res|=TYPE_SPACECHAR;
  740. if (lc[i]==i && uc[i]!=i)res|=TYPE_LOWCASE;
  741. if (uc[i]==i && lc[i]!=i)res|=TYPE_UPCASE;
  742. chartypes[i]=res;
  743. }
  744. memset(charbits,0,sizeof(charbits));
  745. for (int i=0,j=0,k=1; i<256; i++)
  746. {
  747. if (chartypes[i]&TYPE_DIGITCHAR) {charbits[j]|=k;}
  748. if (chartypes[i]&TYPE_SPACECHAR) {charbits[32+j]|=k;}
  749. if (chartypes[i]&TYPE_WORDCHAR) {charbits[64+j]|=k;}
  750. if (chartypes[i]&TYPE_LOWCASE) {charbits[96+j]|=k;}
  751. if (chartypes[i]&TYPE_UPCASE) {charbits[128+j]|=k;}
  752. if (chartypes[i]&TYPE_ALPHACHAR) {charbits[160+j]|=k;}
  753. k<<=1;
  754. if (k==256) {k=1; j++;}
  755. }
  756. }
  757. #endif
  758. #endif
  759. int RegExp::CalcLength(const prechar src,int srclength)
  760. {
  761. int length=3;//global brackets
  762. int brackets[MAXDEPTH];
  763. int count=0;
  764. int i,save;
  765. bracketscount=1;
  766. int inquote=0;
  767. for (i=0; i<srclength; i++,length++)
  768. {
  769. if (inquote && src[i]!=backslashChar && src[i+1]!='E')
  770. {
  771. continue;
  772. }
  773. if (src[i]==backslashChar)
  774. {
  775. i++;
  776. if (src[i]=='Q')inquote=1;
  777. if (src[i]=='E')inquote=0;
  778. if (src[i]=='x')
  779. {
  780. i++;
  781. if(isxdigit(src[i]))
  782. {
  783. for(int j=1,k=i;j<4;j++)
  784. {
  785. if(isxdigit(src[k+j]))
  786. {
  787. i++;
  788. }
  789. else
  790. {
  791. break;
  792. }
  793. }
  794. }
  795. else return SetError(errSyntax,i);
  796. }
  797. #ifdef NAMEDBRACKETS
  798. if (src[i]=='p')
  799. {
  800. i++;
  801. if (src[i]!='{')
  802. return SetError(errSyntax,i);
  803. i++;
  804. int save2=i;
  805. while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
  806. i++;
  807. if (i>=srclength)
  808. return SetError(errBrackets,save2);
  809. if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
  810. return SetError(errSyntax,i);
  811. }
  812. #endif
  813. continue;
  814. }
  815. switch (src[i])
  816. {
  817. case '(':
  818. {
  819. brackets[count]=i;
  820. count++;
  821. if (count==MAXDEPTH)return SetError(errMaxDepth,i);
  822. if (src[i+1]=='?')
  823. {
  824. i+=2;
  825. #ifdef NAMEDBRACKETS
  826. if (src[i]=='{')
  827. {
  828. save=i;
  829. i++;
  830. while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
  831. i++;
  832. if (i>=srclength)
  833. return SetError(errBrackets,save);
  834. if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
  835. return SetError(errSyntax,i);
  836. }
  837. #endif
  838. }
  839. else
  840. {
  841. bracketscount++;
  842. }
  843. break;
  844. }
  845. case ')':
  846. {
  847. count--;
  848. if (count<0)return SetError(errBrackets,i);
  849. break;
  850. }
  851. case '{':
  852. case '*':
  853. case '+':
  854. case '?':
  855. {
  856. length--;
  857. if (src[i]=='{')
  858. {
  859. save=i;
  860. while (i<srclength && src[i]!='}')i++;
  861. if (i>=srclength)return SetError(errBrackets,save);
  862. }
  863. if (src[i+1]=='?')i++;
  864. break;
  865. }
  866. case '[':
  867. {
  868. save=i;
  869. while (i<srclength && src[i]!=']')i++;
  870. if (i>=srclength)return SetError(errBrackets,save);
  871. break;
  872. }
  873. #ifdef RELIB
  874. case '%':
  875. {
  876. i++;
  877. save=i;
  878. while (i<srclength && src[i]!='%')i++;
  879. if (i>=srclength)return SetError(errBrackets,save-1);
  880. if (save==i)return SetError(errSyntax,save);
  881. } break;
  882. #endif
  883. }
  884. }
  885. if (count)
  886. {
  887. errorpos=brackets[0];
  888. errorcode=errBrackets;
  889. return 0;
  890. }
  891. return length;
  892. }
  893. int RegExp::Compile(const RECHAR* src,int options)
  894. {
  895. int srcstart=0,srclength/*=0*/,relength;
  896. if (options&OP_CPPMODE)
  897. {
  898. slashChar='\\';
  899. backslashChar='/';
  900. }
  901. else
  902. {
  903. slashChar='/';
  904. backslashChar='\\';
  905. }
  906. havefirst=0;
  907. #ifdef RE_NO_NEWARRAY
  908. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  909. #else
  910. if (code)delete [] code;
  911. code=nullptr;
  912. #endif
  913. if (options&OP_PERLSTYLE)
  914. {
  915. if (src[0]!=slashChar)return SetError(errSyntax,0);
  916. srcstart=1;
  917. srclength=1;
  918. while (src[srclength] && src[srclength]!=slashChar)
  919. {
  920. if (src[srclength]==backslashChar && src[srclength+1])
  921. {
  922. srclength++;
  923. }
  924. srclength++;
  925. }
  926. if (!src[srclength])
  927. {
  928. return SetError(errSyntax,srclength-1);
  929. }
  930. int i=srclength+1;
  931. srclength--;
  932. while (src[i])
  933. {
  934. switch (src[i])
  935. {
  936. case 'i':options|=OP_IGNORECASE; break;
  937. case 's':options|=OP_SINGLELINE; break;
  938. case 'm':options|=OP_MULTILINE; break;
  939. case 'x':options|=OP_XTENDEDSYNTAX; break;
  940. case 'o':options|=OP_OPTIMIZE; break;
  941. default:return SetError(errOptions,i);
  942. }
  943. i++;
  944. }
  945. }
  946. else
  947. {
  948. srclength=(int)strlen(src);
  949. }
  950. ignorecase=options&OP_IGNORECASE?1:0;
  951. relength=CalcLength((const prechar)src+srcstart,srclength);
  952. if (!relength)
  953. {
  954. return 0;
  955. }
  956. #ifdef RE_NO_NEWARRAY
  957. code=static_cast<REOpCode*>
  958. (CreateArray(sizeof(REOpCode), relength, REOpCode::OnCreate));
  959. #else
  960. code=new REOpCode[relength];
  961. memset(code,0,sizeof(REOpCode)*relength);
  962. #endif
  963. for (int i=0; i<relength; i++)
  964. {
  965. code[i].next=i<relength-1?code+i+1:0;
  966. code[i].prev=i>0?code+i-1:0;
  967. }
  968. int result=InnerCompile((const prechar)src+srcstart,srclength,options);
  969. if (!result)
  970. {
  971. #ifdef RE_NO_NEWARRAY
  972. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  973. #else
  974. delete [] code;
  975. code=nullptr;
  976. #endif
  977. }
  978. else
  979. {
  980. errorcode=errNone;
  981. minlength=0;
  982. if (options&OP_OPTIMIZE)Optimize();
  983. }
  984. return result;
  985. }
  986. int RegExp::GetNum(const prechar src,int& i)
  987. {
  988. int res=0;//atoi((const char*)src+i);
  989. while (ISDIGIT(src[i]))
  990. {
  991. res*=10;
  992. res+=src[i]-'0';
  993. i++;
  994. }
  995. return res;
  996. }
  997. static int CalcPatternLength(PREOpCode from,PREOpCode to)
  998. {
  999. int len=0;
  1000. int altcnt=0;
  1001. int altlen=-1;
  1002. for (; from->prev!=to; from=from->next)
  1003. {
  1004. switch (from->op)
  1005. {
  1006. //zero width
  1007. case opLineStart:
  1008. case opLineEnd:
  1009. case opDataStart:
  1010. case opDataEnd:
  1011. case opWordBound:
  1012. case opNotWordBound:continue;
  1013. case opType:
  1014. case opNotType:
  1015. case opCharAny:
  1016. case opCharAnyAll:
  1017. case opSymbol:
  1018. case opNotSymbol:
  1019. case opSymbolIgnoreCase:
  1020. case opNotSymbolIgnoreCase:
  1021. case opSymbolClass:
  1022. len++;
  1023. altcnt++;
  1024. continue;
  1025. #ifdef NAMEDBRACKETS
  1026. case opNamedBracket:
  1027. #endif
  1028. case opOpenBracket:
  1029. {
  1030. int l=CalcPatternLength(from->next,from->bracket.pairindex->prev);
  1031. if (l==-1)return -1;
  1032. len+=l;
  1033. altcnt+=l;
  1034. from=from->bracket.pairindex;
  1035. continue;
  1036. }
  1037. case opClosingBracket:
  1038. break;
  1039. case opAlternative:
  1040. if (altlen!=-1 && altcnt!=altlen)return -1;
  1041. altlen=altcnt;
  1042. altcnt=0;
  1043. continue;
  1044. case opBackRef:
  1045. #ifdef NAMEDBRACKETS
  1046. case opNamedBackRef:
  1047. #endif
  1048. return -1;
  1049. case opRangesBegin:
  1050. case opRange:
  1051. case opMinRange:
  1052. case opSymbolRange:
  1053. case opSymbolMinRange:
  1054. case opNotSymbolRange:
  1055. case opNotSymbolMinRange:
  1056. case opAnyRange:
  1057. case opAnyMinRange:
  1058. case opTypeRange:
  1059. case opTypeMinRange:
  1060. case opNotTypeRange:
  1061. case opNotTypeMinRange:
  1062. case opClassRange:
  1063. case opClassMinRange:
  1064. if (from->range.min!=from->range.max)return -1;
  1065. len+=from->range.min;
  1066. altcnt+=from->range.min;
  1067. continue;
  1068. case opBracketRange:
  1069. case opBracketMinRange:
  1070. {
  1071. if (from->range.min!=from->range.max)return -1;
  1072. int l=CalcPatternLength(from->next,from->bracket.pairindex->prev);
  1073. if (l==-1)return -1;
  1074. len+=from->range.min*l;
  1075. altcnt+=from->range.min*l;
  1076. from=from->bracket.pairindex;
  1077. continue;
  1078. }
  1079. case opBackRefRange:
  1080. case opBackRefMinRange:
  1081. #ifdef NAMEDBRACKETS
  1082. case opNamedRefRange:
  1083. case opNamedRefMinRange:
  1084. #endif
  1085. return -1;
  1086. case opRangesEnd:
  1087. case opAssertionsBegin:
  1088. case opLookAhead:
  1089. case opNotLookAhead:
  1090. case opLookBehind:
  1091. case opNotLookBehind:
  1092. from=from->assert.pairindex;
  1093. continue;
  1094. case opAsserionsEnd:
  1095. case opNoReturn:
  1096. continue;
  1097. #ifdef RELIB
  1098. case opLibCall:
  1099. return -1;
  1100. #endif
  1101. }
  1102. }
  1103. if (altlen!=-1 && altlen!=altcnt)return -1;
  1104. return altlen==-1?len:altlen;
  1105. }
  1106. int RegExp::InnerCompile(const prechar src,int srclength,int options)
  1107. {
  1108. int i,j;
  1109. PREOpCode brackets[MAXDEPTH];
  1110. // current brackets depth
  1111. // one place reserved for surrounding 'main' brackets
  1112. int brdepth=1;
  1113. // compiling interior of lookbehind
  1114. // used to apply restrictions of lookbehind
  1115. int lookbehind=0;
  1116. // counter of normal brackets
  1117. int brcount=0;
  1118. // counter of closed brackets
  1119. // used to check correctness of backreferences
  1120. bool closedbrackets[MAXDEPTH];
  1121. // quoting is active
  1122. int inquote=0;
  1123. maxbackref=0;
  1124. #ifdef UNICODE
  1125. UniSet *tmpclass;
  1126. #else
  1127. rechar tmpclass[32];
  1128. int *itmpclass=(int*)tmpclass;
  1129. #endif
  1130. code->op=opOpenBracket;
  1131. code->bracket.index=0;
  1132. #ifdef NAMEDBRACKETS
  1133. MatchHash h;
  1134. SMatch m;
  1135. #endif
  1136. int pos=1;
  1137. register PREOpCode op;//=code;
  1138. brackets[0]=code;
  1139. #ifdef RE_DEBUG
  1140. #ifdef RE_NO_NEWARRAY
  1141. resrc=static_cast<rechar*>(malloc(sizeof(rechar)*(srclength+4)));
  1142. #else
  1143. resrc=new rechar[srclength+4];
  1144. #endif // RE_NO_NEWARRAY
  1145. resrc[0]='(';
  1146. resrc[1]=0;
  1147. memcpy(resrc+1,src,srclength*sizeof(rechar));
  1148. resrc[srclength+1]=')';
  1149. resrc[srclength+2]=27;
  1150. resrc[srclength+3]=0;
  1151. #endif
  1152. havelookahead=0;
  1153. for (i=0; i<srclength; i++)
  1154. {
  1155. op=code+pos;
  1156. pos++;
  1157. #ifdef RE_DEBUG
  1158. op->srcpos=i+1;
  1159. #endif
  1160. if (inquote && src[i]!=backslashChar)
  1161. {
  1162. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1163. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1164. if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;
  1165. continue;
  1166. }
  1167. if (src[i]==backslashChar)
  1168. {
  1169. i++;
  1170. if (inquote && src[i]!='E')
  1171. {
  1172. op->op=opSymbol;
  1173. op->symbol=backslashChar;
  1174. op=code+pos;
  1175. pos++;
  1176. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1177. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1178. if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;
  1179. continue;
  1180. }
  1181. op->op=opType;
  1182. switch (src[i])
  1183. {
  1184. case 'Q':inquote=1; pos--; continue;
  1185. case 'E':inquote=0; pos--; continue;
  1186. case 'b':op->op=opWordBound; continue;
  1187. case 'B':op->op=opNotWordBound; continue;
  1188. case 'D':op->op=opNotType;
  1189. case 'd':op->type=TYPE_DIGITCHAR; continue;
  1190. case 'S':op->op=opNotType;
  1191. case 's':op->type=TYPE_SPACECHAR; continue;
  1192. case 'W':op->op=opNotType;
  1193. case 'w':op->type=TYPE_WORDCHAR; continue;
  1194. case 'U':op->op=opNotType;
  1195. case 'u':op->type=TYPE_UPCASE; continue;
  1196. case 'L':op->op=opNotType;
  1197. case 'l':op->type=TYPE_LOWCASE; continue;
  1198. case 'I':op->op=opNotType;
  1199. case 'i':op->type=TYPE_ALPHACHAR; continue;
  1200. case 'A':op->op=opDataStart; continue;
  1201. case 'Z':op->op=opDataEnd; continue;
  1202. case 'n':op->op=opSymbol; op->symbol='\n'; continue;
  1203. case 'r':op->op=opSymbol; op->symbol='\r'; continue;
  1204. case 't':op->op=opSymbol; op->symbol='\t'; continue;
  1205. case 'f':op->op=opSymbol; op->symbol='\f'; continue;
  1206. case 'e':op->op=opSymbol; op->symbol=27; continue;
  1207. case 'O':op->op=opNoReturn; continue;
  1208. #ifdef NAMEDBRACKETS
  1209. case 'p':
  1210. {
  1211. op->op=opNamedBackRef;
  1212. i++;
  1213. if (src[i]!='{')return SetError(errSyntax,i);
  1214. int len=0; i++;
  1215. while (src[i+len]!='}')len++;
  1216. if (len>0)
  1217. {
  1218. #ifdef RE_NO_NEWARRAY
  1219. op->refname=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1220. #else
  1221. op->refname=new rechar[len+1];
  1222. #endif
  1223. memcpy(op->refname,src+i,len*sizeof(rechar));
  1224. op->refname[len]=0;
  1225. if (!h.Exists((char*)op->refname))
  1226. {
  1227. return SetError(errReferenceToUndefinedNamedBracket,i);
  1228. }
  1229. i+=len;
  1230. }
  1231. else
  1232. {
  1233. return SetError(errSyntax,i);
  1234. }
  1235. } continue;
  1236. #endif
  1237. case 'x':
  1238. {
  1239. i++;
  1240. if (i>=srclength)return SetError(errSyntax,i-1);
  1241. if(isxdigit(src[i]))
  1242. {
  1243. int c=TOLOWER(src[i])-'0';
  1244. if (c>9)c-='a'-'0'-10;
  1245. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1246. op->symbol=c;
  1247. for(int j=1,k=i;j<4 && k+j<srclength;j++)
  1248. {
  1249. if(isxdigit(src[k+j]))
  1250. {
  1251. i++;
  1252. c=TOLOWER(src[k+j])-'0';
  1253. if (c>9)c-='a'-'0'-10;
  1254. op->symbol<<=4;
  1255. op->symbol|=c;
  1256. }
  1257. else
  1258. {
  1259. break;
  1260. }
  1261. }
  1262. if (ignorecase)
  1263. {
  1264. op->symbol=TOLOWER(op->symbol);
  1265. if (TOUPPER(op->symbol)==TOLOWER(op->symbol))
  1266. {
  1267. op->op=opSymbol;
  1268. }
  1269. }
  1270. }
  1271. else return SetError(errSyntax,i);
  1272. continue;
  1273. }
  1274. default:
  1275. {
  1276. if (ISDIGIT(src[i]))
  1277. {
  1278. int save=i;
  1279. op->op=opBackRef;
  1280. op->refindex=GetNum(src,i); i--;
  1281. if (op->refindex<=0 || op->refindex>brcount || !closedbrackets[op->refindex])
  1282. {
  1283. return SetError(errInvalidBackRef,save-1);
  1284. }
  1285. if (op->refindex>maxbackref)maxbackref=op->refindex;
  1286. }
  1287. else
  1288. {
  1289. if (options&OP_STRICT && ISALPHA(src[i]))
  1290. {
  1291. return SetError(errInvalidEscape,i-1);
  1292. }
  1293. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1294. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1295. if (TOLOWER(op->symbol)==TOUPPER(op->symbol))
  1296. {
  1297. op->op=opSymbol;
  1298. }
  1299. }
  1300. }
  1301. }
  1302. continue;
  1303. }
  1304. switch (src[i])
  1305. {
  1306. case '.':
  1307. {
  1308. if (options&OP_SINGLELINE)
  1309. {
  1310. op->op=opCharAnyAll;
  1311. }
  1312. else
  1313. {
  1314. op->op=opCharAny;
  1315. }
  1316. continue;
  1317. }
  1318. case '^':
  1319. {
  1320. if (options&OP_MULTILINE)
  1321. {
  1322. op->op=opLineStart;
  1323. }
  1324. else
  1325. {
  1326. op->op=opDataStart;
  1327. }
  1328. continue;
  1329. }
  1330. case '$':
  1331. {
  1332. if (options&OP_MULTILINE)
  1333. {
  1334. op->op=opLineEnd;
  1335. }
  1336. else
  1337. {
  1338. op->op=opDataEnd;
  1339. }
  1340. continue;
  1341. }
  1342. case '|':
  1343. {
  1344. if (brackets[brdepth-1]->op==opAlternative)
  1345. {
  1346. brackets[brdepth-1]->alternative.nextalt=op;
  1347. }
  1348. else
  1349. {
  1350. if (brackets[brdepth-1]->op==opOpenBracket)
  1351. {
  1352. brackets[brdepth-1]->bracket.nextalt=op;
  1353. }
  1354. else
  1355. {
  1356. brackets[brdepth-1]->assert.nextalt=op;
  1357. }
  1358. }
  1359. if (brdepth==MAXDEPTH)return SetError(errMaxDepth,i);
  1360. brackets[brdepth++]=op;
  1361. op->op=opAlternative;
  1362. continue;
  1363. }
  1364. case '(':
  1365. {
  1366. op->op=opOpenBracket;
  1367. if (src[i+1]=='?')
  1368. {
  1369. i+=2;
  1370. switch (src[i])
  1371. {
  1372. case ':':op->bracket.index=-1; break;
  1373. case '=':op->op=opLookAhead; havelookahead=1; break;
  1374. case '!':op->op=opNotLookAhead; havelookahead=1; break;
  1375. case '<':
  1376. {
  1377. i++;
  1378. if (src[i]=='=')
  1379. {
  1380. op->op=opLookBehind;
  1381. }
  1382. else if (src[i]=='!')
  1383. {
  1384. op->op=opNotLookBehind;
  1385. }
  1386. else return SetError(errSyntax,i);
  1387. lookbehind++;
  1388. } break;
  1389. #ifdef NAMEDBRACKETS
  1390. case '{':
  1391. {
  1392. op->op=opNamedBracket;
  1393. havenamedbrackets=1;
  1394. int len=0;
  1395. i++;
  1396. while (src[i+len]!='}')len++;
  1397. if (len>0)
  1398. {
  1399. #ifdef RE_NO_NEWARRAY
  1400. op->nbracket.name=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1401. #else
  1402. op->nbracket.name=new rechar[len+1];
  1403. #endif
  1404. memcpy(op->nbracket.name,src+i,len*sizeof(rechar));
  1405. op->nbracket.name[len]=0;
  1406. //h.SetItem((char*)op->nbracket.name,m);
  1407. }
  1408. else
  1409. {
  1410. op->op=opOpenBracket;
  1411. op->bracket.index=-1;
  1412. }
  1413. i+=len;
  1414. } break;
  1415. #endif
  1416. default:
  1417. {
  1418. return SetError(errSyntax,i);
  1419. }
  1420. }
  1421. }
  1422. else
  1423. {
  1424. brcount++;
  1425. closedbrackets[brcount]=false;
  1426. op->bracket.index=brcount;
  1427. }
  1428. brackets[brdepth]=op;
  1429. brdepth++;
  1430. continue;
  1431. }
  1432. case ')':
  1433. {
  1434. op->op=opClosingBracket;
  1435. brdepth--;
  1436. while (brackets[brdepth]->op==opAlternative)
  1437. {
  1438. brackets[brdepth]->alternative.endindex=op;
  1439. brdepth--;
  1440. }
  1441. switch (brackets[brdepth]->op)
  1442. {
  1443. case opOpenBracket:
  1444. {
  1445. op->bracket.pairindex=brackets[brdepth];
  1446. brackets[brdepth]->bracket.pairindex=op;
  1447. op->bracket.index=brackets[brdepth]->bracket.index;
  1448. if (op->bracket.index!=-1)
  1449. {
  1450. closedbrackets[op->bracket.index]=true;
  1451. }
  1452. break;
  1453. }
  1454. #ifdef NAMEDBRACKETS
  1455. case opNamedBracket:
  1456. {
  1457. op->nbracket.pairindex=brackets[brdepth];
  1458. brackets[brdepth]->nbracket.pairindex=op;
  1459. op->nbracket.name=brackets[brdepth]->nbracket.name;
  1460. h.SetItem((char*)op->nbracket.name,m);
  1461. break;
  1462. }
  1463. #endif
  1464. case opLookBehind:
  1465. case opNotLookBehind:
  1466. {
  1467. lookbehind--;
  1468. int l=CalcPatternLength(brackets[brdepth]->next,op->prev);
  1469. if (l==-1)return SetError(errVariableLengthLookBehind,i);
  1470. brackets[brdepth]->assert.length=l;
  1471. }// there is no break and this is correct!
  1472. case opLookAhead:
  1473. case opNotLookAhead:
  1474. {
  1475. op->assert.pairindex=brackets[brdepth];
  1476. brackets[brdepth]->assert.pairindex=op;
  1477. break;
  1478. }
  1479. }
  1480. continue;
  1481. }
  1482. case '[':
  1483. {
  1484. i++;
  1485. int negative=0;
  1486. if (src[i]=='^')
  1487. {
  1488. negative=1;
  1489. i++;
  1490. }
  1491. int lastchar=0;
  1492. int classsize=0;
  1493. op->op=opSymbolClass;
  1494. //op->symbolclass=new rechar[32];
  1495. //memset(op->symbolclass,0,32);
  1496. #ifdef UNICODE
  1497. op->symbolclass=new UniSet();
  1498. tmpclass=op->symbolclass;
  1499. #define IF_U(t)
  1500. #else
  1501. for (j=0; j<8; j++)itmpclass[j]=0;
  1502. int classindex=0;
  1503. #define IF_U(t) t
  1504. #endif
  1505. for (; src[i]!=']'; i++)
  1506. {
  1507. if (src[i]==backslashChar)
  1508. {
  1509. i++;
  1510. int isnottype=0;
  1511. int type=0;
  1512. lastchar=0;
  1513. switch (src[i])
  1514. {
  1515. case 'D':isnottype=1;
  1516. case 'd':type=TYPE_DIGITCHAR; IF_U(classindex=0); break;
  1517. case 'W':isnottype=1;
  1518. case 'w':type=TYPE_WORDCHAR; IF_U(classindex=64); break;
  1519. case 'S':isnottype=1;
  1520. case 's':type=TYPE_SPACECHAR; IF_U(classindex=32); break;
  1521. case 'L':isnottype=1;
  1522. case 'l':type=TYPE_LOWCASE; IF_U(lassindex=96); break;
  1523. case 'U':isnottype=1;
  1524. case 'u':type=TYPE_UPCASE; IF_U(classindex=128); break;
  1525. case 'I':isnottype=1;
  1526. case 'i':type=TYPE_ALPHACHAR; IF_U(classindex=160); break;
  1527. case 'n':lastchar='\n'; break;
  1528. case 'r':lastchar='\r'; break;
  1529. case 't':lastchar='\t'; break;
  1530. case 'f':lastchar='\f'; break;
  1531. case 'e':lastchar=27; break;
  1532. case 'x':
  1533. {
  1534. i++;
  1535. if (i>=srclength)return SetError(errSyntax,i-1);
  1536. if (isxdigit(src[i]))
  1537. {
  1538. int c=TOLOWER(src[i])-'0';
  1539. if (c>9)c-='a'-'0'-10;
  1540. lastchar=c;
  1541. for(int j=1,k=i;j<4 && k+j<srclength;j++)
  1542. {
  1543. if (isxdigit(src[k+j]))
  1544. {
  1545. i++;
  1546. c=TOLOWER(src[k+j])-'0';
  1547. if (c>9)c-='a'-'0'-10;
  1548. lastchar<<=4;
  1549. lastchar|=c;
  1550. }
  1551. else
  1552. {
  1553. break;
  1554. }
  1555. }
  1556. dpf(("Last char=%c(%02x)\n",lastchar,lastchar));
  1557. }
  1558. else return SetError(errSyntax,i);
  1559. break;
  1560. }
  1561. default:
  1562. {
  1563. if (options&OP_STRICT && ISALPHA(src[i]))
  1564. {
  1565. return SetError(errInvalidEscape,i-1);
  1566. }
  1567. lastchar=src[i];
  1568. }
  1569. }
  1570. if (type)
  1571. {
  1572. #ifdef UNICODE
  1573. if (isnottype)
  1574. {
  1575. tmpclass->nottypes|=type;
  1576. }
  1577. else
  1578. {
  1579. tmpclass->types|=type;
  1580. }
  1581. #else
  1582. isnottype=isnottype?0xffffffff:0;
  1583. int *b=(int*)(charbits+classindex);
  1584. for (j=0; j<8; j++)
  1585. {
  1586. itmpclass[j]|=b[j]^isnottype;
  1587. }
  1588. #endif
  1589. classsize=257;
  1590. //for(int j=0;j<32;j++)op->symbolclass[j]|=charbits[classindex+j]^isnottype;
  1591. //classsize+=charsizes[classindex>>5];
  1592. //int setbit;
  1593. /*for(int j=0;j<256;j++)
  1594. {
  1595. setbit=(chartypes[j]^isnottype)&type;
  1596. if(setbit)
  1597. {
  1598. if(ignorecase)
  1599. {
  1600. SetBit(op->symbolclass,lc[j]);
  1601. SetBit(op->symbolclass,uc[j]);
  1602. }else
  1603. {
  1604. SetBit(op->symbolclass,j);
  1605. }
  1606. classsize++;
  1607. }
  1608. }*/
  1609. }
  1610. else
  1611. {
  1612. if (options&OP_IGNORECASE)
  1613. {
  1614. SetBit(tmpclass,TOLOWER(lastchar));
  1615. SetBit(tmpclass,TOUPPER(lastchar));
  1616. }
  1617. else
  1618. {
  1619. SetBit(tmpclass,lastchar);
  1620. }
  1621. classsize++;
  1622. }
  1623. continue;
  1624. }
  1625. if (src[i]=='-')
  1626. {
  1627. if (lastchar && src[i+1]!=']')
  1628. {
  1629. int to=src[i+1];
  1630. if (to==backslashChar)
  1631. {
  1632. to=src[i+2];
  1633. if (to=='x')
  1634. {
  1635. i+=2;
  1636. to=TOLOWER(src[i+1]);
  1637. if(isxdigit(to))
  1638. {
  1639. to-='0';
  1640. if (to>9)to-='a'-'0'-10;
  1641. for(int j=1,k=(i+1);j<4 && k+j<srclength;j++)
  1642. {
  1643. int c=TOLOWER(src[k+j]);
  1644. if(isxdigit(c))
  1645. {
  1646. i++;
  1647. c-='0';
  1648. if (c>9)c-='a'-'0'-10;
  1649. to<<=4;
  1650. to|=c;
  1651. }
  1652. else
  1653. {
  1654. break;
  1655. }
  1656. }
  1657. }
  1658. else return SetError(errSyntax,i);
  1659. }
  1660. else
  1661. {
  1662. SetBit(tmpclass,'-');
  1663. classsize++;
  1664. continue;
  1665. }
  1666. }
  1667. i++;
  1668. dpf(("from %d to %d\n",lastchar,to));
  1669. for (j=lastchar; j<=to; j++)
  1670. {
  1671. if (ignorecase)
  1672. {
  1673. SetBit(tmpclass,TOLOWER(j));
  1674. SetBit(tmpclass,TOUPPER(j));
  1675. }
  1676. else
  1677. {
  1678. SetBit(tmpclass,j);
  1679. }
  1680. classsize++;
  1681. }
  1682. continue;
  1683. }
  1684. }
  1685. lastchar=src[i];
  1686. if (ignorecase)
  1687. {
  1688. SetBit(tmpclass,TOLOWER(lastchar));
  1689. SetBit(tmpclass,TOUPPER(lastchar));
  1690. }
  1691. else
  1692. {
  1693. SetBit(tmpclass,lastchar);
  1694. }
  1695. classsize++;
  1696. }
  1697. if (negative && classsize>1)
  1698. {
  1699. #ifdef UNICODE
  1700. tmpclass->negative=negative;
  1701. #else
  1702. for (int jj=0; jj<8; jj++)itmpclass[jj]^=0xffffffff;
  1703. #endif
  1704. //for(int j=0;j<32;j++)op->symbolclass[j]^=0xff;
  1705. }
  1706. if (classsize==1)
  1707. {
  1708. #ifdef UNICODE
  1709. delete op->symbolclass;
  1710. op->symbolclass=0;
  1711. tmpclass=0;
  1712. #endif
  1713. op->op=negative?opNotSymbol:opSymbol;
  1714. if (ignorecase)
  1715. {
  1716. op->op+=2;
  1717. op->symbol=TOLOWER(lastchar);
  1718. }
  1719. else
  1720. {
  1721. op->symbol=lastchar;
  1722. }
  1723. }
  1724. #ifdef UNICODE
  1725. if (tmpclass)tmpclass->negative=negative;
  1726. #else
  1727. else if (classsize==256 && !negative)
  1728. {
  1729. op->op=options&OP_SINGLELINE?opCharAnyAll:opCharAny;
  1730. }
  1731. else
  1732. {
  1733. #ifdef RE_NO_NEWARRAY
  1734. op->symbolclass=static_cast<rechar*>(malloc(sizeof(rechar)*32));
  1735. #else
  1736. op->symbolclass=new rechar[32];
  1737. #endif
  1738. for (j=0; j<8; j++)((int*)op->symbolclass)[j]=itmpclass[j];
  1739. }
  1740. #endif
  1741. continue;
  1742. }
  1743. case '+':
  1744. case '*':
  1745. case '?':
  1746. case '{':
  1747. {
  1748. int min=0,max=0;
  1749. switch (src[i])
  1750. {
  1751. case '+':min=1; max=-2; break;
  1752. case '*':min=0; max=-2; break;
  1753. case '?':
  1754. {
  1755. //if(src[i+1]=='?') return SetError(errInvalidQuantifiersCombination,i);
  1756. min=0; max=1;
  1757. break;
  1758. }
  1759. case '{':
  1760. {
  1761. i++;
  1762. int save=i;
  1763. min=GetNum(src,i);
  1764. max=min;
  1765. if (min<0)return SetError(errInvalidRange,save);
  1766. // i++;
  1767. if (src[i]==',')
  1768. {
  1769. if (src[i+1]=='}')
  1770. {
  1771. i++;
  1772. max=-2;
  1773. }
  1774. else
  1775. {
  1776. i++;
  1777. max=GetNum(src,i);
  1778. // i++;
  1779. if (max<min)return SetError(errInvalidRange,save);
  1780. }
  1781. }
  1782. if (src[i]!='}')return SetError(errInvalidRange,save);
  1783. }
  1784. }
  1785. pos--;
  1786. op=code+pos-1;
  1787. if (min==1 && max==1)continue;
  1788. op->range.min=min;
  1789. op->range.max=max;
  1790. switch (op->op)
  1791. {
  1792. case opLineStart:
  1793. case opLineEnd:
  1794. case opDataStart:
  1795. case opDataEnd:
  1796. case opWordBound:
  1797. case opNotWordBound:
  1798. {
  1799. return SetError(errInvalidQuantifiersCombination,i);
  1800. // op->range.op=op->op;
  1801. // op->op=opRange;
  1802. // continue;
  1803. }
  1804. case opCharAny:
  1805. case opCharAnyAll:
  1806. {
  1807. op->range.op=op->op;
  1808. op->op=opAnyRange;
  1809. break;
  1810. }
  1811. case opType:
  1812. {
  1813. op->op=opTypeRange;
  1814. break;
  1815. }
  1816. case opNotType:
  1817. {
  1818. op->op=opNotTypeRange;
  1819. break;
  1820. }
  1821. case opSymbolIgnoreCase:
  1822. case opSymbol:
  1823. {
  1824. op->op=opSymbolRange;
  1825. break;
  1826. }
  1827. case opNotSymbol:
  1828. case opNotSymbolIgnoreCase:
  1829. {
  1830. op->op=opNotSymbolRange;
  1831. break;
  1832. }
  1833. case opSymbolClass:
  1834. {
  1835. op->op=opClassRange;
  1836. break;
  1837. }
  1838. case opBackRef:
  1839. {
  1840. op->op=opBackRefRange;
  1841. break;
  1842. }
  1843. #ifdef NAMEDBRACKETS
  1844. case opNamedBackRef:
  1845. {
  1846. op->op=opNamedRefRange;
  1847. } break;
  1848. #endif
  1849. case opClosingBracket:
  1850. {
  1851. op=op->bracket.pairindex;
  1852. if (op->op!=opOpenBracket)return SetError(errInvalidQuantifiersCombination,i);
  1853. op->range.min=min;
  1854. op->range.max=max;
  1855. op->op=opBracketRange;
  1856. break;
  1857. }
  1858. default:
  1859. {
  1860. dpf(("OP=%d\n",op->op));
  1861. return SetError(errInvalidQuantifiersCombination,i);
  1862. }
  1863. }//switch(code.op)
  1864. if (src[i+1]=='?')
  1865. {
  1866. op->op++;
  1867. i++;
  1868. }
  1869. continue;
  1870. }// case +*?{
  1871. case ' ':
  1872. case '\t':
  1873. case '\n':
  1874. case '\r':
  1875. {
  1876. if (options&OP_XTENDEDSYNTAX)
  1877. {
  1878. pos--;
  1879. continue;
  1880. }
  1881. }
  1882. #ifdef RELIB
  1883. case '%':
  1884. {
  1885. i++;
  1886. int len=0;
  1887. while (src[i+len]!='%')len++;
  1888. op->op=opLibCall;
  1889. #ifdef RE_NO_NEWARRAY
  1890. op->rename=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1891. #else
  1892. op->rename=new rechar[len+1];
  1893. #endif
  1894. memcpy(op->rename,src+i,len*sizeof(rechar));
  1895. op->rename[len]=0;
  1896. i+=len;
  1897. continue;
  1898. }
  1899. #endif
  1900. default:
  1901. {
  1902. op->op=options&OP_IGNORECASE?opSymbolIgnoreCase:opSymbol;
  1903. if (ignorecase)
  1904. {
  1905. op->symbol=TOLOWER(src[i]);
  1906. }
  1907. else
  1908. {
  1909. op->symbol=src[i];
  1910. }
  1911. }
  1912. }//switch(src[i])
  1913. }//for()
  1914. op=code+pos;
  1915. pos++;
  1916. brdepth--;
  1917. while (brdepth>=0 && brackets[brdepth]->op==opAlternative)
  1918. {
  1919. brackets[brdepth]->alternative.endindex=op;
  1920. brdepth--;
  1921. }
  1922. op->op=opClosingBracket;
  1923. op->bracket.pairindex=code;
  1924. code->bracket.pairindex=op;
  1925. #ifdef RE_DEBUG
  1926. op->srcpos=i;
  1927. #endif
  1928. op=code+pos;
  1929. //pos++;
  1930. op->op=opRegExpEnd;
  1931. #ifdef RE_DEBUG
  1932. op->srcpos=i+1;
  1933. #endif
  1934. return 1;
  1935. }
  1936. inline void RegExp::PushState()
  1937. {
  1938. stackcount++;
  1939. #ifdef RELIB
  1940. stackusage++;
  1941. #endif
  1942. if (stackcount==STACK_PAGE_SIZE)
  1943. {
  1944. if (lastpage->next)
  1945. {
  1946. lastpage=lastpage->next;
  1947. stack=lastpage->stack;
  1948. }
  1949. else
  1950. {
  1951. lastpage->next=new StateStackPage;
  1952. lastpage->next->prev=lastpage;
  1953. lastpage=lastpage->next;
  1954. lastpage->next=nullptr;
  1955. #ifdef RE_NO_NEWARRAY
  1956. lastpage->stack=static_cast<StateStackItem*>
  1957. (CreateArray(sizeof(StateStackItem), STACK_PAGE_SIZE,
  1958. StateStackItem::OnCreate));
  1959. #else
  1960. lastpage->stack=new StateStackItem[STACK_PAGE_SIZE];
  1961. #endif // RE_NO_NEWARRAY
  1962. stack=lastpage->stack;
  1963. }
  1964. stackcount=0;
  1965. }
  1966. st=&stack[stackcount];
  1967. }
  1968. inline int RegExp::PopState()
  1969. {
  1970. stackcount--;
  1971. #ifdef RELIB
  1972. stackusage--;
  1973. if (stackusage<0)return 0;
  1974. #endif
  1975. if (stackcount<0)
  1976. {
  1977. if (!lastpage->prev)
  1978. return 0;
  1979. lastpage=lastpage->prev;
  1980. stack=lastpage->stack;
  1981. stackcount=STACK_PAGE_SIZE-1;
  1982. }
  1983. st=&stack[stackcount];
  1984. return 1;
  1985. }
  1986. inline StateStackItem *RegExp::GetState()
  1987. {
  1988. int tempcount=stackcount;
  1989. #ifdef RELIB
  1990. if (!stackusage)return 0;
  1991. #endif
  1992. StateStackPage* temppage=lastpage;
  1993. StateStackItem* tempstack=lastpage->stack;
  1994. tempcount--;
  1995. if (tempcount<0)
  1996. {
  1997. if (!temppage->prev)
  1998. return 0;
  1999. temppage=temppage->prev;
  2000. tempstack=temppage->stack;
  2001. tempcount=STACK_PAGE_SIZE-1;
  2002. }
  2003. return &tempstack[tempcount];
  2004. }
  2005. inline StateStackItem *RegExp::FindStateByPos(PREOpCode pos,int op)
  2006. {
  2007. #ifdef RELIB
  2008. int tempusage=stackusage;
  2009. #endif
  2010. int tempcount=stackcount;
  2011. StateStackPage* temppage=lastpage;
  2012. StateStackItem* tempstack=lastpage->stack;
  2013. do
  2014. {
  2015. tempcount--;
  2016. #ifdef RELIB
  2017. tempusage--;
  2018. if (tempusage<0)return 0;
  2019. #endif
  2020. if (tempcount<0)
  2021. {
  2022. if (!temppage->prev)
  2023. return 0;
  2024. temppage=temppage->prev;
  2025. tempstack=temppage->stack;
  2026. tempcount=STACK_PAGE_SIZE-1;
  2027. }
  2028. }
  2029. while (tempstack[tempcount].pos!=pos || tempstack[tempcount].op!=op);
  2030. return &tempstack[tempcount];
  2031. }
  2032. inline int RegExp::StrCmp(const prechar& str,const prechar _st,const prechar ed)
  2033. {
  2034. const prechar save=str;
  2035. if (ignorecase)
  2036. {
  2037. while (_st<ed)
  2038. {
  2039. if (TOLOWER(*str)!=TOLOWER(*_st)) {str=save; return 0;}
  2040. str++;
  2041. _st++;
  2042. }
  2043. }
  2044. else
  2045. {
  2046. while (_st<ed)
  2047. {
  2048. if (*str!=*_st) {str=save; return 0;}
  2049. str++;
  2050. _st++;
  2051. }
  2052. }
  2053. return 1;
  2054. }
  // Shorthand for the opcode record that the local pointer `op` currently refers to.
  2055. #define OP (*op)
  // MINSKIP(cmp): fast-forward helper that expands to a braced block.
  //
  // It inspects the opcode that FOLLOWS the current one (op->next->op). For the
  // kinds it knows (opSymbol, opNotSymbol, opSymbolIgnoreCase,
  // opNotSymbolIgnoreCase, opType, opNotType, opSymbolClass) it first tests
  // whether the current character *str fails that following opcode. If it does,
  // it advances `str` in a loop that continues while all of these hold:
  //   - str < strend,
  //   - the caller-supplied expression `cmp` is true (re-evaluated every pass),
  //   - st->max-- is non-zero (post-decrement: the counter is decremented each
  //     time this operand is evaluated, including the pass that ends the loop).
  // Inside the loop `str` is incremented and the loop is left early depending on
  // a test of str[1] against the following opcode's symbol/type/class.
  // Any other following opcode kind leaves `str` and `st->max` untouched.
  //
  // The expansion relies on these names being in scope at the point of use:
  // `op`, `str`, `strend`, `st`, and `cl` (assigned in the opSymbolClass case).
  //
  // NOTE(review): after `str++` the early-exit test reads str[1], i.e. the
  // character AFTER the new current one, and its polarity mirrors the entry test
  // rather than inverting it. Confirm that this is intended and that str[1]
  // cannot be read past `strend`.
  2056. #define MINSKIP(cmp) \
  2057. { int jj; \
  2058. switch(op->next->op) \
  2059. { \
  2060. case opSymbol: \
  2061. { \
  2062. jj=op->next->symbol; \
  2063. if(*str!=jj) \
  2064. while(str<strend && cmp && st->max--)\
  2065. {\
  2066. str++;\
  2067. if(str[1]!=jj)break;\
  2068. } \
  2069. break; \
  2070. } \
  2071. case opNotSymbol: \
  2072. { \
  2073. jj=op->next->symbol; \
  2074. if(*str==jj) \
  2075. while(str<strend && cmp && st->max--)\
  2076. {\
  2077. str++;\
  2078. if(str[1]==jj)break;\
  2079. } \
  2080. break; \
  2081. } \
  2082. case opSymbolIgnoreCase: \
  2083. { \
  2084. jj=op->next->symbol; \
  2085. if(TOLOWER(*str)!=jj) \
  2086. while(str<strend && cmp && st->max--)\
  2087. {\
  2088. str++;\
  2089. if(TOLOWER(str[1])!=jj)break;\
  2090. } \
  2091. break; \
  2092. } \
  2093. case opNotSymbolIgnoreCase: \
  2094. { \
  2095. jj=op->next->symbol; \
  2096. if(TOLOWER(*str)==jj) \
  2097. while(str<strend && cmp && st->max--)\
  2098. {\
  2099. str++;\
  2100. if(TOLOWER(str[1])==jj)break;\
  2101. } \
  2102. break; \
  2103. } \
  2104. case opType: \
  2105. { \
  2106. jj=op->next->type; \
  2107. if(!(ISTYPE(*str,jj))) \
  2108. while(str<strend && cmp && st->max--)\
  2109. {\
  2110. str++;\
  2111. if(!(ISTYPE(str[1],jj)))break;\
  2112. } \
  2113. break; \
  2114. } \
  2115. case opNotType: \
  2116. { \
  2117. jj=op->next->type; \
  2118. if((ISTYPE(*str,jj))) \
  2119. while(str<strend && cmp && st->max--)\
  2120. {\
  2121. str++;\
  2122. if((ISTYPE(str[1],jj)))break;\
  2123. } \
  2124. break; \
  2125. } \
  2126. case opSymbolClass: \
  2127. { \
  2128. cl=op->next->symbolclass; \
  2129. if(!GetBit(cl,*str)) \
  2130. while(str<strend && cmp && st->max--)\
  2131. {\
  2132. str++;\
  2133. if(!GetBit(cl,str[1]))break;\
  2134. } \
  2135. break; \
  2136. } \
  2137. } \
  2138. }
  2139. #ifdef RELIB
  2140. static void KillMatchList(MatchList *ml)
  2141. {
  2142. for (int i=0; i<ml->Count(); i++)
  2143. {
  2144. KillMatchList((*ml)[i].sublist);
  2145. (*ml)[i].sublist=nullptr;
  2146. }
  2147. ml->Clean();
  2148. }
  2149. #endif
  2150. int RegExp::InnerMatch(const prechar str,const prechar strend,PMatch match,int& matchcount
  2151. #ifdef NAMEDBRACKETS
  2152. ,PMatchHash hmatch
  2153. #endif
  2154. )
  2155. {
  2156. // register prechar str=start;
  2157. int i,j;
  2158. int minimizing;
  2159. PREOpCode op,tmp=nullptr;
  2160. PMatch m;
  2161. #ifdef UNICODE
  2162. UniSet *cl;
  2163. #else
  2164. prechar cl;
  2165. #endif
  2166. #ifdef RELIB
  2167. SMatchListItem ml;
  2168. #endif
  2169. int inrangebracket=0;
  2170. if (errorcode==errNotCompiled)return 0;
  2171. if (matchcount<maxbackref)return SetError(errNotEnoughMatches,maxbackref);
  2172. #ifdef NAMEDBRACKETS
  2173. if (havenamedbrackets && !hmatch)return SetError(errNoStorageForNB,0);
  2174. #endif
  2175. #ifdef RELIB
  2176. if (reclevel<=1)
  2177. {
  2178. #endif
  2179. stackcount=0;
  2180. lastpage=firstpage;
  2181. stack=lastpage->stack;
  2182. st=&stack[0];
  2183. #ifdef RELIB
  2184. }
  2185. #endif
  2186. StateStackItem *ps;
  2187. errorcode=errNone;
  2188. /*for(i=0;i<matchcount;i++)
  2189. {
  2190. match[i].start=-1;
  2191. match[i].end=-1;
  2192. }*/
  2193. if (bracketscount<matchcount)matchcount=bracketscount;
  2194. memset(match,-1,sizeof(*match)*matchcount);
  2195. for (op=code; op; op=op->next)
  2196. {
  2197. //dpf(("op:%s,\tpos:%d,\tstr:%d\n",ops[OP.op],pos,str-start));
  2198. dpf(("=================\n"));
  2199. dpf(("S:%s\n%*s\n",start,str-start+3,"^"));
  2200. dpf(("R:%s\n%*s\n",resrc,OP.srcpos+3,"^"));
  2201. if (str<=strend)
  2202. switch (OP.op)
  2203. {
  2204. case opLineStart:
  2205. {
  2206. if (str==start || str[-1]==0x0d || str[-1]==0x0a)continue;
  2207. break;
  2208. }
  2209. case opLineEnd:
  2210. {
  2211. if (str==strend)continue;
  2212. if (str[0]==0x0d || str[0]==0x0a)
  2213. {
  2214. if (str[0]==0x0d)str++;
  2215. if (str[0]==0x0a)str++;
  2216. continue;
  2217. }
  2218. break;
  2219. }
  2220. case opDataStart:
  2221. {
  2222. if (str==start)continue;
  2223. break;
  2224. }
  2225. case opDataEnd:
  2226. {
  2227. if (str==strend)continue;
  2228. break;
  2229. }
  2230. case opWordBound:
  2231. {
  2232. if ((str==start && ISWORD(*str))||
  2233. (!(ISWORD(str[-1])) && ISWORD(*str)) ||
  2234. (!(ISWORD(*str)) && ISWORD(str[-1])) ||
  2235. (str==strend && ISWORD(str[-1])))continue;
  2236. break;
  2237. }
  2238. case opNotWordBound:
  2239. {
  2240. if (!((str==start && ISWORD(*str))||
  2241. (!(ISWORD(str[-1])) && ISWORD(*str)) ||
  2242. (!(ISWORD(*str)) && ISWORD(str[-1])) ||
  2243. (str==strend && ISWORD(str[-1]))))continue;
  2244. break;
  2245. }
  2246. case opType:
  2247. {
  2248. if (ISTYPE(*str,OP.type))
  2249. {
  2250. str++;
  2251. continue;
  2252. }
  2253. break;
  2254. }
  2255. case opNotType:
  2256. {
  2257. if (!(ISTYPE(*str,OP.type)))
  2258. {
  2259. str++;
  2260. continue;
  2261. }
  2262. break;
  2263. }
  2264. case opCharAny:
  2265. {
  2266. if (*str!=0x0d && *str!=0x0a)
  2267. {
  2268. str++;
  2269. continue;
  2270. }
  2271. break;
  2272. }
  2273. case opCharAnyAll:
  2274. {
  2275. str++;
  2276. continue;
  2277. }
  2278. case opSymbol:
  2279. {
  2280. if (*str==OP.symbol)
  2281. {
  2282. str++;
  2283. continue;
  2284. }
  2285. break;
  2286. }
  2287. case opNotSymbol:
  2288. {
  2289. if (*str!=OP.symbol)
  2290. {
  2291. str++;
  2292. continue;
  2293. }
  2294. break;
  2295. }
  2296. case opSymbolIgnoreCase:
  2297. {
  2298. if (TOLOWER(*str)==OP.symbol)
  2299. {
  2300. str++;
  2301. continue;
  2302. }
  2303. break;
  2304. }
  2305. case opNotSymbolIgnoreCase:
  2306. {
  2307. if (TOLOWER(*str)!=OP.symbol)
  2308. {
  2309. str++;
  2310. continue;
  2311. }
  2312. break;
  2313. }
  2314. case opSymbolClass:
  2315. {
  2316. if (GetBit(OP.symbolclass,*str))
  2317. {
  2318. str++;
  2319. continue;
  2320. }
  2321. break;
  2322. }
  2323. case opOpenBracket:
  2324. {
  2325. if (OP.bracket.index>=0 && OP.bracket.index<matchcount)
  2326. {
  2327. //if (inrangebracket) Mantis#1388
  2328. {
  2329. st->op=opOpenBracket;
  2330. st->pos=op;
  2331. st->min=match[OP.bracket.index].start;
  2332. st->max=match[OP.bracket.index].end;
  2333. PushState();
  2334. }
  2335. match[OP.bracket.index].start=(int)(str-start);
  2336. }
  2337. if (OP.bracket.nextalt)
  2338. {
  2339. st->op=opAlternative;
  2340. st->pos=OP.bracket.nextalt;
  2341. st->savestr=str;
  2342. PushState();
  2343. }
  2344. continue;
  2345. }
  2346. #ifdef NAMEDBRACKETS
  2347. case opNamedBracket:
  2348. {
  2349. if (hmatch)
  2350. {
  2351. PMatch m2;
  2352. if (!hmatch->Exists((char*)OP.nbracket.name))
  2353. {
  2354. tag_Match sm;
  2355. sm.start=-1;
  2356. sm.end=-1;
  2357. m2=hmatch->SetItem((char*)OP.nbracket.name,sm);
  2358. }
  2359. else
  2360. {
  2361. m2=hmatch->GetPtr((char*)OP.nbracket.name);
  2362. }
  2363. //if (inrangebracket) Mantis#1388
  2364. {
  2365. st->op=opNamedBracket;
  2366. st->pos=op;
  2367. st->min=m2->start;
  2368. st->max=m2->end;
  2369. PushState();
  2370. }
  2371. m2->start=(int)(str-start);
  2372. }
  2373. if (OP.bracket.nextalt)
  2374. {
  2375. st->op=opAlternative;
  2376. st->pos=OP.bracket.nextalt;
  2377. st->savestr=str;
  2378. PushState();
  2379. }
  2380. continue;
  2381. }
  2382. #endif
  2383. case opClosingBracket:
  2384. {
  2385. switch (OP.bracket.pairindex->op)
  2386. {
  2387. case opOpenBracket:
  2388. {
  2389. if (OP.bracket.index>=0 && OP.bracket.index<matchcount)
  2390. {
  2391. match[OP.bracket.index].end=(int)(str-start);
  2392. if (brhandler)
  2393. {
  2394. if (
  2395. !brhandler
  2396. (
  2397. brhdata,
  2398. bhMatch,
  2399. OP.bracket.index,
  2400. match[OP.bracket.index].start,
  2401. match[OP.bracket.index].end
  2402. )
  2403. )
  2404. {
  2405. return -1;
  2406. }
  2407. }
  2408. }
  2409. continue;
  2410. }
  2411. #ifdef NAMEDBRACKETS
  2412. case opNamedBracket:
  2413. {
  2414. if (hmatch)
  2415. {
  2416. PMatch m2=hmatch->GetPtr((char*)OP.nbracket.name);
  2417. m2->end=(int)(str-start);
  2418. }
  2419. continue;
  2420. }
  2421. #endif
  2422. case opBracketRange:
  2423. {
  2424. ps=FindStateByPos(OP.bracket.pairindex,opBracketRange);
  2425. *st=*ps;
  2426. if (str==st->startstr)
  2427. {
  2428. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2429. {
  2430. match[OP.range.bracket.index].end=(int)(str-start);
  2431. if (brhandler)
  2432. {
  2433. if (
  2434. !brhandler
  2435. (
  2436. brhdata,
  2437. bhMatch,
  2438. OP.range.bracket.index,
  2439. match[OP.range.bracket.index].start,
  2440. match[OP.range.bracket.index].end
  2441. )
  2442. )
  2443. {
  2444. return -1;
  2445. }
  2446. }
  2447. }
  2448. inrangebracket--;
  2449. continue;
  2450. }
  2451. if (st->min>0)st->min--;
  2452. if (st->min)
  2453. {
  2454. st->max--;
  2455. st->startstr=str;
  2456. st->savestr=str;
  2457. op=st->pos;
  2458. PushState();
  2459. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2460. {
  2461. match[OP.range.bracket.index].start=(int)(str-start);
  2462. st->op=opOpenBracket;
  2463. st->pos=op;
  2464. st->min=match[OP.range.bracket.index].start;
  2465. st->max=match[OP.range.bracket.index].end;
  2466. PushState();
  2467. }
  2468. if (OP.range.bracket.nextalt)
  2469. {
  2470. st->op=opAlternative;
  2471. st->pos=OP.range.bracket.nextalt;
  2472. st->savestr=str;
  2473. PushState();
  2474. }
  2475. continue;
  2476. }
  2477. st->max--;
  2478. if (!st->max)
  2479. {
  2480. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2481. {
  2482. match[OP.range.bracket.index].end=(int)(str-start);
  2483. if (brhandler)
  2484. {
  2485. if (
  2486. !brhandler
  2487. (
  2488. brhdata,
  2489. bhMatch,
  2490. OP.range.bracket.index,
  2491. match[OP.range.bracket.index].start,
  2492. match[OP.range.bracket.index].end
  2493. )
  2494. )
  2495. {
  2496. return -1;
  2497. }
  2498. }
  2499. }
  2500. inrangebracket--;
  2501. continue;
  2502. }
  2503. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2504. {
  2505. match[OP.range.bracket.index].end=(int)(str-start);
  2506. if (brhandler)
  2507. {
  2508. if (
  2509. !brhandler
  2510. (
  2511. brhdata,
  2512. bhMatch,
  2513. OP.range.bracket.index,
  2514. match[OP.range.bracket.index].start,
  2515. match[OP.range.bracket.index].end
  2516. )
  2517. )
  2518. {
  2519. return -1;
  2520. }
  2521. }
  2522. tmp=op;
  2523. }
  2524. st->startstr=str;
  2525. st->savestr=str;
  2526. op=st->pos;
  2527. PushState();
  2528. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2529. {
  2530. st->op=opOpenBracket;
  2531. st->pos=tmp;
  2532. st->min=match[OP.range.bracket.index].start;
  2533. st->max=match[OP.range.bracket.index].end;
  2534. PushState();
  2535. match[OP.range.bracket.index].start=(int)(str-start);
  2536. }
  2537. if (OP.range.bracket.nextalt)
  2538. {
  2539. st->op=opAlternative;
  2540. st->pos=OP.range.bracket.nextalt;
  2541. st->savestr=str;
  2542. PushState();
  2543. }
  2544. continue;
  2545. }
  2546. case opBracketMinRange:
  2547. {
  2548. ps=FindStateByPos(OP.bracket.pairindex,opBracketMinRange);
  2549. *st=*ps;
  2550. if (st->min>0)st->min--;
  2551. if (st->min)
  2552. {
  2553. //st->min--;
  2554. st->max--;
  2555. st->startstr=str;
  2556. st->savestr=str;
  2557. op=st->pos;
  2558. PushState();
  2559. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2560. {
  2561. if (brhandler)
  2562. {
  2563. if (
  2564. !brhandler
  2565. (
  2566. brhdata,
  2567. bhMatch,
  2568. OP.range.bracket.index,
  2569. match[OP.range.bracket.index].start,
  2570. (int)(str-start)
  2571. )
  2572. )
  2573. {
  2574. return -1;
  2575. }
  2576. }
  2577. match[OP.range.bracket.index].start=(int)(str-start);
  2578. st->op=opOpenBracket;
  2579. st->pos=op;
  2580. st->min=match[OP.range.bracket.index].start;
  2581. st->max=match[OP.range.bracket.index].end;
  2582. PushState();
  2583. }
  2584. if (OP.range.bracket.nextalt)
  2585. {
  2586. st->op=opAlternative;
  2587. st->pos=OP.range.bracket.nextalt;
  2588. st->savestr=str;
  2589. PushState();
  2590. }
  2591. continue;
  2592. }
  2593. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  2594. {
  2595. match[OP.range.bracket.index].end=(int)(str-start);
  2596. if (brhandler)
  2597. {
  2598. if (
  2599. !brhandler
  2600. (
  2601. brhdata,
  2602. bhMatch,
  2603. OP.range.bracket.index,
  2604. match[OP.range.bracket.index].start,
  2605. match[OP.range.bracket.index].end
  2606. )
  2607. )
  2608. {
  2609. return -1;
  2610. }
  2611. }
  2612. }
  2613. st->max--;
  2614. inrangebracket--;
  2615. if (!st->max)continue;
  2616. st->forward=str>ps->startstr?1:0;
  2617. st->startstr=str;
  2618. st->savestr=str;
  2619. PushState();
  2620. if (OP.range.bracket.nextalt)
  2621. {
  2622. st->op=opAlternative;
  2623. st->pos=OP.range.bracket.nextalt;
  2624. st->savestr=str;
  2625. PushState();
  2626. }
  2627. continue;
  2628. }
  2629. case opLookAhead:
  2630. {
  2631. tmp=OP.bracket.pairindex;
  2632. do
  2633. {
  2634. PopState();
  2635. }
  2636. while (st->pos!=tmp || st->op!=opLookAhead);
  2637. str=st->savestr;
  2638. continue;
  2639. }
  2640. case opNotLookAhead:
  2641. {
  2642. do
  2643. {
  2644. PopState();
  2645. }
  2646. while (st->op!=opNotLookAhead);
  2647. str=st->savestr;
  2648. break;
  2649. }
  2650. case opLookBehind:
  2651. {
  2652. continue;
  2653. }
  2654. case opNotLookBehind:
  2655. {
  2656. ps=GetState();
  2657. ps->forward=0;
  2658. break;
  2659. }
  2660. }//switch(code[pairindex].op)
  2661. break;
  2662. }//case opClosingBracket
  2663. case opAlternative:
  2664. {
  2665. op=OP.alternative.endindex->prev;
  2666. continue;
  2667. }
  2668. #ifdef NAMEDBRACKETS
  2669. case opNamedBackRef:
  2670. #endif
  2671. case opBackRef:
  2672. {
  2673. #ifdef NAMEDBRACKETS
  2674. if (OP.op==opNamedBackRef)
  2675. {
  2676. if (!hmatch || !hmatch->Exists((char*)OP.refname))break;
  2677. m=hmatch->GetPtr((char*)OP.refname);
  2678. }
  2679. else
  2680. {
  2681. m=&match[OP.refindex];
  2682. }
  2683. #else
  2684. m=&match[OP.refindex];
  2685. #endif
  2686. if (m->start==-1 || m->end==-1)break;
  2687. if (ignorecase)
  2688. {
  2689. j=m->end;
  2690. for (i=m->start; i<j; i++,str++)
  2691. {
  2692. if (TOLOWER(start[i])!=TOLOWER(*str))break;
  2693. if (str>strend)break;
  2694. }
  2695. if (i<j)break;
  2696. }
  2697. else
  2698. {
  2699. j=m->end;
  2700. for (i=m->start; i<j; i++,str++)
  2701. {
  2702. if (start[i]!=*str)break;
  2703. if (str>strend)break;
  2704. }
  2705. if (i<j)break;
  2706. }
  2707. continue;
  2708. }
  2709. case opAnyRange:
  2710. case opAnyMinRange:
  2711. {
  2712. st->op=OP.op;
  2713. minimizing=OP.op==opAnyMinRange;
  2714. j=OP.range.min;
  2715. st->max=OP.range.max-j;
  2716. if (OP.range.op==opCharAny)
  2717. {
  2718. for (i=0; i<j; i++,str++)
  2719. {
  2720. if (str>strend || *str==0x0d || *str==0x0a)break;
  2721. }
  2722. if (i<j)
  2723. {
  2724. break;
  2725. }
  2726. st->startstr=str;
  2727. if (!minimizing)
  2728. {
  2729. while (str<strend && *str!=0x0d && *str!=0x0a && st->max--)str++;
  2730. }
  2731. else
  2732. {
  2733. MINSKIP(*str!=0x0d && *str!=0x0a);
  2734. if (st->max==-1)break;
  2735. }
  2736. }
  2737. else
  2738. {
  2739. //opCharAnyAll:
  2740. str+=j;
  2741. if (str>strend)break;
  2742. st->startstr=str;
  2743. if (!minimizing)
  2744. {
  2745. if (st->max>=0)
  2746. {
  2747. if (str+st->max<strend)
  2748. {
  2749. str+=st->max;
  2750. st->max=0;
  2751. }
  2752. else
  2753. {
  2754. st->max-=(int)(strend-str);
  2755. str=strend;
  2756. }
  2757. }
  2758. else
  2759. {
  2760. str=strend;
  2761. }
  2762. }
  2763. else
  2764. {
  2765. MINSKIP(1);
  2766. if (st->max==-1)break;
  2767. }
  2768. }
  2769. if (OP.range.max==j)continue;
  2770. st->savestr=str;
  2771. st->pos=op;
  2772. PushState();
  2773. continue;
  2774. }
  2775. case opSymbolRange:
  2776. case opSymbolMinRange:
  2777. {
  2778. st->op=OP.op;
  2779. minimizing=OP.op==opSymbolMinRange;
  2780. j=OP.range.min;
  2781. st->max=OP.range.max-j;
  2782. if (ignorecase)
  2783. {
  2784. for (i=0; i<j; i++,str++)
  2785. {
  2786. if (str>strend || TOLOWER(*str)!=OP.range.symbol)break;
  2787. }
  2788. if (i<j)break;
  2789. st->startstr=str;
  2790. if (!minimizing)
  2791. {
  2792. while (str<strend && TOLOWER(*str)==OP.range.symbol && st->max--)str++;
  2793. }
  2794. else
  2795. {
  2796. MINSKIP(TOLOWER(*str)==OP.range.symbol);
  2797. if (st->max==-1)break;
  2798. }
  2799. }
  2800. else
  2801. {
  2802. for (i=0; i<j; i++,str++)
  2803. {
  2804. if (str>strend || *str!=OP.range.symbol)break;
  2805. }
  2806. if (i<j)break;
  2807. st->startstr=str;
  2808. if (!minimizing)
  2809. {
  2810. while (str<strend && *str==OP.range.symbol && st->max--)str++;
  2811. }
  2812. else
  2813. {
  2814. MINSKIP(*str==OP.range.symbol);
  2815. if (st->max==-1)break;
  2816. }
  2817. }
  2818. if (OP.range.max==j)continue;
  2819. st->savestr=str;
  2820. st->pos=op;
  2821. PushState();
  2822. continue;
  2823. }
  2824. case opNotSymbolRange:
  2825. case opNotSymbolMinRange:
  2826. {
  2827. st->op=OP.op;
  2828. minimizing=OP.op==opNotSymbolMinRange;
  2829. j=OP.range.min;
  2830. st->max=OP.range.max-j;
  2831. if (ignorecase)
  2832. {
  2833. for (i=0; i<j; i++,str++)
  2834. {
  2835. if (str>strend || TOLOWER(*str)==OP.range.symbol)break;
  2836. }
  2837. if (i<j)break;
  2838. st->startstr=str;
  2839. if (!minimizing)
  2840. {
  2841. while (str<strend && TOLOWER(*str)!=OP.range.symbol && st->max--)str++;
  2842. }
  2843. else
  2844. {
  2845. MINSKIP(TOLOWER(*str)!=OP.range.symbol);
  2846. if (st->max==-1)break;
  2847. }
  2848. }
  2849. else
  2850. {
  2851. for (i=0; i<j; i++,str++)
  2852. {
  2853. if (str>strend || *str==OP.range.symbol)break;
  2854. }
  2855. if (i<j)break;
  2856. st->startstr=str;
  2857. if (!minimizing)
  2858. {
  2859. while (str<strend && *str!=OP.range.symbol && st->max--)str++;
  2860. }
  2861. else
  2862. {
  2863. MINSKIP(*str!=OP.range.symbol);
  2864. if (st->max==-1)break;
  2865. }
  2866. }
  2867. if (OP.range.max==j)continue;
  2868. st->savestr=str;
  2869. st->pos=op;
  2870. PushState();
  2871. continue;
  2872. }
  2873. case opClassRange:
  2874. case opClassMinRange:
  2875. {
  2876. st->op=OP.op;
  2877. minimizing=OP.op==opClassMinRange;
  2878. j=OP.range.min;
  2879. st->max=OP.range.max-j;
  2880. for (i=0; i<j; i++,str++)
  2881. {
  2882. if (str>strend || !GetBit(OP.range.symbolclass,*str))break;
  2883. }
  2884. if (i<j)break;
  2885. st->startstr=str;
  2886. if (!minimizing)
  2887. {
  2888. while (str<strend && GetBit(OP.range.symbolclass,*str) && st->max--)str++;
  2889. }
  2890. else
  2891. {
  2892. MINSKIP(GetBit(OP.range.symbolclass,*str));
  2893. if (st->max==-1)break;
  2894. }
  2895. if (OP.range.max==j)continue;
  2896. st->savestr=str;
  2897. st->pos=op;
  2898. PushState();
  2899. continue;
  2900. }
  2901. case opTypeRange:
  2902. case opTypeMinRange:
  2903. {
  2904. st->op=OP.op;
  2905. minimizing=OP.op==opTypeMinRange;
  2906. j=OP.range.min;
  2907. st->max=OP.range.max-j;
  2908. for (i=0; i<j; i++,str++)
  2909. {
  2910. if (str>strend || !ISTYPE(*str,OP.range.type))break;
  2911. }
  2912. if (i<j)break;
  2913. st->startstr=str;
  2914. if (!minimizing)
  2915. {
  2916. while (str<strend && (ISTYPE(*str,OP.range.type)) && st->max--)str++;
  2917. }
  2918. else
  2919. {
  2920. MINSKIP((ISTYPE(*str,OP.range.type)));
  2921. if (st->max==-1)break;
  2922. }
  2923. if (OP.range.max==j)continue;
  2924. st->savestr=str;
  2925. st->pos=op;
  2926. PushState();
  2927. continue;
  2928. }
  2929. case opNotTypeRange:
  2930. case opNotTypeMinRange:
  2931. {
  2932. st->op=OP.op;
  2933. minimizing=OP.op==opNotTypeMinRange;
  2934. j=OP.range.min;
  2935. st->max=OP.range.max-j;
  2936. for (i=0; i<j; i++,str++)
  2937. {
  2938. if (str>strend || (ISTYPE(*str,OP.range.type)))break;
  2939. }
  2940. if (i<j)break;
  2941. st->startstr=str;
  2942. if (!minimizing)
  2943. {
  2944. while (str<strend && !ISTYPE(*str,OP.range.type) && st->max--)str++;
  2945. }
  2946. else
  2947. {
  2948. MINSKIP(!ISTYPE(*str,OP.range.type));
  2949. if (st->max==-1)break;
  2950. }
  2951. if (OP.range.max==j)continue;
  2952. st->savestr=str;
  2953. st->pos=op;
  2954. PushState();
  2955. continue;
  2956. }
  2957. #ifdef NAMEDBRACKETS
  2958. case opNamedRefRange:
  2959. case opNamedRefMinRange:
  2960. #endif
  2961. case opBackRefRange:
  2962. case opBackRefMinRange:
  2963. {
  2964. st->op=OP.op;
  2965. #ifdef NAMEDBRACKETS
  2966. minimizing=OP.op==opBackRefMinRange || OP.op==opNamedRefMinRange;
  2967. #else
  2968. minimizing=OP.op==opBackRefMinRange;
  2969. #endif
  2970. j=OP.range.min;
  2971. st->max=OP.range.max-j;
  2972. #ifdef NAMEDBRACKETS
  2973. if (OP.op==opBackRefRange || OP.op==opBackRefMinRange)
  2974. {
  2975. m=&match[OP.range.refindex];
  2976. }
  2977. else
  2978. {
  2979. m=hmatch->GetPtr((char*)OP.range.refname);
  2980. }
  2981. #else
  2982. m=&match[OP.range.refindex];
  2983. #endif
  2984. if (!m)break;
  2985. if (m->start==-1 || m->end==-1)
  2986. {
  2987. if (!j)continue;
  2988. else break;
  2989. }
  2990. for (i=0; i<j; i++)
  2991. {
  2992. if (str>strend || !StrCmp(str,start+m->start,start+m->end))break;
  2993. }
  2994. if (i<j)break;
  2995. st->startstr=str;
  2996. if (!minimizing)
  2997. {
  2998. while (str<strend && StrCmp(str,start+m->start,start+m->end) && st->max--);
  2999. }
  3000. else
  3001. {
  3002. MINSKIP(StrCmp(str,start+m->start,start+m->end));
  3003. if (st->max==-1)break;
  3004. }
  3005. if (OP.range.max==j)continue;
  3006. st->savestr=str;
  3007. st->pos=op;
  3008. PushState();
  3009. continue;
  3010. }
  3011. case opBracketRange:
  3012. case opBracketMinRange:
  3013. {
  3014. if (inrangebracket && OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  3015. {
  3016. st->op=opOpenBracket;
  3017. st->pos=OP.range.bracket.pairindex;
  3018. st->min=match[OP.range.bracket.index].start;
  3019. st->max=match[OP.range.bracket.index].end;
  3020. PushState();
  3021. }
  3022. st->op=OP.op;
  3023. st->pos=op;
  3024. st->min=OP.range.min;
  3025. st->max=OP.range.max;
  3026. st->startstr=str;
  3027. st->savestr=str;
  3028. st->forward=1;
  3029. PushState();
  3030. if (OP.range.nextalt)
  3031. {
  3032. st->op=opAlternative;
  3033. st->pos=OP.range.bracket.nextalt;
  3034. st->savestr=str;
  3035. PushState();
  3036. }
  3037. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  3038. {
  3039. match[OP.range.bracket.index].start=
  3040. /*match[OP.range.bracket.index].end=*/(int)(str-start);
  3041. }
  3042. if (OP.op==opBracketMinRange && !OP.range.min)
  3043. {
  3044. op=OP.range.bracket.pairindex;
  3045. }
  3046. else
  3047. {
  3048. inrangebracket++;
  3049. }
  3050. continue;
  3051. }
  3052. case opLookAhead:
  3053. case opNotLookAhead:
  3054. {
  3055. st->op=OP.op;
  3056. st->savestr=str;
  3057. st->pos=op;
  3058. st->forward=1;
  3059. PushState();
  3060. if (OP.assert.nextalt)
  3061. {
  3062. st->op=opAlternative;
  3063. st->pos=OP.assert.nextalt;
  3064. st->savestr=str;
  3065. PushState();
  3066. }
  3067. continue;
  3068. }
  3069. case opLookBehind:
  3070. case opNotLookBehind:
  3071. {
  3072. if (str-OP.assert.length<start)
  3073. {
  3074. if (OP.op==opLookBehind)break;
  3075. op=OP.assert.pairindex;
  3076. continue;
  3077. }
  3078. st->op=OP.op;
  3079. st->savestr=str;
  3080. st->pos=op;
  3081. st->forward=1;
  3082. str-=OP.assert.length;
  3083. PushState();
  3084. if (OP.assert.nextalt)
  3085. {
  3086. st->op=opAlternative;
  3087. st->pos=OP.assert.nextalt;
  3088. st->savestr=str;
  3089. PushState();
  3090. }
  3091. continue;
  3092. }
  3093. case opNoReturn:
  3094. {
  3095. st->op=opNoReturn;
  3096. PushState();
  3097. continue;
  3098. }
  3099. #ifdef RELIB
  3100. case opLibCall:
  3101. {
  3102. if (!relib->Exists((char*)OP.rename))return 0;
  3103. /* int ok=1;
  3104. PMatchList curlist=matchlist;
  3105. while(curlist)
  3106. {
  3107. if(!curlist->Count())
  3108. {
  3109. curlist=curlist->parent;
  3110. continue;
  3111. }
  3112. int k=curlist->Count()-1;
  3113. while(k>=0)
  3114. {
  3115. if((*curlist)[k].start!=str-start){ok=2;break;}
  3116. if(!strcmp((char*)(*curlist)[k].name,(char*)OP.rename) && (*curlist)[k].pos==pos)
  3117. {
  3118. ok=0;
  3119. break;
  3120. }
  3121. k--;
  3122. }
  3123. if(!ok || ok==2)break;
  3124. if(k<0)curlist=curlist->parent;
  3125. }
  3126. if(!ok)
  3127. {
  3128. dpf(("Recursion detected! Declain call of %s\n",OP.rename));
  3129. break;
  3130. }
  3131. */
  3132. RegExp *re=(*relib)[(char*)OP.rename];
  3133. // if(matchlist->parent && matchlist->parent->Count())
  3134. // {
  3135. prechar callfrom=matchlist->parent->Last().Get().name;
  3136. int curpos=str-start;
  3137. int k=re->cs.Count()-1;
  3138. int ok=1;
  3139. while (k>=0)
  3140. {
  3141. if (re->cs[k].strpos!=curpos)break;
  3142. if (!strcmp((char*)re->cs[k].name,(char*)callfrom))
  3143. {
  3144. dpf(("Recursive call rejected: %s:%d\n",OP.rename,curpos));
  3145. ok=0;
  3146. break;
  3147. }
  3148. k--;
  3149. }
  3150. if (!ok)break;
  3151. // }
  3152. SCallStackItem csi;
  3153. csi.name=callfrom;
  3154. csi.strpos=curpos;
  3155. re->cs.Append(csi);
  3156. st->op=opLibCall;
  3157. st->pos=op;
  3158. st->startstr=OP.rename;
  3159. st->savestr=str;
  3160. st->cnt=matchlist->Count();
  3161. PushState();
  3162. SMatchListItem *pml;
  3163. int res;
  3164. if (re->havefirst && !re->first[*str])
  3165. {
  3166. dpf(("Fail first char check: %c\n",*str));
  3167. res=0;
  3168. }
  3169. else
  3170. {
  3171. ml.name=OP.rename;
  3172. ml.start=str-start;
  3173. ml.sublist=new MatchList;
  3174. ml.sublist->parent=matchlist;
  3175. pml=&matchlist->Append(ml);
  3176. SMatch mt[10];
  3177. PMatch mtch=mt;
  3178. if (re->bracketscount>10)
  3179. #ifdef RE_NO_NEWARRAY
  3180. mtch=static_cast<PMatch>(CreateArray(sizeof(SMatch),
  3181. re->bracketscount, nullptr));
  3182. #else
  3183. mtch=new SMatch[re->bracketscount];
  3184. #endif // RE_NO_NEWARRAY
  3185. MatchList *mls;
  3186. mls=matchlist;
  3187. re->SetMatchList(ml.sublist);
  3188. int mcnt=re->bracketscount;
  3189. int savecnt=re->stackcount;
  3190. int savestack=re->stackusage;
  3191. PStateStackPage savepage=re->lastpage;
  3192. re->start=start;
  3193. re->reclevel++;
  3194. re->stackusage=0;
  3195. dpf(("Call: %s\n",OP.rename));
  3196. #ifdef NAMEDBRACKETS
  3197. MatchHash h;
  3198. #endif
  3199. res=re->InnerMatch(str,strend,mtch,mcnt
  3200. #ifdef NAMEDBRACKETS
  3201. ,&h
  3202. #endif
  3203. );
  3204. dpf(("Return from: %s - %s\n",OP.rename,res?"ok":"fail"));
  3205. re->cs.Pop(csi);
  3206. matchlist=mls;
  3207. re->reclevel--;
  3208. re->stackusage=savestack;
  3209. pml->end=mtch[0].end;
  3210. if (mtch!=mt)
  3211. #ifdef RE_NO_NEWARRAY
  3212. DeleteArray(reinterpret_cast<void**>(&mtch),nullptr);
  3213. #else
  3214. delete [] mtch;
  3215. #endif // RE_NO_NEWARRAY
  3216. re->lastpage=savepage;
  3217. re->stackcount=savecnt;
  3218. re->stack=re->lastpage->stack;
  3219. re->st=&re->stack[re->stackcount];
  3220. if (!res)
  3221. {
  3222. matchlist->Pop(ml);
  3223. KillMatchList(ml.sublist);
  3224. }
  3225. }
  3226. if (res)
  3227. {
  3228. str=start+pml->end;
  3229. }
  3230. else break;
  3231. continue;
  3232. }
  3233. #endif
  3234. case opRegExpEnd:return 1;
  3235. }//switch(op)
  3236. for (;; PopState())
  3237. {
  3238. if (0==(ps=GetState()))return 0;
  3239. //dpf(("ps->op:%s\n",ops[ps->op]));
  3240. switch (ps->op)
  3241. {
  3242. case opAlternative:
  3243. {
  3244. str=ps->savestr;
  3245. op=ps->pos;
  3246. if (OP.alternative.nextalt)
  3247. {
  3248. ps->pos=OP.alternative.nextalt;
  3249. }
  3250. else
  3251. {
  3252. PopState();
  3253. }
  3254. break;
  3255. }
  3256. case opAnyRange:
  3257. case opSymbolRange:
  3258. case opNotSymbolRange:
  3259. case opClassRange:
  3260. case opTypeRange:
  3261. case opNotTypeRange:
  3262. {
  3263. str=ps->savestr-1;
  3264. op=ps->pos;
  3265. if (str<ps->startstr)
  3266. {
  3267. continue;
  3268. }
  3269. ps->savestr=str;
  3270. break;
  3271. }
  3272. #ifdef NAMEDBRACKETS
  3273. case opNamedRefRange:
  3274. #endif
  3275. case opBackRefRange:
  3276. {
  3277. #ifdef NAMEDBRACKETS
  3278. // PMatch m;
  3279. if (ps->op==opBackRefRange)
  3280. {
  3281. m=&match[ps->pos->range.refindex];
  3282. }
  3283. else
  3284. {
  3285. m=hmatch->GetPtr((char*)ps->pos->range.refname);
  3286. }
  3287. #else
  3288. m=&match[ps->pos->range.refindex];
  3289. #endif
  3290. str=ps->savestr-(m->end-m->start);
  3291. op=ps->pos;
  3292. if (str<ps->startstr)
  3293. {
  3294. continue;
  3295. }
  3296. ps->savestr=str;
  3297. break;
  3298. }
  3299. case opAnyMinRange:
  3300. {
  3301. if (!(ps->max--))continue;
  3302. str=ps->savestr;
  3303. op=ps->pos;
  3304. if (ps->pos->range.op==opCharAny)
  3305. {
  3306. if (str<strend && *str!=0x0a && *str!=0x0d)
  3307. {
  3308. str++;
  3309. ps->savestr=str;
  3310. }
  3311. else
  3312. {
  3313. continue;
  3314. }
  3315. }
  3316. else
  3317. {
  3318. if (str<strend)
  3319. {
  3320. str++;
  3321. ps->savestr=str;
  3322. }
  3323. else
  3324. {
  3325. continue;
  3326. }
  3327. }
  3328. break;
  3329. }
  3330. case opSymbolMinRange:
  3331. {
  3332. if (!(ps->max--))continue;
  3333. str=ps->savestr;
  3334. op=ps->pos;
  3335. if (ignorecase)
  3336. {
  3337. if (str<strend && TOLOWER(*str)==OP.symbol)
  3338. {
  3339. str++;
  3340. ps->savestr=str;
  3341. }
  3342. else
  3343. {
  3344. continue;
  3345. }
  3346. }
  3347. else
  3348. {
  3349. if (str<strend && *str==OP.symbol)
  3350. {
  3351. str++;
  3352. ps->savestr=str;
  3353. }
  3354. else
  3355. {
  3356. continue;
  3357. }
  3358. }
  3359. break;
  3360. }
  3361. case opNotSymbolMinRange:
  3362. {
  3363. if (!(ps->max--))continue;
  3364. str=ps->savestr;
  3365. op=ps->pos;
  3366. if (ignorecase)
  3367. {
  3368. if (str<strend && TOLOWER(*str)!=OP.symbol)
  3369. {
  3370. str++;
  3371. ps->savestr=str;
  3372. }
  3373. else
  3374. {
  3375. continue;
  3376. }
  3377. }
  3378. else
  3379. {
  3380. if (str<strend && *str!=OP.symbol)
  3381. {
  3382. str++;
  3383. ps->savestr=str;
  3384. }
  3385. else
  3386. {
  3387. continue;
  3388. }
  3389. }
  3390. break;
  3391. }
  3392. case opClassMinRange:
  3393. {
  3394. if (!(ps->max--))continue;
  3395. str=ps->savestr;
  3396. op=ps->pos;
  3397. if (str<strend && GetBit(OP.range.symbolclass,*str))
  3398. {
  3399. str++;
  3400. ps->savestr=str;
  3401. }
  3402. else
  3403. {
  3404. continue;
  3405. }
  3406. break;
  3407. }
  3408. case opTypeMinRange:
  3409. {
  3410. if (!(ps->max--))continue;
  3411. str=ps->savestr;
  3412. op=ps->pos;
  3413. if (str<strend && ISTYPE(*str,OP.range.type))
  3414. {
  3415. str++;
  3416. ps->savestr=str;
  3417. }
  3418. else
  3419. {
  3420. continue;
  3421. }
  3422. break;
  3423. }
  3424. case opNotTypeMinRange:
  3425. {
  3426. if (!(ps->max--))continue;
  3427. str=ps->savestr;
  3428. op=ps->pos;
  3429. if (str<strend && !ISTYPE(*str,OP.range.type))
  3430. {
  3431. str++;
  3432. ps->savestr=str;
  3433. }
  3434. else
  3435. {
  3436. continue;
  3437. }
  3438. break;
  3439. }
  3440. #ifdef NAMEDBRACKETS
  3441. case opNamedRefMinRange:
  3442. #endif
  3443. case opBackRefMinRange:
  3444. {
  3445. if (!(ps->max--))continue;
  3446. str=ps->savestr;
  3447. op=ps->pos;
  3448. #ifdef NAMEDBRACKETS
  3449. if (ps->op==opBackRefMinRange)
  3450. {
  3451. m=&match[OP.range.refindex];
  3452. }
  3453. else
  3454. {
  3455. m=hmatch->GetPtr((char*)OP.range.refname);
  3456. }
  3457. #else
  3458. m=&match[OP.range.refindex];
  3459. #endif
  3460. if (str+m->end-m->start<strend && StrCmp(str,start+m->start,start+m->end))
  3461. {
  3462. ps->savestr=str;
  3463. }
  3464. else
  3465. {
  3466. continue;
  3467. }
  3468. break;
  3469. }
  3470. case opBracketRange:
  3471. {
  3472. if (ps->pos->range.bracket.index>=0 && brhandler)
  3473. {
  3474. if (
  3475. !brhandler
  3476. (
  3477. brhdata,
  3478. bhRollBack,
  3479. ps->pos->range.bracket.index,
  3480. -1,
  3481. -1
  3482. )
  3483. )
  3484. {
  3485. return -1;
  3486. }
  3487. }
  3488. if (ps->min)
  3489. {
  3490. inrangebracket--;
  3491. continue;
  3492. }
  3493. if (ps->forward)
  3494. {
  3495. ps->forward=0;
  3496. op=ps->pos->range.bracket.pairindex;
  3497. inrangebracket--;
  3498. str=ps->savestr;
  3499. if (OP.range.nextalt)
  3500. {
  3501. st->op=opAlternative;
  3502. st->pos=OP.range.bracket.nextalt;
  3503. st->savestr=str;
  3504. PushState();
  3505. }
  3506. // if(OP.bracket.index>=0 && OP.bracket.index<matchcount)
  3507. // {
  3508. // match[OP.bracket.index].end=str-start;
  3509. // }
  3510. break;
  3511. }
  3512. continue;
  3513. }
  3514. case opBracketMinRange:
  3515. {
  3516. if (ps->pos->range.bracket.index>=0 && brhandler)
  3517. {
  3518. if (
  3519. !brhandler
  3520. (
  3521. brhdata,
  3522. bhRollBack,
  3523. ps->pos->range.bracket.index,
  3524. -1,
  3525. -1
  3526. )
  3527. )
  3528. {
  3529. return -1;
  3530. }
  3531. }
  3532. if (!(ps->max--))
  3533. {
  3534. inrangebracket--;
  3535. continue;
  3536. }
  3537. if (ps->forward)
  3538. {
  3539. ps->forward=0;
  3540. op=ps->pos;
  3541. str=ps->savestr;
  3542. if (OP.range.bracket.index>=0 && OP.range.bracket.index<matchcount)
  3543. {
  3544. match[OP.range.bracket.index].start=(int)(str-start);
  3545. st->op=opOpenBracket;
  3546. st->pos=op;
  3547. st->min=match[OP.range.bracket.index].start;
  3548. st->max=match[OP.range.bracket.index].end;
  3549. PushState();
  3550. }
  3551. if (OP.range.nextalt)
  3552. {
  3553. st->op=opAlternative;
  3554. st->pos=OP.range.bracket.nextalt;
  3555. st->savestr=str;
  3556. PushState();
  3557. }
  3558. inrangebracket++;
  3559. break;
  3560. }
  3561. inrangebracket--;
  3562. continue;
  3563. }
  3564. case opOpenBracket:
  3565. {
  3566. j=ps->pos->bracket.index;
  3567. if (j>=0 && j<matchcount)
  3568. {
  3569. if (brhandler)
  3570. {
  3571. if (
  3572. !brhandler
  3573. (
  3574. brhdata,
  3575. bhRollBack,
  3576. j,
  3577. match[j].start,
  3578. match[j].end
  3579. )
  3580. )
  3581. {
  3582. return -1;
  3583. }
  3584. }
  3585. match[j].start=ps->min;
  3586. match[j].end=ps->max;
  3587. }
  3588. continue;
  3589. }
  3590. #ifdef NAMEDBRACKETS
  3591. case opNamedBracket:
  3592. {
  3593. prechar n=ps->pos->nbracket.name;
  3594. if (n && hmatch)
  3595. {
  3596. SMatch sm;
  3597. sm.start=ps->min;
  3598. sm.end=ps->max;
  3599. hmatch->SetItem((char*)n,sm);
  3600. }
  3601. continue;
  3602. }
  3603. #endif
  3604. case opLookAhead:
  3605. case opLookBehind:
  3606. {
  3607. continue;
  3608. }
  3609. case opNotLookBehind:
  3610. case opNotLookAhead:
  3611. {
  3612. op=ps->pos->assert.pairindex;
  3613. str=ps->savestr;
  3614. if (ps->forward)
  3615. {
  3616. PopState();
  3617. break;
  3618. }
  3619. else
  3620. {
  3621. continue;
  3622. }
  3623. }
  3624. case opNoReturn:
  3625. {
  3626. return 0;
  3627. }
  3628. #ifdef RELIB
  3629. case opLibCall:
  3630. {
  3631. op=ps->pos;
  3632. str=ps->savestr;
  3633. while (matchlist->Count()>ps->cnt)
  3634. {
  3635. matchlist->Pop(ml);
  3636. KillMatchList(ml.sublist);
  3637. ml.sublist=nullptr;
  3638. }
  3639. //PopState();
  3640. continue;
  3641. }
  3642. #endif
  3643. }//switch(op)
  3644. break;
  3645. }
  3646. }
  3647. return 1;
  3648. }
  3649. int RegExp::Match(const RECHAR* textstart,const RECHAR* textend,PMatch match,int& matchcount
  3650. #ifdef NAMEDBRACKETS
  3651. ,PMatchHash hmatch
  3652. #endif
  3653. )
  3654. {
  3655. start=(const prechar)textstart;
  3656. const prechar tempend=(const prechar)textend;
  3657. if (havefirst && !first[*start])return 0;
  3658. TrimTail(tempend);
  3659. if (tempend<start)return 0;
  3660. if (minlength && tempend-start<minlength)return 0;
  3661. int res=InnerMatch(start,tempend,match,matchcount
  3662. #ifdef NAMEDBRACKETS
  3663. ,hmatch
  3664. #endif
  3665. );
  3666. if (res==1)
  3667. {
  3668. for (int i=0; i<matchcount; i++)
  3669. {
  3670. if (match[i].start==-1 || match[i].end==-1 || match[i].start>match[i].end)
  3671. {
  3672. match[i].start=match[i].end=-1;
  3673. }
  3674. }
  3675. }
  3676. return res;
  3677. }
  3678. int RegExp::MatchEx(const RECHAR* datastart,const RECHAR* textstart,const RECHAR* textend,PMatch match,int& matchcount
  3679. #ifdef NAMEDBRACKETS
  3680. ,PMatchHash hmatch
  3681. #endif
  3682. )
  3683. {
  3684. if (havefirst && !first[(rechar)*textstart])return 0;
  3685. const prechar tempend=(const prechar)textend;
  3686. if ((prechar)datastart==start && (prechar)textend==end)
  3687. {
  3688. tempend=trimend;
  3689. }
  3690. else
  3691. {
  3692. start=(const prechar)datastart;
  3693. TrimTail(tempend);
  3694. trimend=tempend;
  3695. }
  3696. end=(const prechar)textend;
  3697. if (tempend<(const prechar)textstart)return 0;
  3698. if (minlength && tempend-start<minlength)return 0;
  3699. int res=InnerMatch((const prechar)textstart,tempend,match,matchcount
  3700. #ifdef NAMEDBRACKETS
  3701. ,hmatch
  3702. #endif
  3703. );
  3704. if (res==1)
  3705. {
  3706. for (int i=0; i<matchcount; i++)
  3707. {
  3708. if (match[i].start==-1 || match[i].end==-1 || match[i].start>match[i].end)
  3709. {
  3710. match[i].start=match[i].end=-1;
  3711. }
  3712. }
  3713. }
  3714. return res;
  3715. }
  3716. int RegExp::Match(const RECHAR* textstart,PMatch match,int& matchcount
  3717. #ifdef NAMEDBRACKETS
  3718. ,PMatchHash hmatch
  3719. #endif
  3720. )
  3721. {
  3722. const RECHAR* textend=textstart+strlen(textstart);
  3723. return Match(textstart,textend,match,matchcount
  3724. #ifdef NAMEDBRACKETS
  3725. ,hmatch
  3726. #endif
  3727. );
  3728. }
  3729. int RegExp::Optimize()
  3730. {
  3731. PREOpCode jumps[MAXDEPTH];
  3732. int jumpcount=0;
  3733. if (havefirst)return 1;
  3734. #ifdef UNICODE
  3735. first.Reset();
  3736. #else
  3737. memset(first,0,sizeof(first));
  3738. #endif
  3739. PREOpCode op;
  3740. minlength=0;
  3741. int mlstackmin[MAXDEPTH];
  3742. int mlstacksave[MAXDEPTH];
  3743. int mlscnt=0;
  3744. for (op=code; op; op=op->next)
  3745. {
  3746. switch (op->op)
  3747. {
  3748. case opType:
  3749. case opNotType:
  3750. case opCharAny:
  3751. case opCharAnyAll:
  3752. case opSymbol:
  3753. case opNotSymbol:
  3754. case opSymbolIgnoreCase:
  3755. case opNotSymbolIgnoreCase:
  3756. case opSymbolClass:
  3757. minlength++;
  3758. continue;
  3759. case opSymbolRange:
  3760. case opSymbolMinRange:
  3761. case opNotSymbolRange:
  3762. case opNotSymbolMinRange:
  3763. case opAnyRange:
  3764. case opAnyMinRange:
  3765. case opTypeRange:
  3766. case opTypeMinRange:
  3767. case opNotTypeRange:
  3768. case opNotTypeMinRange:
  3769. case opClassRange:
  3770. case opClassMinRange:
  3771. minlength+=op->range.min;
  3772. break;
  3773. #ifdef NAMEDBRACKETS
  3774. case opNamedBracket:
  3775. #endif
  3776. case opOpenBracket:
  3777. case opBracketRange:
  3778. case opBracketMinRange:
  3779. mlstacksave[mlscnt]=minlength;
  3780. mlstackmin[mlscnt++]=-1;
  3781. minlength=0;
  3782. continue;
  3783. case opClosingBracket:
  3784. {
  3785. if (op->bracket.pairindex->op>opAssertionsBegin &&
  3786. op->bracket.pairindex->op<opAsserionsEnd)
  3787. {
  3788. continue;
  3789. }
  3790. if (mlstackmin[mlscnt-1]!=-1 && mlstackmin[mlscnt-1]<minlength)
  3791. {
  3792. minlength=mlstackmin[mlscnt-1];
  3793. }
  3794. switch (op->bracket.pairindex->op)
  3795. {
  3796. case opBracketRange:
  3797. case opBracketMinRange:
  3798. minlength*=op->range.min;
  3799. break;
  3800. }
  3801. minlength+=mlstacksave[--mlscnt];
  3802. } continue;
  3803. case opAlternative:
  3804. {
  3805. if (mlstackmin[mlscnt-1]==-1)
  3806. {
  3807. mlstackmin[mlscnt-1]=minlength;
  3808. }
  3809. else
  3810. {
  3811. if (minlength<mlstackmin[mlscnt-1])
  3812. {
  3813. mlstackmin[mlscnt-1]=minlength;
  3814. }
  3815. }
  3816. minlength=0;
  3817. break;
  3818. }
  3819. case opLookAhead:
  3820. case opNotLookAhead:
  3821. case opLookBehind:
  3822. case opNotLookBehind:
  3823. {
  3824. op=op->assert.pairindex;
  3825. continue;
  3826. }
  3827. case opRegExpEnd:
  3828. op=0;
  3829. break;
  3830. }
  3831. if (!op)break;
  3832. }
  3833. dpf(("minlength=%d\n",minlength));
  3834. for (op=code;; op=op->next)
  3835. {
  3836. switch (OP.op)
  3837. {
  3838. default:
  3839. {
  3840. return 0;
  3841. }
  3842. case opType:
  3843. {
  3844. for (int i=0; i<RE_CHAR_COUNT; i++)if (ISTYPE(i,OP.type))first[i]=1;
  3845. break;
  3846. }
  3847. case opNotType:
  3848. {
  3849. for (int i=0; i<RE_CHAR_COUNT; i++)if (!(ISTYPE(i,OP.type)))first[i]=1;
  3850. break;
  3851. }
  3852. case opSymbol:
  3853. {
  3854. first[OP.symbol]=1;
  3855. break;
  3856. }
  3857. case opSymbolIgnoreCase:
  3858. {
  3859. first[OP.symbol]=1;
  3860. first[TOUPPER(OP.symbol)]=1;
  3861. break;
  3862. }
  3863. case opSymbolClass:
  3864. {
  3865. for (int i=0; i<RE_CHAR_COUNT; i++)
  3866. {
  3867. if (GetBit(OP.symbolclass,i))first[i]=1;
  3868. }
  3869. break;
  3870. }
  3871. #ifdef NAMEDBRACKETS
  3872. case opNamedBracket:
  3873. #endif
  3874. case opOpenBracket:
  3875. {
  3876. if (OP.bracket.nextalt)
  3877. {
  3878. jumps[jumpcount++]=OP.bracket.nextalt;
  3879. }
  3880. continue;
  3881. }
  3882. case opClosingBracket:
  3883. {
  3884. continue;
  3885. }
  3886. case opAlternative:
  3887. {
  3888. return 0;
  3889. }
  3890. case opSymbolRange:
  3891. case opSymbolMinRange:
  3892. {
  3893. if (ignorecase)
  3894. {
  3895. first[TOLOWER(OP.range.symbol)]=1;
  3896. first[TOUPPER(OP.range.symbol)]=1;
  3897. }
  3898. else
  3899. {
  3900. first[OP.range.symbol]=1;
  3901. }
  3902. if (!OP.range.min)continue;
  3903. break;
  3904. }
  3905. case opTypeRange:
  3906. case opTypeMinRange:
  3907. {
  3908. for (int i=0; i<RE_CHAR_COUNT; i++)
  3909. {
  3910. if (ISTYPE(i,OP.range.type))first[i]=1;
  3911. }
  3912. if (!OP.range.min)continue;
  3913. break;
  3914. }
  3915. case opNotTypeRange:
  3916. case opNotTypeMinRange:
  3917. {
  3918. for (int i=0; i<RE_CHAR_COUNT; i++)
  3919. {
  3920. if (!(ISTYPE(i,OP.range.type)))first[i]=1;
  3921. }
  3922. if (!OP.range.min)continue;
  3923. break;
  3924. }
  3925. case opClassRange:
  3926. case opClassMinRange:
  3927. {
  3928. for (int i=0; i<RE_CHAR_COUNT; i++)
  3929. {
  3930. if (GetBit(OP.range.symbolclass,i))first[i]=1;
  3931. }
  3932. if (!OP.range.min)continue;
  3933. break;
  3934. }
  3935. case opBracketRange:
  3936. case opBracketMinRange:
  3937. {
  3938. if (!OP.range.min)return 0;
  3939. if (OP.range.bracket.nextalt)
  3940. {
  3941. jumps[jumpcount++]=OP.range.bracket.nextalt;
  3942. }
  3943. continue;
  3944. }
  3945. //case opLookAhead:
  3946. //case opNotLookAhead:
  3947. //case opLookBehind:
  3948. //case opNotLookBehind:
  3949. case opRegExpEnd:return 0;
  3950. }
  3951. if (jumpcount>0)
  3952. {
  3953. op=jumps[--jumpcount];
  3954. if (OP.op==opAlternative && OP.alternative.nextalt)
  3955. {
  3956. jumps[jumpcount++]=OP.alternative.nextalt;
  3957. }
  3958. continue;
  3959. }
  3960. break;
  3961. }
  3962. havefirst=1;
  3963. return 1;
  3964. }
  3965. int RegExp::Search(const RECHAR* textstart,PMatch match,int& matchcount
  3966. #ifdef NAMEDBRACKETS
  3967. ,PMatchHash hmatch
  3968. #endif
  3969. )
  3970. {
  3971. const RECHAR* textend=textstart+strlen(textstart);
  3972. return Search(textstart,textend,match,matchcount
  3973. #ifdef NAMEDBRACKETS
  3974. ,hmatch
  3975. #endif
  3976. );
  3977. }
  3978. int RegExp::Search(const RECHAR* textstart,const RECHAR* textend,PMatch match,int& matchcount
  3979. #ifdef NAMEDBRACKETS
  3980. ,PMatchHash hmatch
  3981. #endif
  3982. )
  3983. {
  3984. start=(const prechar)textstart;
  3985. const prechar str=start;
  3986. const prechar tempend=(prechar)textend;
  3987. TrimTail(tempend);
  3988. if (tempend<start)return 0;
  3989. if (minlength && tempend-start<minlength)return 0;
  3990. if (!code->bracket.nextalt && code->next->op==opDataStart)
  3991. {
  3992. return InnerMatch(start,tempend,match,matchcount
  3993. #ifdef NAMEDBRACKETS
  3994. ,hmatch
  3995. #endif
  3996. );
  3997. }
  3998. if (!code->bracket.nextalt && code->next->op==opDataEnd && code->next->next->op==opClosingBracket)
  3999. {
  4000. matchcount=1;
  4001. match[0].start=(int)(textend-textstart);
  4002. match[0].end=match[0].start;
  4003. return 1;
  4004. }
  4005. int res=0;
  4006. if (havefirst)
  4007. {
  4008. do
  4009. {
  4010. while (!first[*str] && str<tempend)str++;
  4011. if (0!=(res=InnerMatch(str,tempend,match,matchcount
  4012. #ifdef NAMEDBRACKETS
  4013. ,hmatch
  4014. #endif
  4015. )))
  4016. {
  4017. break;
  4018. }
  4019. str++;
  4020. }
  4021. while (str<tempend);
  4022. if (!res && InnerMatch(str,tempend,match,matchcount
  4023. #ifdef NAMEDBRACKETS
  4024. ,hmatch
  4025. #endif
  4026. ))
  4027. {
  4028. res=1;
  4029. }
  4030. }
  4031. else
  4032. {
  4033. do
  4034. {
  4035. if (0!=(res=InnerMatch(str,tempend,match,matchcount
  4036. #ifdef NAMEDBRACKETS
  4037. ,hmatch
  4038. #endif
  4039. )))
  4040. {
  4041. break;
  4042. }
  4043. str++;
  4044. }
  4045. while (str<=tempend);
  4046. }
  4047. if (res==1)
  4048. {
  4049. for (int i=0; i<matchcount; i++)
  4050. {
  4051. if (match[i].start==-1 || match[i].end==-1 || match[i].start>match[i].end)
  4052. {
  4053. match[i].start=match[i].end=-1;
  4054. }
  4055. }
  4056. }
  4057. return res;
  4058. }
  4059. int RegExp::SearchEx(const RECHAR* datastart,const RECHAR* textstart,const RECHAR* textend,PMatch match,int& matchcount
  4060. #ifdef NAMEDBRACKETS
  4061. ,PMatchHash hmatch
  4062. #endif
  4063. )
  4064. {
  4065. start=(const prechar)datastart;
  4066. const prechar str=(const prechar)textstart;
  4067. const prechar tempend=(const prechar)textend;
  4068. TrimTail(tempend);
  4069. if (tempend<start)return 0;
  4070. if (minlength && tempend-start<minlength)return 0;
  4071. if (!code->bracket.nextalt && code->next->op==opDataStart)
  4072. {
  4073. return InnerMatch(str,tempend,match,matchcount
  4074. #ifdef NAMEDBRACKETS
  4075. ,hmatch
  4076. #endif
  4077. );
  4078. }
  4079. if (!code->bracket.nextalt && code->next->op==opDataEnd && code->next->next->op==opClosingBracket)
  4080. {
  4081. matchcount=1;
  4082. match[0].start=(int)(textend-datastart);
  4083. match[0].end=match[0].start;
  4084. return 1;
  4085. }
  4086. int res=0;
  4087. if (havefirst)
  4088. {
  4089. do
  4090. {
  4091. while (!first[*str] && str<tempend)str++;
  4092. if (0!=(res=InnerMatch(str,tempend,match,matchcount
  4093. #ifdef NAMEDBRACKETS
  4094. ,hmatch
  4095. #endif
  4096. )))
  4097. {
  4098. break;
  4099. }
  4100. str++;
  4101. }
  4102. while (str<tempend);
  4103. if (!res && InnerMatch(str,tempend,match,matchcount
  4104. #ifdef NAMEDBRACKETS
  4105. ,hmatch
  4106. #endif
  4107. ))
  4108. {
  4109. res=1;
  4110. }
  4111. }
  4112. else
  4113. {
  4114. do
  4115. {
  4116. if (0!=(res=InnerMatch(str,tempend,match,matchcount
  4117. #ifdef NAMEDBRACKETS
  4118. ,hmatch
  4119. #endif
  4120. )))
  4121. {
  4122. break;
  4123. }
  4124. str++;
  4125. }
  4126. while (str<=tempend);
  4127. }
  4128. if (res==1)
  4129. {
  4130. for (int i=0; i<matchcount; i++)
  4131. {
  4132. if (match[i].start==-1 || match[i].end==-1 || match[i].start>match[i].end)
  4133. {
  4134. match[i].start=match[i].end=-1;
  4135. }
  4136. }
  4137. }
  4138. return res;
  4139. }
  4140. void RegExp::TrimTail(const prechar& strend)
  4141. {
  4142. if (havelookahead)return;
  4143. if (!code || code->bracket.nextalt)return;
  4144. PREOpCode op=code->bracket.pairindex->prev;
  4145. while (OP.op==opClosingBracket)
  4146. {
  4147. if (OP.bracket.pairindex->op!=opOpenBracket)return;
  4148. if (OP.bracket.pairindex->bracket.nextalt)return;
  4149. op=op->prev;
  4150. }
  4151. strend--;
  4152. switch (OP.op)
  4153. {
  4154. case opSymbol:
  4155. {
  4156. while (strend>=start && *strend!=OP.symbol)strend--;
  4157. break;
  4158. }
  4159. case opNotSymbol:
  4160. {
  4161. while (strend>=start && *strend==OP.symbol)strend--;
  4162. break;
  4163. }
  4164. case opSymbolIgnoreCase:
  4165. {
  4166. while (strend>=start && TOLOWER(*strend)!=OP.symbol)strend--;
  4167. break;
  4168. }
  4169. case opNotSymbolIgnoreCase:
  4170. {
  4171. while (strend>=start && TOLOWER(*strend)==OP.symbol)strend--;
  4172. break;
  4173. }
  4174. case opType:
  4175. {
  4176. while (strend>=start && !(ISTYPE(*strend,OP.type)))strend--;
  4177. break;
  4178. }
  4179. case opNotType:
  4180. {
  4181. while (strend>=start && ISTYPE(*strend,OP.type))strend--;
  4182. break;
  4183. }
  4184. case opSymbolClass:
  4185. {
  4186. while (strend>=start && !GetBit(OP.symbolclass,*strend))strend--;
  4187. break;
  4188. }
  4189. case opSymbolRange:
  4190. case opSymbolMinRange:
  4191. {
  4192. if (!OP.range.min)break;
  4193. if (ignorecase)
  4194. {
  4195. while (strend>=start && TOLOWER(*strend)!=OP.range.symbol)strend--;
  4196. }
  4197. else
  4198. {
  4199. while (strend>=start && *strend!=OP.range.symbol)strend--;
  4200. }
  4201. break;
  4202. }
  4203. case opNotSymbolRange:
  4204. case opNotSymbolMinRange:
  4205. {
  4206. if (!OP.range.min)break;
  4207. if (ignorecase)
  4208. {
  4209. while (strend>=start && TOLOWER(*strend)==OP.range.symbol)strend--;
  4210. }
  4211. else
  4212. {
  4213. while (strend>=start && *strend==OP.range.symbol)strend--;
  4214. }
  4215. break;
  4216. }
  4217. case opTypeRange:
  4218. case opTypeMinRange:
  4219. {
  4220. if (!OP.range.min)break;
  4221. while (strend>=start && !(ISTYPE(*strend,OP.range.type)))strend--;
  4222. break;
  4223. }
  4224. case opNotTypeRange:
  4225. case opNotTypeMinRange:
  4226. {
  4227. if (!OP.range.min)break;
  4228. while (strend>=start && ISTYPE(*strend,OP.range.type))strend--;
  4229. break;
  4230. }
  4231. case opClassRange:
  4232. case opClassMinRange:
  4233. {
  4234. if (!OP.range.min)break;
  4235. while (strend>=start && !GetBit(OP.range.symbolclass,*strend))strend--;
  4236. break;
  4237. }
  4238. default:break;
  4239. }
  4240. strend++;
  4241. }
  4242. void RegExp::CleanStack()
  4243. {
  4244. PStateStackPage tmp=firstpage->next,tmp2;
  4245. while (tmp)
  4246. {
  4247. tmp2=tmp->next;
  4248. #ifdef RE_NO_NEWARRAY
  4249. DeleteArray(reinterpret_cast<void**>(&tmp->stack),nullptr);
  4250. #else
  4251. delete [] tmp->stack;
  4252. #endif // RE_NO_NEWARRAY
  4253. delete tmp;
  4254. tmp=tmp2;
  4255. }
  4256. }
  4257. #ifndef UNICODE
  4258. void RegExp::SetLocaleInfo(prechar newlc,prechar newuc,prechar newchartypes
  4259. #if defined(RE_EXTERNAL_CTYPE)
  4260. , prechar newcharbits
  4261. #endif
  4262. )
  4263. {
  4264. #ifndef RE_EXTERNAL_CTYPE
  4265. memcpy(lc,newlc,256);
  4266. memcpy(uc,newuc,256);
  4267. memcpy(chartypes,newchartypes,256);
  4268. #else
  4269. lc=newlc;
  4270. uc=newuc;
  4271. chartypes=newchartypes;
  4272. #endif
  4273. int i,j=0,k=1;
  4274. memset(charbits,0,sizeof(charbits));
  4275. for (i=0; i<256; i++)
  4276. {
  4277. if (ISDIGIT(i)) {charbits[j]|=k;}
  4278. if (ISSPACE(i)) {charbits[32+j]|=k;}
  4279. if (ISWORD(i)) {charbits[64+j]|=k;}
  4280. if (ISLOWER(i)) {charbits[96+j]|=k;}
  4281. if (ISUPPER(i)) {charbits[128+j]|=k;}
  4282. if (ISALPHA(i)) {charbits[160+j]|=k;}
  4283. k<<=1;
  4284. if (k==256) {k=1; j++;}
  4285. }
  4286. }
  4287. #endif //UNICODE
  4288. #ifdef RELIB
  4289. int RELibMatch(RELib& relib,MatchList& ml,const char* name,const char* start)
  4290. {
  4291. return RELibMatch(relib,ml,name,start,start+strlen((char*)start));
  4292. }
  4293. int RELibMatch(RELib& relib,MatchList& ml,const char* name,const char* start,const char* end)
  4294. {
  4295. char* k;
  4296. RegExp *re;
  4297. relib.First();
  4298. while (relib.Next(k,re))
  4299. {
  4300. re->ResetRecursion();
  4301. }
  4302. if (!relib.Exists((char*)name))return 0;
  4303. int cnt=relib[(char*)name]->GetBracketsCount();
  4304. PMatch m=new SMatch[cnt];
  4305. PMatchList pml=new MatchList;
  4306. SMatchListItem li;
  4307. li.name=(const prechar)name;
  4308. li.sublist=pml;
  4309. li.start=0;
  4310. ml.Append(li);
  4311. ml.parent=nullptr;
  4312. pml->parent=&ml;
  4313. relib[(char*)name]->SetMatchList(pml);
  4314. #ifdef NAMEDBRACKETS
  4315. MatchHash h;
  4316. #endif
  4317. int res=relib[(char*)name]->Match(start,end,m,cnt
  4318. #ifdef NAMEDBRACKETS
  4319. ,&h
  4320. #endif
  4321. );
  4322. ml.First().Get().start=m[0].start;
  4323. ml.First().Get().end=m[0].end;
  4324. delete [] m;
  4325. return res;
  4326. }
  4327. #endif