PageRenderTime 74ms CodeModel.GetById 35ms RepoModel.GetById 1ms app.codeStats 0ms

/unicode_far/RegExp.cpp

https://bitbucket.org/regent/farmanager
C++ | 5054 lines | 4201 code | 724 blank | 129 comment | 755 complexity | f22272cb582641abcd3d3fd99c498d1c MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*
  2. Copyright © 2000 Konstantin Stupnik
  3. Copyright © 2008 Far Group
  4. All rights reserved.
  5. Redistribution and use in source and binary forms, with or without
  6. modification, are permitted provided that the following conditions
  7. are met:
  8. 1. Redistributions of source code must retain the above copyright
  9. notice, this list of conditions and the following disclaimer.
  10. 2. Redistributions in binary form must reproduce the above copyright
  11. notice, this list of conditions and the following disclaimer in the
  12. documentation and/or other materials provided with the distribution.
  13. 3. The name of the authors may not be used to endorse or promote products
  14. derived from this software without specific prior written permission.
  15. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  16. IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  17. OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  18. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  19. INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  20. NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  24. THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. Regular expressions support library.
  26. Syntax and semantics of regexps very close to
  27. syntax and semantics of perl regexps.
  28. */
  29. #include "headers.hpp"
  30. #pragma hdrstop
  31. #include "RegExp.hpp"
  32. #ifndef RE_FAR_MODE
  33. #ifndef UNICODE
  34. #ifndef RE_EXTERNAL_CTYPE
  35. #include <ctype.h>
  36. #endif
  37. #else
  38. #ifndef __LINUX
  39. #include <windows.h>
  40. #endif
  41. #endif
  42. #ifndef RE_NO_STRING_H
  43. #include <string.h>
  44. #endif
  45. #else
  46. #define malloc xf_malloc
  47. #define free xf_free
  48. #endif
  49. #ifdef RE_DEBUG
  50. #include <stdio.h>
  51. #ifdef dpf
  52. #undef dpf
  53. #endif
  54. #define dpf(x) printf x
  // Printable opcode names used by RE_DEBUG traces.  Slot 0 is "opNone" and
  // the remaining entries follow the REOp enumeration order (REOp starts at
  // 1), so the table must keep the same NAMEDBRACKETS / RELIB conditionals as
  // the enum.  Elements are `const char*` because string literals are not
  // writable and their conversion to `char*` is ill-formed since C++11.
  const char *ops[]=
  {
    "opNone",
    "opLineStart",
    "opLineEnd",
    "opDataStart",
    "opDataEnd",
    "opWordBound",
    "opNotWordBound",
    "opType",
    "opNotType",
    "opCharAny",
    "opCharAnyAll",
    "opSymbol",
    "opNotSymbol",
    "opSymbolIgnoreCase",
    "opNotSymbolIgnoreCase",
    "opSymbolClass",
    "opOpenBracket",
    "opClosingBracket",
    "opAlternative",
    "opBackRef",
  #ifdef NAMEDBRACKETS
    "opNamedBracket",
    "opNamedBackRef",
  #endif
    "opRangesBegin",
    "opRange",
    "opMinRange",
    "opSymbolRange",
    "opSymbolMinRange",
    "opNotSymbolRange",
    "opNotSymbolMinRange",
    "opAnyRange",
    "opAnyMinRange",
    "opTypeRange",
    "opTypeMinRange",
    "opNotTypeRange",
    "opNotTypeMinRange",
    "opClassRange",
    "opClassMinRange",
    "opBracketRange",
    "opBracketMinRange",
    "opBackRefRange",
    "opBackRefMinRange",
  #ifdef NAMEDBRACKETS
    "opNamedRefRange",
    "opNamedRefMinRange",
  #endif
    "opRangesEnd",
    "opAssertionsBegin",
    "opLookAhead",
    "opNotLookAhead",
    "opLookBehind",
    "opNotLookBehind",
    "opAsserionsEnd",
    "opNoReturn",
  #ifdef RELIB
    "opLibCall",
  #endif
    "opRegExpEnd",
  };
  117. #else
  118. #define dpf(x)
  119. #endif
  // Definitions of RegExp's static character tables.  They are compiled only
  // for the non-UNICODE build with RE_STATIC_LOCALE defined.
  120. #ifndef UNICODE
  121. #ifdef RE_STATIC_LOCALE
  122. #ifdef RE_EXTERNAL_CTYPE
  // External ctype: the three tables are plain pointers without an
  // initializer here.  NOTE(review): presumably the embedding code assigns
  // them before use - confirm against the header/callers.
  123. prechar RegExp::lc;
  124. prechar RegExp::uc;
  125. prechar RegExp::chartypes;
  126. #else
  // Built-in ctype: each table is backed by an int array of 256/sizeof(int)
  // elements and accessed through a rechar* alias of that storage.
  127. int RegExp::ilc[256/sizeof(int)];
  128. int RegExp::iuc[256/sizeof(int)];
  129. int RegExp::ichartypes[256/sizeof(int)];
  130. rechar* RegExp::lc=(rechar*)RegExp::ilc;
  131. rechar* RegExp::uc=(rechar*)RegExp::iuc;
  132. rechar* RegExp::chartypes=(rechar*)RegExp::ichartypes;
  133. #endif
  // charbits is defined in both ctype configurations; InitLocale() fills it
  // with one 32-byte bitmap per character type at offsets 0,32,...,160.
  134. int RegExp::icharbits[256/sizeof(int)];
  135. rechar* RegExp::charbits=(rechar*)RegExp::icharbits;
  136. #endif
  137. #endif
  138. #ifdef UNICODE
  // Character classification / case conversion used by the UNICODE build.
  // Every macro argument that appears next to an operator or a cast is
  // parenthesized so that expression arguments (e.g. `a+b`, `x?y:z`) expand
  // with the intended precedence.
  #ifndef __LINUX
  // Windows: classification through the Win32 character API.
  #define ISDIGIT(c) iswdigit(c)
  #define ISSPACE(c) iswspace(c)
  #define ISWORD(c) (IsCharAlphaNumeric(c) || (c)=='_')
  #define ISLOWER(c) IsCharLower(c)
  #define ISUPPER(c) IsCharUpper(c)
  #define ISALPHA(c) IsCharAlpha(c)
  // CharUpper/CharLower are called with the character value passed in the
  // pointer argument, and the converted character is read back from the
  // returned pointer value.
  #define TOUPPER(c) ((rechar)(DWORD_PTR)CharUpper((LPTSTR)(DWORD_PTR)(c)))
  #define TOLOWER(c) ((rechar)(DWORD_PTR)CharLower((LPTSTR)(DWORD_PTR)(c)))
  #else
  // Other platforms: standard wide-character classification.
  #define ISDIGIT(c) iswdigit(c)
  #define ISSPACE(c) iswspace(c)
  #define ISWORD(c) (iswalnum(c) || (c)=='_')
  #define ISLOWER(c) iswlower(c)
  #define ISUPPER(c) iswupper(c)
  #define ISALPHA(c) iswalpha(c)
  #define TOUPPER(c) towupper(c)
  #define TOLOWER(c) towlower(c)
  #endif
  158. #define ISTYPE(c,t) isType(c,t)
  159. int isType(rechar chr,int type)
  160. {
  161. switch (type)
  162. {
  163. case TYPE_DIGITCHAR:return ISDIGIT(chr);
  164. case TYPE_SPACECHAR:return ISSPACE(chr);
  165. case TYPE_WORDCHAR: return ISWORD(chr);
  166. case TYPE_LOWCASE: return ISLOWER(chr);
  167. case TYPE_UPCASE: return ISUPPER(chr);
  168. case TYPE_ALPHACHAR:return ISALPHA(chr);
  169. }
  170. return false;
  171. }
  172. int ushlen(const rechar* str)
  173. {
  174. rechar ch;
  175. int len = -1;
  176. do
  177. {
  178. ch = str[len+1];
  179. len++;
  180. }
  181. while (ch);
  182. return len;
  183. }
  184. #define strlen ushlen
  185. struct UniSet
  186. {
  187. unsigned char* high[256];
  188. char types;
  189. char nottypes;
  190. char negative;
  191. UniSet()
  192. {
  193. ClearArray(high);
  194. types=0;
  195. nottypes=0;
  196. negative=0;
  197. }
  198. UniSet(const UniSet& src)
  199. {
  200. for (int i=0; i<256; i++)
  201. {
  202. if (src.high[i])
  203. {
  204. high[i]=new unsigned char[32];
  205. memcpy(high[i],src.high[i],32);
  206. }
  207. else
  208. {
  209. high[i]=nullptr;
  210. }
  211. }
  212. types=src.types;
  213. nottypes=src.nottypes;
  214. negative=src.negative;
  215. }
  216. UniSet& operator=(const UniSet& src)
  217. {
  218. if (this != &src)
  219. {
  220. for (int i=0; i<256; i++)
  221. {
  222. if (src.high[i])
  223. {
  224. if (!high[i])high[i]=new unsigned char[32];
  225. memcpy(high[i],src.high[i],32);
  226. }
  227. else
  228. {
  229. if (high[i])delete [] high[i];
  230. high[i]=nullptr;
  231. }
  232. }
  233. types=src.types;
  234. nottypes=src.nottypes;
  235. negative=src.negative;
  236. }
  237. return (*this);
  238. }
  239. void Reset()
  240. {
  241. for (int i=0; i<256; i++)
  242. {
  243. if (high[i])
  244. {
  245. delete [] high[i];
  246. high[i]=0;
  247. }
  248. }
  249. types=0;
  250. nottypes=0;
  251. negative=0;
  252. }
  253. struct Setter
  254. {
  255. UniSet& set;
  256. rechar idx;
  257. Setter(UniSet& s,rechar chr):set(s),idx(chr)
  258. {
  259. }
  260. void operator=(int val)
  261. {
  262. if (val)set.SetBit(idx);
  263. else set.ClearBit(idx);
  264. }
  265. bool operator!()const
  266. {
  267. return !set.GetBit(idx);
  268. }
  269. };
  270. const bool operator[](rechar idx)const
  271. {
  272. return GetBit(idx);
  273. }
  274. Setter operator[](rechar idx)
  275. {
  276. return Setter(*this,idx);
  277. }
  278. ~UniSet()
  279. {
  280. for (int i=0; i<256; i++)
  281. {
  282. if (high[i])delete [] high[i];
  283. }
  284. }
  285. bool CheckType(int t, rechar chr) const
  286. {
  287. switch (t)
  288. {
  289. case TYPE_DIGITCHAR:if (ISDIGIT(chr))return true; else break;
  290. case TYPE_SPACECHAR:if (ISSPACE(chr))return true; else break;
  291. case TYPE_WORDCHAR: if (ISWORD(chr)) return true; else break;
  292. case TYPE_LOWCASE: if (ISLOWER(chr))return true; else break;
  293. case TYPE_UPCASE: if (ISUPPER(chr))return true; else break;
  294. case TYPE_ALPHACHAR:if (ISALPHA(chr))return true; else break;
  295. }
  296. return false;
  297. }
  298. bool GetBit(rechar chr) const
  299. {
  300. if (types)
  301. {
  302. int t=TYPE_ALPHACHAR;
  303. while (t)
  304. {
  305. if (types&t)
  306. {
  307. if (CheckType(t,chr))
  308. return negative?false:true;
  309. }
  310. t>>=1;
  311. }
  312. }
  313. if (nottypes)
  314. {
  315. int t=TYPE_ALPHACHAR;
  316. while (t)
  317. {
  318. if (nottypes&t)
  319. {
  320. if (!CheckType(t,chr))
  321. return negative?false:true;
  322. }
  323. t>>=1;
  324. }
  325. }
  326. unsigned char h=(chr&0xff00)>>8;
  327. if (!high[h]) return negative?true:false;
  328. if (((high[h][(chr&0xff)>>3]&(1<<(chr&7)))?1:0))
  329. {
  330. return negative?false:true;
  331. }
  332. return negative?true:false;
  333. }
  334. void SetBit(rechar chr)
  335. {
  336. unsigned char h=(chr&0xff00)>>8;
  337. if (!high[h])
  338. {
  339. high[h]=new unsigned char[32];
  340. memset(high[h],0,32);
  341. }
  342. high[h][(chr&0xff)>>3]|=1<<(chr&7);
  343. }
  344. void ClearBit(rechar chr)
  345. {
  346. unsigned char h=(chr&0xff00)>>8;
  347. if (!high[h])
  348. {
  349. high[h]=new unsigned char[32];
  350. memset(high[h],0,32);
  351. }
  352. high[h][(chr&0xff)>>3]&=~(1<<(chr&7));
  353. }
  354. };
  355. #define GetBit(cls,chr) cls->GetBit(chr)
  356. #define SetBit(cls,chr) cls->SetBit(chr)
  357. #else
  // Character classification / case conversion for the non-UNICODE build.
  // All of them index the chartypes / uc / lc tables with the character.
  #define ISDIGIT(c) ((chartypes[c]&TYPE_DIGITCHAR))
  #define ISSPACE(c) ((chartypes[c]&TYPE_SPACECHAR))
  #define ISWORD(c) ((chartypes[c]&TYPE_WORDCHAR))
  #define ISLOWER(c) ((chartypes[c]&TYPE_LOWCASE))
  #define ISUPPER(c) ((chartypes[c]&TYPE_UPCASE))
  #define ISALPHA(c) ((chartypes[c]&TYPE_ALPHACHAR))
  #define TOUPPER(c) uc[c]
  #define TOLOWER(c) lc[c]
  // `t` is parenthesized: with a combined mask such as A|B the unparenthesized
  // form would parse as (chartypes[c]&A)|B and be non-zero for any character.
  #define ISTYPE(c,t) (chartypes[c]&(t))
  367. #endif //UNICODE
  // Opcodes of the compiled regular expression.  Numbering starts at 1; the
  // RE_DEBUG name table `ops` lists the same names in the same order (with an
  // extra "opNone" in slot 0), so both must be edited together.
  368. enum REOp
  369. {
  370. opLineStart=0x1, // ^
  371. opLineEnd, // $
  372. opDataStart, // \A and ^ in single line mode
  373. opDataEnd, // \Z and $ in single line mode
  374. opWordBound, // \b
  375. opNotWordBound, // \B
  376. opType, // \d\s\w\l\u\e
  377. opNotType, // \D\S\W\L\U\E
  378. opCharAny, // .
  379. opCharAnyAll, // . in single line mode
  380. opSymbol, // single char
  381. opNotSymbol, // [^c] negative charclass with one char
  382. opSymbolIgnoreCase, // symbol with IGNORE_CASE turned on
  383. opNotSymbolIgnoreCase, // [^c] with ignore case set.
  384. opSymbolClass, // [chars]
  385. opOpenBracket, // (
  386. opClosingBracket, // )
  387. opAlternative, // |
  388. opBackRef, // \1
  389. #ifdef NAMEDBRACKETS
  390. opNamedBracket, // (?{name}
  391. opNamedBackRef, // \p{name}
  392. #endif
  393. opRangesBegin, // for op type check
  394. opRange, // generic range
  395. opMinRange, // generic minimizing range
  396. opSymbolRange, // quantifier applied to single char
  397. opSymbolMinRange, // minimizing quantifier
  398. opNotSymbolRange, // [^x]
  399. opNotSymbolMinRange,
  400. opAnyRange, // .
  401. opAnyMinRange,
  402. opTypeRange, // \w, \d, \s
  403. opTypeMinRange,
  404. opNotTypeRange, // \W, \D, \S
  405. opNotTypeMinRange,
  406. opClassRange, // for char classes
  407. opClassMinRange,
  408. opBracketRange, // for brackets
  409. opBracketMinRange,
  410. opBackRefRange, // for backrefs
  411. opBackRefMinRange,
  412. #ifdef NAMEDBRACKETS
  413. opNamedRefRange,
  414. opNamedRefMinRange,
  415. #endif
  416. opRangesEnd, // end of ranges
  417. opAssertionsBegin, // marker placed before the look-around opcodes
  418. opLookAhead,
  419. opNotLookAhead,
  420. opLookBehind,
  421. opNotLookBehind,
  422. opAsserionsEnd, // marker placed after the look-around opcodes (identifier spelling is part of the API)
  423. opNoReturn, // \O
  424. #ifdef RELIB
  425. opLibCall,
  426. #endif
  427. opRegExpEnd
  428. };
  // One instruction of the compiled regular expression.  Instructions live in
  // a contiguous array (see RegExp::Compile) and are additionally chained
  // through next/prev.  `op` selects which member of the trailing anonymous
  // union is meaningful; the destructor / OnDelete use it to decide which
  // owned buffer to release.
  429. struct REOpCode
  430. {
  431. int op; // opcode, compared against REOp values
  432. REOpCode *next,*prev; // neighbours in the instruction chain, null at the ends
  433. #ifdef RE_DEBUG
  434. int srcpos; // debug only: position in the source pattern
  435. #endif
  436. #ifdef RE_NO_NEWARRAY
  // Without new[]/delete[]: element hooks for RegExp::CreateArray/DeleteArray.
  437. static void OnCreate(void *ptr);
  438. static void OnDelete(void *ptr);
  439. #else
  440. REOpCode()
  441. {
  442. ClearStruct(*this); // NOTE(review): presumably zero-fills the object - confirm in headers.hpp
  443. }
  444. ~REOpCode();
  445. #endif
  // Payload of a bracket opcode.
  446. struct SBracket
  447. {
  448. REOpCode* nextalt;
  449. int index;
  450. REOpCode* pairindex;
  451. };
  // Payload of a quantified opcode: the operand (union) plus repetition bounds.
  // CalcPatternLength reads `bracket.pairindex` of the outer union for
  // opBracketRange opcodes as well; NOTE(review): that relies on `bracket`
  // being laid out at the start of this inner union - keep it first.
  452. struct SRange
  453. {
  454. union
  455. {
  456. SBracket bracket;
  457. int op;
  458. rechar symbol;
  459. #ifdef UNICODE
  460. UniSet *symbolclass;
  461. #else
  462. prechar symbolclass;
  463. #endif
  464. REOpCode* nextalt;
  465. int refindex;
  466. #ifdef NAMEDBRACKETS
  467. prechar refname;
  468. #endif
  469. int type;
  470. };
  471. int min,max; // repetition bounds
  472. };
  // Payload of a named bracket; `name` is released when the opcode is destroyed.
  473. struct SNamedBracket
  474. {
  475. REOpCode* nextalt;
  476. prechar name;
  477. REOpCode* pairindex;
  478. };
  // Payload of a look-ahead / look-behind assertion.
  479. struct SAssert
  480. {
  481. REOpCode* nextalt;
  482. int length;
  483. REOpCode* pairindex;
  484. };
  // Payload of an alternative ("|") opcode.
  485. struct SAlternative
  486. {
  487. REOpCode* nextalt;
  488. REOpCode* endindex;
  489. };
  // Operand storage; the active member is determined by `op`.
  490. union
  491. {
  492. SRange range;
  493. SBracket bracket;
  494. #ifdef NAMEDBRACKETS
  495. SNamedBracket nbracket;
  496. #endif
  497. SAssert assert;
  498. SAlternative alternative;
  499. rechar symbol;
  500. #ifdef UNICODE
  501. UniSet *symbolclass; // owned; released for opSymbolClass
  502. #else
  503. prechar symbolclass; // owned; released for opSymbolClass
  504. #endif
  505. int refindex;
  506. #ifdef NAMEDBRACKETS
  507. prechar refname; // owned; released for opNamedBackRef
  508. #endif
  509. #ifdef RELIB
  510. prechar rename; // owned; released for opLibCall
  511. #endif
  512. int type;
  513. };
  514. };
  515. #ifdef RE_NO_NEWARRAY
  // Element initializer for arrays built without new[]: zero-fills the
  // sizeof(StateStackItem) bytes that ptr points to.
  516. void StateStackItem::OnCreate(void *ptr)
  517. {
  518. memset(ptr,0,sizeof(StateStackItem));
  519. }
  // Element initializer passed to RegExp::CreateArray: zero-fills the
  // sizeof(REOpCode) bytes that ptr points to.
  520. void REOpCode::OnCreate(void *ptr)
  521. {
  522. memset(ptr,0,sizeof(REOpCode));
  523. }
  524. void REOpCode::OnDelete(void *ptr)
  525. {
  526. REOpCode &o=*static_cast<REOpCode*>(ptr);
  527. switch (o.op)
  528. {
  529. case opSymbolClass:
  530. if (o.symbolclass)
  531. free(o.symbolclass);
  532. break;
  533. case opClassRange:
  534. case opClassMinRange:
  535. if (o.range.symbolclass)
  536. free(o.range.symbolclass);
  537. break;
  538. #ifdef NAMEDBRACKETS
  539. case opNamedBracket:
  540. if (o.nbracket.name)
  541. free(o.nbracket.name);
  542. break;
  543. case opNamedBackRef:
  544. if (o.refname)
  545. free(o.refname);
  546. break;
  547. #endif
  548. #ifdef RELIB
  549. case opLibCall:
  550. if (o.rename)
  551. free(o.rename);
  552. break;
  553. #endif
  554. }
  555. }
  556. void *RegExp::CreateArray(const unsigned int size, const unsigned int total,
  557. ON_CREATE_FUNC Create)
  558. {
  559. if (total && size)
  560. {
  561. /* record[0] - sizeof
  562. record[1] - total
  563. record[2] - array
  564. */
  565. unsigned char *record=static_cast<unsigned char*>
  566. (malloc(sizeof(unsigned int)*2+size*total));
  567. if (record)
  568. {
  569. unsigned char *array=record+2*sizeof(unsigned int);
  570. *reinterpret_cast<int*>(record)=size;
  571. *reinterpret_cast<int*>(record+sizeof(unsigned int))=total;
  572. if (Create)
  573. for (unsigned int f=0; f<total; ++f)
  574. Create(array+size*f);
  575. return array;
  576. }
  577. }
  578. return nullptr;
  579. }
  580. void RegExp::DeleteArray(void **array, ON_DELETE_FUNC Delete)
  581. {
  582. if (array && *array)
  583. {
  584. unsigned char *record=reinterpret_cast<unsigned char*>(*array)-
  585. 2*sizeof(unsigned int);
  586. if (Delete)
  587. {
  588. unsigned char *m=static_cast<unsigned char*>(*array);
  589. unsigned int size=*reinterpret_cast<int*>(record),
  590. total=*reinterpret_cast<int*>(record+sizeof(unsigned int));
  591. for (unsigned int f=0; f<total; ++f)
  592. Delete(m+size*f);
  593. }
  594. free(record);
  595. *array=nullptr;
  596. }
  597. }
  598. #else // RE_NO_NEWARRAY
  599. REOpCode::~REOpCode()
  600. {
  601. switch (op)
  602. {
  603. #ifdef UNICODE
  604. case opSymbolClass:delete symbolclass; break;
  605. #else
  606. case opSymbolClass:delete [] symbolclass; break;
  607. #endif
  608. #ifdef UNICODE
  609. case opClassRange:
  610. case opClassMinRange:delete range.symbolclass; break;
  611. #else
  612. case opClassRange:
  613. case opClassMinRange:delete [] range.symbolclass; break;
  614. #endif
  615. #ifdef NAMEDBRACKETS
  616. case opNamedBracket:delete [] nbracket.name; break;
  617. case opNamedBackRef:delete [] refname; break;
  618. #endif
  619. #ifdef RELIB
  620. case opLibCall:delete [] rename; break;
  621. #endif
  622. }
  623. }
  624. #endif // RE_NO_NEWARRAY
  // Puts the object into its initial state and compiles expr with the given
  // options.  The result of Compile() is not returned; callers have to look
  // at the error state of the object.
  625. void RegExp::Init(const prechar expr,int options)
  626. {
  627. //ClearStruct(*this);
  628. code=nullptr;
  629. brhandler=nullptr;
  630. brhdata=nullptr;
  631. #ifndef UNICODE
  632. #ifndef RE_STATIC_LOCALE
  633. #ifndef RE_EXTERNAL_CTYPE
  634. InitLocale(); // per-object character tables are built only in this configuration
  635. #endif //RE_EXTERNAL_CTYPE
  636. #endif//RE_STATIC_LOCALE
  637. #endif //UNICODE
  638. #ifdef NAMEDBRACKETS
  639. havenamedbrackets=0;
  640. #endif
  // The state stack starts out on the embedded initstack, registered as the
  // only page of the page list.
  641. stack=&initstack[0];
  642. st=&stack[0];
  643. initstackpage.stack=stack;
  644. firstpage=lastpage=&initstackpage;
  645. firstpage->next=nullptr;
  646. firstpage->prev=nullptr;
  647. #ifdef UNICODE
  // NOTE(review): firstptr is overwritten without releasing a previous value;
  // the only caller visible here is the RegExp(expr,options) constructor -
  // confirm Init is never run on an object that already owns a UniSet.
  648. firstptr=new UniSet();
  // `first` stays defined for the remainder of the translation unit.
  649. #define first (*firstptr)
  650. #endif
  651. start=nullptr;
  652. end=nullptr;
  653. trimend=nullptr;
  654. Compile((const RECHAR*)expr,options);
  655. }
  // Default constructor: builds an object without a compiled expression
  // (errorcode is errNotCompiled, code is null).  The state stack is pointed
  // at the embedded initstack / initstackpage, and '/' and '\\' are the
  // initial slash and backslash characters.
  656. RegExp::RegExp():
  657. code(nullptr),
  658. #ifdef NAMEDBRACKETS
  659. havenamedbrackets(0),
  660. #endif
  661. stack(&initstack[0]),
  662. st(&stack[0]), // NOTE(review): reads `stack`, so `stack` must be declared before `st` in the class - confirm in RegExp.hpp
  663. slashChar('/'),
  664. backslashChar('\\'),
  665. firstpage(&initstackpage),
  666. lastpage(&initstackpage),
  667. #ifdef UNICODE
  668. firstptr(new UniSet()), // released in ~RegExp()
  669. #endif
  670. errorcode(errNotCompiled),
  671. start(nullptr),
  672. end(nullptr),
  673. trimend(nullptr),
  674. #ifdef RE_DEBUG
  675. resrc(nullptr),
  676. #endif
  677. brhandler(nullptr),
  678. brhdata(nullptr)
  679. {
  680. #ifndef UNICODE
  681. #ifndef RE_STATIC_LOCALE
  682. #ifndef RE_EXTERNAL_CTYPE
  683. InitLocale(); // per-object character tables are built only in this configuration
  684. #endif
  685. #endif
  686. #endif//UNICODE
  // register the embedded stack as the single page of the page list
  687. initstackpage.stack=stack;
  688. firstpage->next=nullptr;
  689. firstpage->prev=nullptr;
  690. }
  691. RegExp::RegExp(const RECHAR* expr,int options)
  692. {
  693. slashChar='/';
  694. backslashChar='\\';
  695. #ifdef RE_DEBUG
  696. resrc=nullptr;
  697. #endif
  698. Init((const prechar)expr,options);
  699. }
  700. RegExp::~RegExp()
  701. {
  702. #ifdef RE_DEBUG
  703. #ifdef RE_NO_NEWARRAY
  704. if (resrc)
  705. free(resrc);
  706. #else
  707. delete [] resrc;
  708. #endif // RE_NO_NEWARRAY
  709. #endif
  710. if (code)
  711. {
  712. #ifdef RE_NO_NEWARRAY
  713. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  714. #else
  715. delete [] code;
  716. code=nullptr;
  717. #endif
  718. }
  719. CleanStack();
  720. #ifdef UNICODE
  721. delete firstptr;
  722. #endif
  723. }
  724. #ifndef UNICODE
  725. #ifndef RE_EXTERNAL_CTYPE
  726. void RegExp::InitLocale()
  727. {
  728. for (int i=0; i<256; i++)
  729. {
  730. lc[i]=tolower(i);
  731. uc[i]=toupper(i);
  732. }
  733. for (int i=0; i<256; i++)
  734. {
  735. char res=0;
  736. if (isalnum(i) || i=='_')res|=TYPE_WORDCHAR;
  737. if (isalpha(i))res|=TYPE_ALPHACHAR;
  738. if (isdigit(i))res|=TYPE_DIGITCHAR;
  739. if (isspace(i))res|=TYPE_SPACECHAR;
  740. if (lc[i]==i && uc[i]!=i)res|=TYPE_LOWCASE;
  741. if (uc[i]==i && lc[i]!=i)res|=TYPE_UPCASE;
  742. chartypes[i]=res;
  743. }
  744. memset(charbits,0,sizeof(charbits));
  745. for (int i=0,j=0,k=1; i<256; i++)
  746. {
  747. if (chartypes[i]&TYPE_DIGITCHAR) {charbits[j]|=k;}
  748. if (chartypes[i]&TYPE_SPACECHAR) {charbits[32+j]|=k;}
  749. if (chartypes[i]&TYPE_WORDCHAR) {charbits[64+j]|=k;}
  750. if (chartypes[i]&TYPE_LOWCASE) {charbits[96+j]|=k;}
  751. if (chartypes[i]&TYPE_UPCASE) {charbits[128+j]|=k;}
  752. if (chartypes[i]&TYPE_ALPHACHAR) {charbits[160+j]|=k;}
  753. k<<=1;
  754. if (k==256) {k=1; j++;}
  755. }
  756. }
  757. #endif
  758. #endif
  759. int RegExp::CalcLength(const prechar src,int srclength)
  760. {
  761. int length=3;//global brackets
  762. int brackets[MAXDEPTH];
  763. int count=0;
  764. int i,save;
  765. bracketscount=1;
  766. int inquote=0;
  767. for (i=0; i<srclength; i++,length++)
  768. {
  769. if (inquote && src[i]!=backslashChar && src[i+1]!='E')
  770. {
  771. continue;
  772. }
  773. if (src[i]==backslashChar)
  774. {
  775. i++;
  776. if (src[i]=='Q')inquote=1;
  777. if (src[i]=='E')inquote=0;
  778. if (src[i]=='x')
  779. {
  780. i++;
  781. if(isxdigit(src[i]))
  782. {
  783. for(int j=1,k=i;j<4;j++)
  784. {
  785. if(isxdigit(src[k+j]))
  786. {
  787. i++;
  788. }
  789. else
  790. {
  791. break;
  792. }
  793. }
  794. }
  795. else return SetError(errSyntax,i);
  796. }
  797. #ifdef NAMEDBRACKETS
  798. if (src[i]=='p')
  799. {
  800. i++;
  801. if (src[i]!='{')
  802. return SetError(errSyntax,i);
  803. i++;
  804. int save2=i;
  805. while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
  806. i++;
  807. if (i>=srclength)
  808. return SetError(errBrackets,save2);
  809. if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
  810. return SetError(errSyntax,i);
  811. }
  812. #endif
  813. continue;
  814. }
  815. switch (src[i])
  816. {
  817. case '(':
  818. {
  819. brackets[count]=i;
  820. count++;
  821. if (count==MAXDEPTH)return SetError(errMaxDepth,i);
  822. if (src[i+1]=='?')
  823. {
  824. i+=2;
  825. #ifdef NAMEDBRACKETS
  826. if (src[i]=='{')
  827. {
  828. save=i;
  829. i++;
  830. while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
  831. i++;
  832. if (i>=srclength)
  833. return SetError(errBrackets,save);
  834. if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
  835. return SetError(errSyntax,i);
  836. }
  837. #endif
  838. }
  839. else
  840. {
  841. bracketscount++;
  842. }
  843. break;
  844. }
  845. case ')':
  846. {
  847. count--;
  848. if (count<0)return SetError(errBrackets,i);
  849. break;
  850. }
  851. case '{':
  852. case '*':
  853. case '+':
  854. case '?':
  855. {
  856. length--;
  857. if (src[i]=='{')
  858. {
  859. save=i;
  860. while (i<srclength && src[i]!='}')i++;
  861. if (i>=srclength)return SetError(errBrackets,save);
  862. }
  863. if (src[i+1]=='?')i++;
  864. break;
  865. }
  866. case '[':
  867. {
  868. save=i;
  869. while (i<srclength && src[i]!=']')i++;
  870. if (i>=srclength)return SetError(errBrackets,save);
  871. break;
  872. }
  873. #ifdef RELIB
  874. case '%':
  875. {
  876. i++;
  877. save=i;
  878. while (i<srclength && src[i]!='%')i++;
  879. if (i>=srclength)return SetError(errBrackets,save-1);
  880. if (save==i)return SetError(errSyntax,save);
  881. } break;
  882. #endif
  883. }
  884. }
  885. if (count)
  886. {
  887. errorpos=brackets[0];
  888. errorcode=errBrackets;
  889. return 0;
  890. }
  891. return length;
  892. }
  893. int RegExp::Compile(const RECHAR* src,int options)
  894. {
  895. int srcstart=0,srclength/*=0*/,relength;
  896. if (options&OP_CPPMODE)
  897. {
  898. slashChar='\\';
  899. backslashChar='/';
  900. }
  901. else
  902. {
  903. slashChar='/';
  904. backslashChar='\\';
  905. }
  906. havefirst=0;
  907. #ifdef RE_NO_NEWARRAY
  908. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  909. #else
  910. if (code)delete [] code;
  911. code=nullptr;
  912. #endif
  913. if (options&OP_PERLSTYLE)
  914. {
  915. if (src[0]!=slashChar)return SetError(errSyntax,0);
  916. srcstart=1;
  917. srclength=1;
  918. while (src[srclength] && src[srclength]!=slashChar)
  919. {
  920. if (src[srclength]==backslashChar && src[srclength+1])
  921. {
  922. srclength++;
  923. }
  924. srclength++;
  925. }
  926. if (!src[srclength])
  927. {
  928. return SetError(errSyntax,srclength-1);
  929. }
  930. int i=srclength+1;
  931. srclength--;
  932. while (src[i])
  933. {
  934. switch (src[i])
  935. {
  936. case 'i':options|=OP_IGNORECASE; break;
  937. case 's':options|=OP_SINGLELINE; break;
  938. case 'm':options|=OP_MULTILINE; break;
  939. case 'x':options|=OP_XTENDEDSYNTAX; break;
  940. case 'o':options|=OP_OPTIMIZE; break;
  941. default:return SetError(errOptions,i);
  942. }
  943. i++;
  944. }
  945. }
  946. else
  947. {
  948. srclength=(int)strlen(src);
  949. }
  950. ignorecase=options&OP_IGNORECASE?1:0;
  951. relength=CalcLength((const prechar)src+srcstart,srclength);
  952. if (!relength)
  953. {
  954. return 0;
  955. }
  956. #ifdef RE_NO_NEWARRAY
  957. code=static_cast<REOpCode*>
  958. (CreateArray(sizeof(REOpCode), relength, REOpCode::OnCreate));
  959. #else
  960. code=new REOpCode[relength];
  961. memset(code,0,sizeof(REOpCode)*relength);
  962. #endif
  963. for (int i=0; i<relength; i++)
  964. {
  965. code[i].next=i<relength-1?code+i+1:0;
  966. code[i].prev=i>0?code+i-1:0;
  967. }
  968. int result=InnerCompile((const prechar)src+srcstart,srclength,options);
  969. if (!result)
  970. {
  971. #ifdef RE_NO_NEWARRAY
  972. DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
  973. #else
  974. delete [] code;
  975. code=nullptr;
  976. #endif
  977. }
  978. else
  979. {
  980. errorcode=errNone;
  981. minlength=0;
  982. if (options&OP_OPTIMIZE)Optimize();
  983. }
  984. return result;
  985. }
  986. int RegExp::GetNum(const prechar src,int& i)
  987. {
  988. int res=0;//atoi((const char*)src+i);
  989. while (ISDIGIT(src[i]))
  990. {
  991. res*=10;
  992. res+=src[i]-'0';
  993. i++;
  994. }
  995. return res;
  996. }
  997. static int CalcPatternLength(PREOpCode from,PREOpCode to)
  998. {
  999. int len=0;
  1000. int altcnt=0;
  1001. int altlen=-1;
  1002. for (; from->prev!=to; from=from->next)
  1003. {
  1004. switch (from->op)
  1005. {
  1006. //zero width
  1007. case opLineStart:
  1008. case opLineEnd:
  1009. case opDataStart:
  1010. case opDataEnd:
  1011. case opWordBound:
  1012. case opNotWordBound:continue;
  1013. case opType:
  1014. case opNotType:
  1015. case opCharAny:
  1016. case opCharAnyAll:
  1017. case opSymbol:
  1018. case opNotSymbol:
  1019. case opSymbolIgnoreCase:
  1020. case opNotSymbolIgnoreCase:
  1021. case opSymbolClass:
  1022. len++;
  1023. altcnt++;
  1024. continue;
  1025. #ifdef NAMEDBRACKETS
  1026. case opNamedBracket:
  1027. #endif
  1028. case opOpenBracket:
  1029. {
  1030. int l=CalcPatternLength(from->next,from->bracket.pairindex->prev);
  1031. if (l==-1)return -1;
  1032. len+=l;
  1033. altcnt+=l;
  1034. from=from->bracket.pairindex;
  1035. continue;
  1036. }
  1037. case opClosingBracket:
  1038. break;
  1039. case opAlternative:
  1040. if (altlen!=-1 && altcnt!=altlen)return -1;
  1041. altlen=altcnt;
  1042. altcnt=0;
  1043. continue;
  1044. case opBackRef:
  1045. #ifdef NAMEDBRACKETS
  1046. case opNamedBackRef:
  1047. #endif
  1048. return -1;
  1049. case opRangesBegin:
  1050. case opRange:
  1051. case opMinRange:
  1052. case opSymbolRange:
  1053. case opSymbolMinRange:
  1054. case opNotSymbolRange:
  1055. case opNotSymbolMinRange:
  1056. case opAnyRange:
  1057. case opAnyMinRange:
  1058. case opTypeRange:
  1059. case opTypeMinRange:
  1060. case opNotTypeRange:
  1061. case opNotTypeMinRange:
  1062. case opClassRange:
  1063. case opClassMinRange:
  1064. if (from->range.min!=from->range.max)return -1;
  1065. len+=from->range.min;
  1066. altcnt+=from->range.min;
  1067. continue;
  1068. case opBracketRange:
  1069. case opBracketMinRange:
  1070. {
  1071. if (from->range.min!=from->range.max)return -1;
  1072. int l=CalcPatternLength(from->next,from->bracket.pairindex->prev);
  1073. if (l==-1)return -1;
  1074. len+=from->range.min*l;
  1075. altcnt+=from->range.min*l;
  1076. from=from->bracket.pairindex;
  1077. continue;
  1078. }
  1079. case opBackRefRange:
  1080. case opBackRefMinRange:
  1081. #ifdef NAMEDBRACKETS
  1082. case opNamedRefRange:
  1083. case opNamedRefMinRange:
  1084. #endif
  1085. return -1;
  1086. case opRangesEnd:
  1087. case opAssertionsBegin:
  1088. case opLookAhead:
  1089. case opNotLookAhead:
  1090. case opLookBehind:
  1091. case opNotLookBehind:
  1092. from=from->assert.pairindex;
  1093. continue;
  1094. case opAsserionsEnd:
  1095. case opNoReturn:
  1096. continue;
  1097. #ifdef RELIB
  1098. case opLibCall:
  1099. return -1;
  1100. #endif
  1101. }
  1102. }
  1103. if (altlen!=-1 && altlen!=altcnt)return -1;
  1104. return altlen==-1?len:altlen;
  1105. }
  1106. int RegExp::InnerCompile(const prechar src,int srclength,int options)
  1107. {
  1108. int i,j;
  1109. PREOpCode brackets[MAXDEPTH];
  1110. // current brackets depth
  1111. // one place reserved for surrounding 'main' brackets
  1112. int brdepth=1;
  1113. // compiling interior of lookbehind
  1114. // used to apply restrictions of lookbehind
  1115. int lookbehind=0;
  1116. // counter of normal brackets
  1117. int brcount=0;
  1118. // counter of closed brackets
  1119. // used to check correctness of backreferences
  1120. bool closedbrackets[MAXDEPTH];
  1121. // quoting is active
  1122. int inquote=0;
  1123. maxbackref=0;
  1124. #ifdef UNICODE
  1125. UniSet *tmpclass;
  1126. #else
  1127. rechar tmpclass[32];
  1128. int *itmpclass=(int*)tmpclass;
  1129. #endif
  1130. code->op=opOpenBracket;
  1131. code->bracket.index=0;
  1132. #ifdef NAMEDBRACKETS
  1133. MatchHash h;
  1134. SMatch m;
  1135. #endif
  1136. int pos=1;
  1137. register PREOpCode op;//=code;
  1138. brackets[0]=code;
  1139. #ifdef RE_DEBUG
  1140. #ifdef RE_NO_NEWARRAY
  1141. resrc=static_cast<rechar*>(malloc(sizeof(rechar)*(srclength+4)));
  1142. #else
  1143. resrc=new rechar[srclength+4];
  1144. #endif // RE_NO_NEWARRAY
  1145. resrc[0]='(';
  1146. resrc[1]=0;
  1147. memcpy(resrc+1,src,srclength*sizeof(rechar));
  1148. resrc[srclength+1]=')';
  1149. resrc[srclength+2]=27;
  1150. resrc[srclength+3]=0;
  1151. #endif
  1152. havelookahead=0;
  1153. for (i=0; i<srclength; i++)
  1154. {
  1155. op=code+pos;
  1156. pos++;
  1157. #ifdef RE_DEBUG
  1158. op->srcpos=i+1;
  1159. #endif
  1160. if (inquote && src[i]!=backslashChar)
  1161. {
  1162. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1163. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1164. if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;
  1165. continue;
  1166. }
  1167. if (src[i]==backslashChar)
  1168. {
  1169. i++;
  1170. if (inquote && src[i]!='E')
  1171. {
  1172. op->op=opSymbol;
  1173. op->symbol=backslashChar;
  1174. op=code+pos;
  1175. pos++;
  1176. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1177. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1178. if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;
  1179. continue;
  1180. }
  1181. op->op=opType;
  1182. switch (src[i])
  1183. {
  1184. case 'Q':inquote=1; pos--; continue;
  1185. case 'E':inquote=0; pos--; continue;
  1186. case 'b':op->op=opWordBound; continue;
  1187. case 'B':op->op=opNotWordBound; continue;
  1188. case 'D':op->op=opNotType;
  1189. case 'd':op->type=TYPE_DIGITCHAR; continue;
  1190. case 'S':op->op=opNotType;
  1191. case 's':op->type=TYPE_SPACECHAR; continue;
  1192. case 'W':op->op=opNotType;
  1193. case 'w':op->type=TYPE_WORDCHAR; continue;
  1194. case 'U':op->op=opNotType;
  1195. case 'u':op->type=TYPE_UPCASE; continue;
  1196. case 'L':op->op=opNotType;
  1197. case 'l':op->type=TYPE_LOWCASE; continue;
  1198. case 'I':op->op=opNotType;
  1199. case 'i':op->type=TYPE_ALPHACHAR; continue;
  1200. case 'A':op->op=opDataStart; continue;
  1201. case 'Z':op->op=opDataEnd; continue;
  1202. case 'n':op->op=opSymbol; op->symbol='\n'; continue;
  1203. case 'r':op->op=opSymbol; op->symbol='\r'; continue;
  1204. case 't':op->op=opSymbol; op->symbol='\t'; continue;
  1205. case 'f':op->op=opSymbol; op->symbol='\f'; continue;
  1206. case 'e':op->op=opSymbol; op->symbol=27; continue;
  1207. case 'O':op->op=opNoReturn; continue;
  1208. #ifdef NAMEDBRACKETS
  1209. case 'p':
  1210. {
  1211. op->op=opNamedBackRef;
  1212. i++;
  1213. if (src[i]!='{')return SetError(errSyntax,i);
  1214. int len=0; i++;
  1215. while (src[i+len]!='}')len++;
  1216. if (len>0)
  1217. {
  1218. #ifdef RE_NO_NEWARRAY
  1219. op->refname=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1220. #else
  1221. op->refname=new rechar[len+1];
  1222. #endif
  1223. memcpy(op->refname,src+i,len*sizeof(rechar));
  1224. op->refname[len]=0;
  1225. if (!h.Exists((char*)op->refname))
  1226. {
  1227. return SetError(errReferenceToUndefinedNamedBracket,i);
  1228. }
  1229. i+=len;
  1230. }
  1231. else
  1232. {
  1233. return SetError(errSyntax,i);
  1234. }
  1235. } continue;
  1236. #endif
  1237. case 'x':
  1238. {
  1239. i++;
  1240. if (i>=srclength)return SetError(errSyntax,i-1);
  1241. if(isxdigit(src[i]))
  1242. {
  1243. int c=TOLOWER(src[i])-'0';
  1244. if (c>9)c-='a'-'0'-10;
  1245. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1246. op->symbol=c;
  1247. for(int j=1,k=i;j<4 && k+j<srclength;j++)
  1248. {
  1249. if(isxdigit(src[k+j]))
  1250. {
  1251. i++;
  1252. c=TOLOWER(src[k+j])-'0';
  1253. if (c>9)c-='a'-'0'-10;
  1254. op->symbol<<=4;
  1255. op->symbol|=c;
  1256. }
  1257. else
  1258. {
  1259. break;
  1260. }
  1261. }
  1262. if (ignorecase)
  1263. {
  1264. op->symbol=TOLOWER(op->symbol);
  1265. if (TOUPPER(op->symbol)==TOLOWER(op->symbol))
  1266. {
  1267. op->op=opSymbol;
  1268. }
  1269. }
  1270. }
  1271. else return SetError(errSyntax,i);
  1272. continue;
  1273. }
  1274. default:
  1275. {
  1276. if (ISDIGIT(src[i]))
  1277. {
  1278. int save=i;
  1279. op->op=opBackRef;
  1280. op->refindex=GetNum(src,i); i--;
  1281. if (op->refindex<=0 || op->refindex>brcount || !closedbrackets[op->refindex])
  1282. {
  1283. return SetError(errInvalidBackRef,save-1);
  1284. }
  1285. if (op->refindex>maxbackref)maxbackref=op->refindex;
  1286. }
  1287. else
  1288. {
  1289. if (options&OP_STRICT && ISALPHA(src[i]))
  1290. {
  1291. return SetError(errInvalidEscape,i-1);
  1292. }
  1293. op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
  1294. op->symbol=ignorecase?TOLOWER(src[i]):src[i];
  1295. if (TOLOWER(op->symbol)==TOUPPER(op->symbol))
  1296. {
  1297. op->op=opSymbol;
  1298. }
  1299. }
  1300. }
  1301. }
  1302. continue;
  1303. }
  1304. switch (src[i])
  1305. {
  1306. case '.':
  1307. {
  1308. if (options&OP_SINGLELINE)
  1309. {
  1310. op->op=opCharAnyAll;
  1311. }
  1312. else
  1313. {
  1314. op->op=opCharAny;
  1315. }
  1316. continue;
  1317. }
  1318. case '^':
  1319. {
  1320. if (options&OP_MULTILINE)
  1321. {
  1322. op->op=opLineStart;
  1323. }
  1324. else
  1325. {
  1326. op->op=opDataStart;
  1327. }
  1328. continue;
  1329. }
  1330. case '$':
  1331. {
  1332. if (options&OP_MULTILINE)
  1333. {
  1334. op->op=opLineEnd;
  1335. }
  1336. else
  1337. {
  1338. op->op=opDataEnd;
  1339. }
  1340. continue;
  1341. }
  1342. case '|':
  1343. {
  1344. if (brackets[brdepth-1]->op==opAlternative)
  1345. {
  1346. brackets[brdepth-1]->alternative.nextalt=op;
  1347. }
  1348. else
  1349. {
  1350. if (brackets[brdepth-1]->op==opOpenBracket)
  1351. {
  1352. brackets[brdepth-1]->bracket.nextalt=op;
  1353. }
  1354. else
  1355. {
  1356. brackets[brdepth-1]->assert.nextalt=op;
  1357. }
  1358. }
  1359. if (brdepth==MAXDEPTH)return SetError(errMaxDepth,i);
  1360. brackets[brdepth++]=op;
  1361. op->op=opAlternative;
  1362. continue;
  1363. }
  1364. case '(':
  1365. {
  1366. op->op=opOpenBracket;
  1367. if (src[i+1]=='?')
  1368. {
  1369. i+=2;
  1370. switch (src[i])
  1371. {
  1372. case ':':op->bracket.index=-1; break;
  1373. case '=':op->op=opLookAhead; havelookahead=1; break;
  1374. case '!':op->op=opNotLookAhead; havelookahead=1; break;
  1375. case '<':
  1376. {
  1377. i++;
  1378. if (src[i]=='=')
  1379. {
  1380. op->op=opLookBehind;
  1381. }
  1382. else if (src[i]=='!')
  1383. {
  1384. op->op=opNotLookBehind;
  1385. }
  1386. else return SetError(errSyntax,i);
  1387. lookbehind++;
  1388. } break;
  1389. #ifdef NAMEDBRACKETS
  1390. case '{':
  1391. {
  1392. op->op=opNamedBracket;
  1393. havenamedbrackets=1;
  1394. int len=0;
  1395. i++;
  1396. while (src[i+len]!='}')len++;
  1397. if (len>0)
  1398. {
  1399. #ifdef RE_NO_NEWARRAY
  1400. op->nbracket.name=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1401. #else
  1402. op->nbracket.name=new rechar[len+1];
  1403. #endif
  1404. memcpy(op->nbracket.name,src+i,len*sizeof(rechar));
  1405. op->nbracket.name[len]=0;
  1406. //h.SetItem((char*)op->nbracket.name,m);
  1407. }
  1408. else
  1409. {
  1410. op->op=opOpenBracket;
  1411. op->bracket.index=-1;
  1412. }
  1413. i+=len;
  1414. } break;
  1415. #endif
  1416. default:
  1417. {
  1418. return SetError(errSyntax,i);
  1419. }
  1420. }
  1421. }
  1422. else
  1423. {
  1424. brcount++;
  1425. closedbrackets[brcount]=false;
  1426. op->bracket.index=brcount;
  1427. }
  1428. brackets[brdepth]=op;
  1429. brdepth++;
  1430. continue;
  1431. }
  1432. case ')':
  1433. {
  1434. op->op=opClosingBracket;
  1435. brdepth--;
  1436. while (brackets[brdepth]->op==opAlternative)
  1437. {
  1438. brackets[brdepth]->alternative.endindex=op;
  1439. brdepth--;
  1440. }
  1441. switch (brackets[brdepth]->op)
  1442. {
  1443. case opOpenBracket:
  1444. {
  1445. op->bracket.pairindex=brackets[brdepth];
  1446. brackets[brdepth]->bracket.pairindex=op;
  1447. op->bracket.index=brackets[brdepth]->bracket.index;
  1448. if (op->bracket.index!=-1)
  1449. {
  1450. closedbrackets[op->bracket.index]=true;
  1451. }
  1452. break;
  1453. }
  1454. #ifdef NAMEDBRACKETS
  1455. case opNamedBracket:
  1456. {
  1457. op->nbracket.pairindex=brackets[brdepth];
  1458. brackets[brdepth]->nbracket.pairindex=op;
  1459. op->nbracket.name=brackets[brdepth]->nbracket.name;
  1460. h.SetItem((char*)op->nbracket.name,m);
  1461. break;
  1462. }
  1463. #endif
  1464. case opLookBehind:
  1465. case opNotLookBehind:
  1466. {
  1467. lookbehind--;
  1468. int l=CalcPatternLength(brackets[brdepth]->next,op->prev);
  1469. if (l==-1)return SetError(errVariableLengthLookBehind,i);
  1470. brackets[brdepth]->assert.length=l;
  1471. }// there is no break and this is correct!
  1472. case opLookAhead:
  1473. case opNotLookAhead:
  1474. {
  1475. op->assert.pairindex=brackets[brdepth];
  1476. brackets[brdepth]->assert.pairindex=op;
  1477. break;
  1478. }
  1479. }
  1480. continue;
  1481. }
  1482. case '[':
  1483. {
  1484. i++;
  1485. int negative=0;
  1486. if (src[i]=='^')
  1487. {
  1488. negative=1;
  1489. i++;
  1490. }
  1491. int lastchar=0;
  1492. int classsize=0;
  1493. op->op=opSymbolClass;
  1494. //op->symbolclass=new rechar[32];
  1495. //memset(op->symbolclass,0,32);
  1496. #ifdef UNICODE
  1497. op->symbolclass=new UniSet();
  1498. tmpclass=op->symbolclass;
  1499. #define IF_U(t)
  1500. #else
  1501. for (j=0; j<8; j++)itmpclass[j]=0;
  1502. int classindex=0;
  1503. #define IF_U(t) t
  1504. #endif
  1505. for (; src[i]!=']'; i++)
  1506. {
  1507. if (src[i]==backslashChar)
  1508. {
  1509. i++;
  1510. int isnottype=0;
  1511. int type=0;
  1512. lastchar=0;
  1513. switch (src[i])
  1514. {
  1515. case 'D':isnottype=1;
  1516. case 'd':type=TYPE_DIGITCHAR; IF_U(classindex=0); break;
  1517. case 'W':isnottype=1;
  1518. case 'w':type=TYPE_WORDCHAR; IF_U(classindex=64); break;
  1519. case 'S':isnottype=1;
  1520. case 's':type=TYPE_SPACECHAR; IF_U(classindex=32); break;
  1521. case 'L':isnottype=1;
  1522. case 'l':type=TYPE_LOWCASE; IF_U(lassindex=96); break;
  1523. case 'U':isnottype=1;
  1524. case 'u':type=TYPE_UPCASE; IF_U(classindex=128); break;
  1525. case 'I':isnottype=1;
  1526. case 'i':type=TYPE_ALPHACHAR; IF_U(classindex=160); break;
  1527. case 'n':lastchar='\n'; break;
  1528. case 'r':lastchar='\r'; break;
  1529. case 't':lastchar='\t'; break;
  1530. case 'f':lastchar='\f'; break;
  1531. case 'e':lastchar=27; break;
  1532. case 'x':
  1533. {
  1534. i++;
  1535. if (i>=srclength)return SetError(errSyntax,i-1);
  1536. if (isxdigit(src[i]))
  1537. {
  1538. int c=TOLOWER(src[i])-'0';
  1539. if (c>9)c-='a'-'0'-10;
  1540. lastchar=c;
  1541. for(int j=1,k=i;j<4 && k+j<srclength;j++)
  1542. {
  1543. if (isxdigit(src[k+j]))
  1544. {
  1545. i++;
  1546. c=TOLOWER(src[k+j])-'0';
  1547. if (c>9)c-='a'-'0'-10;
  1548. lastchar<<=4;
  1549. lastchar|=c;
  1550. }
  1551. else
  1552. {
  1553. break;
  1554. }
  1555. }
  1556. dpf(("Last char=%c(%02x)\n",lastchar,lastchar));
  1557. }
  1558. else return SetError(errSyntax,i);
  1559. break;
  1560. }
  1561. default:
  1562. {
  1563. if (options&OP_STRICT && ISALPHA(src[i]))
  1564. {
  1565. return SetError(errInvalidEscape,i-1);
  1566. }
  1567. lastchar=src[i];
  1568. }
  1569. }
  1570. if (type)
  1571. {
  1572. #ifdef UNICODE
  1573. if (isnottype)
  1574. {
  1575. tmpclass->nottypes|=type;
  1576. }
  1577. else
  1578. {
  1579. tmpclass->types|=type;
  1580. }
  1581. #else
  1582. isnottype=isnottype?0xffffffff:0;
  1583. int *b=(int*)(charbits+classindex);
  1584. for (j=0; j<8; j++)
  1585. {
  1586. itmpclass[j]|=b[j]^isnottype;
  1587. }
  1588. #endif
  1589. classsize=257;
  1590. //for(int j=0;j<32;j++)op->symbolclass[j]|=charbits[classindex+j]^isnottype;
  1591. //classsize+=charsizes[classindex>>5];
  1592. //int setbit;
  1593. /*for(int j=0;j<256;j++)
  1594. {
  1595. setbit=(chartypes[j]^isnottype)&type;
  1596. if(setbit)
  1597. {
  1598. if(ignorecase)
  1599. {
  1600. SetBit(op->symbolclass,lc[j]);
  1601. SetBit(op->symbolclass,uc[j]);
  1602. }else
  1603. {
  1604. SetBit(op->symbolclass,j);
  1605. }
  1606. classsize++;
  1607. }
  1608. }*/
  1609. }
  1610. else
  1611. {
  1612. if (options&OP_IGNORECASE)
  1613. {
  1614. SetBit(tmpclass,TOLOWER(lastchar));
  1615. SetBit(tmpclass,TOUPPER(lastchar));
  1616. }
  1617. else
  1618. {
  1619. SetBit(tmpclass,lastchar);
  1620. }
  1621. classsize++;
  1622. }
  1623. continue;
  1624. }
  1625. if (src[i]=='-')
  1626. {
  1627. if (lastchar && src[i+1]!=']')
  1628. {
  1629. int to=src[i+1];
  1630. if (to==backslashChar)
  1631. {
  1632. to=src[i+2];
  1633. if (to=='x')
  1634. {
  1635. i+=2;
  1636. to=TOLOWER(src[i+1]);
  1637. if(isxdigit(to))
  1638. {
  1639. to-='0';
  1640. if (to>9)to-='a'-'0'-10;
  1641. for(int j=1,k=(i+1);j<4 && k+j<srclength;j++)
  1642. {
  1643. int c=TOLOWER(src[k+j]);
  1644. if(isxdigit(c))
  1645. {
  1646. i++;
  1647. c-='0';
  1648. if (c>9)c-='a'-'0'-10;
  1649. to<<=4;
  1650. to|=c;
  1651. }
  1652. else
  1653. {
  1654. break;
  1655. }
  1656. }
  1657. }
  1658. else return SetError(errSyntax,i);
  1659. }
  1660. else
  1661. {
  1662. SetBit(tmpclass,'-');
  1663. classsize++;
  1664. continue;
  1665. }
  1666. }
  1667. i++;
  1668. dpf(("from %d to %d\n",lastchar,to));
  1669. for (j=lastchar; j<=to; j++)
  1670. {
  1671. if (ignorecase)
  1672. {
  1673. SetBit(tmpclass,TOLOWER(j));
  1674. SetBit(tmpclass,TOUPPER(j));
  1675. }
  1676. else
  1677. {
  1678. SetBit(tmpclass,j);
  1679. }
  1680. classsize++;
  1681. }
  1682. continue;
  1683. }
  1684. }
  1685. lastchar=src[i];
  1686. if (ignorecase)
  1687. {
  1688. SetBit(tmpclass,TOLOWER(lastchar));
  1689. SetBit(tmpclass,TOUPPER(lastchar));
  1690. }
  1691. else
  1692. {
  1693. SetBit(tmpclass,lastchar);
  1694. }
  1695. classsize++;
  1696. }
  1697. if (negative && classsize>1)
  1698. {
  1699. #ifdef UNICODE
  1700. tmpclass->negative=negative;
  1701. #else
  1702. for (int jj=0; jj<8; jj++)itmpclass[jj]^=0xffffffff;
  1703. #endif
  1704. //for(int j=0;j<32;j++)op->symbolclass[j]^=0xff;
  1705. }
  1706. if (classsize==1)
  1707. {
  1708. #ifdef UNICODE
  1709. delete op->symbolclass;
  1710. op->symbolclass=0;
  1711. tmpclass=0;
  1712. #endif
  1713. op->op=negative?opNotSymbol:opSymbol;
  1714. if (ignorecase)
  1715. {
  1716. op->op+=2;
  1717. op->symbol=TOLOWER(lastchar);
  1718. }
  1719. else
  1720. {
  1721. op->symbol=lastchar;
  1722. }
  1723. }
  1724. #ifdef UNICODE
  1725. if (tmpclass)tmpclass->negative=negative;
  1726. #else
  1727. else if (classsize==256 && !negative)
  1728. {
  1729. op->op=options&OP_SINGLELINE?opCharAnyAll:opCharAny;
  1730. }
  1731. else
  1732. {
  1733. #ifdef RE_NO_NEWARRAY
  1734. op->symbolclass=static_cast<rechar*>(malloc(sizeof(rechar)*32));
  1735. #else
  1736. op->symbolclass=new rechar[32];
  1737. #endif
  1738. for (j=0; j<8; j++)((int*)op->symbolclass)[j]=itmpclass[j];
  1739. }
  1740. #endif
  1741. continue;
  1742. }
  1743. case '+':
  1744. case '*':
  1745. case '?':
  1746. case '{':
  1747. {
  1748. int min=0,max=0;
  1749. switch (src[i])
  1750. {
  1751. case '+':min=1; max=-2; break;
  1752. case '*':min=0; max=-2; break;
  1753. case '?':
  1754. {
  1755. //if(src[i+1]=='?') return SetError(errInvalidQuantifiersCombination,i);
  1756. min=0; max=1;
  1757. break;
  1758. }
  1759. case '{':
  1760. {
  1761. i++;
  1762. int save=i;
  1763. min=GetNum(src,i);
  1764. max=min;
  1765. if (min<0)return SetError(errInvalidRange,save);
  1766. // i++;
  1767. if (src[i]==',')
  1768. {
  1769. if (src[i+1]=='}')
  1770. {
  1771. i++;
  1772. max=-2;
  1773. }
  1774. else
  1775. {
  1776. i++;
  1777. max=GetNum(src,i);
  1778. // i++;
  1779. if (max<min)return SetError(errInvalidRange,save);
  1780. }
  1781. }
  1782. if (src[i]!='}')return SetError(errInvalidRange,save);
  1783. }
  1784. }
  1785. pos--;
  1786. op=code+pos-1;
  1787. if (min==1 && max==1)continue;
  1788. op->range.min=min;
  1789. op->range.max=max;
  1790. switch (op->op)
  1791. {
  1792. case opLineStart:
  1793. case opLineEnd:
  1794. case opDataStart:
  1795. case opDataEnd:
  1796. case opWordBound:
  1797. case opNotWordBound:
  1798. {
  1799. return SetError(errInvalidQuantifiersCombination,i);
  1800. // op->range.op=op->op;
  1801. // op->op=opRange;
  1802. // continue;
  1803. }
  1804. case opCharAny:
  1805. case opCharAnyAll:
  1806. {
  1807. op->range.op=op->op;
  1808. op->op=opAnyRange;
  1809. break;
  1810. }
  1811. case opType:
  1812. {
  1813. op->op=opTypeRange;
  1814. break;
  1815. }
  1816. case opNotType:
  1817. {
  1818. op->op=opNotTypeRange;
  1819. break;
  1820. }
  1821. case opSymbolIgnoreCase:
  1822. case opSymbol:
  1823. {
  1824. op->op=opSymbolRange;
  1825. break;
  1826. }
  1827. case opNotSymbol:
  1828. case opNotSymbolIgnoreCase:
  1829. {
  1830. op->op=opNotSymbolRange;
  1831. break;
  1832. }
  1833. case opSymbolClass:
  1834. {
  1835. op->op=opClassRange;
  1836. break;
  1837. }
  1838. case opBackRef:
  1839. {
  1840. op->op=opBackRefRange;
  1841. break;
  1842. }
  1843. #ifdef NAMEDBRACKETS
  1844. case opNamedBackRef:
  1845. {
  1846. op->op=opNamedRefRange;
  1847. } break;
  1848. #endif
  1849. case opClosingBracket:
  1850. {
  1851. op=op->bracket.pairindex;
  1852. if (op->op!=opOpenBracket)return SetError(errInvalidQuantifiersCombination,i);
  1853. op->range.min=min;
  1854. op->range.max=max;
  1855. op->op=opBracketRange;
  1856. break;
  1857. }
  1858. default:
  1859. {
  1860. dpf(("OP=%d\n",op->op));
  1861. return SetError(errInvalidQuantifiersCombination,i);
  1862. }
  1863. }//switch(code.op)
  1864. if (src[i+1]=='?')
  1865. {
  1866. op->op++;
  1867. i++;
  1868. }
  1869. continue;
  1870. }// case +*?{
  1871. case ' ':
  1872. case '\t':
  1873. case '\n':
  1874. case '\r':
  1875. {
  1876. if (options&OP_XTENDEDSYNTAX)
  1877. {
  1878. pos--;
  1879. continue;
  1880. }
  1881. }
  1882. #ifdef RELIB
  1883. case '%':
  1884. {
  1885. i++;
  1886. int len=0;
  1887. while (src[i+len]!='%')len++;
  1888. op->op=opLibCall;
  1889. #ifdef RE_NO_NEWARRAY
  1890. op->rename=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
  1891. #else
  1892. op->rename=new rechar[len+1];
  1893. #endif
  1894. memcpy(op->rename,src+i,len*sizeof(rechar));
  1895. op->rename[len]=0;
  1896. i+=len;
  1897. continue;
  1898. }
  1899. #endif
  1900. default:
  1901. {
  1902. op->op=options&OP_IGNORECASE?opSymbolIgnoreCase:opSymbol;
  1903. if (ignorecase)
  1904. {
  1905. op->symbol=TOLOWER(src[i]);
  1906. }
  1907. else
  1908. {
  1909. op->symbol=src[i];
  1910. }
  1911. }
  1912. }//switch(src[i])
  1913. }//for()
  1914. op=code+pos;
  1915. pos++;
  1916. brdepth--;
  1917. while (brdepth>=0 && brackets[brdepth]->op==opAlternative)
  1918. {
  1919. brackets[brdepth]->alternative.endindex=op;
  1920. brdepth--;
  1921. }
  1922. op->op=opClosingBracket;
  1923. op->bracket.pairindex=code;
  1924. code->bracket.pairindex=op;
  1925. #ifdef RE_DEBUG
  1926. op->srcpos=i;
  1927. #endif
  1928. op=code+pos;
  1929. //pos++;
  1930. op->op=opRegExpEnd;
  1931. #ifdef RE_DEBUG
  1932. op->srcpos=i+1;
  1933. #endif
  1934. return 1;
  1935. }
  1936. inline void RegExp::PushState()
  1937. {
  1938. stackcount++;
  1939. #ifdef RELIB
  1940. stackusage++;
  1941. #endif
  1942. if (stackcount==STACK_PAGE_SIZE)
  1943. {
  1944. if (lastpage->next)
  1945. {
  1946. lastpage=lastpage->next;
  1947. stack=lastpage->stack;
  1948. }
  1949. else
  1950. {
  1951. lastpage->next=new StateStackPage;
  1952. lastpage->next->prev=lastpage;
  1953. lastpage=lastpage->next;
  1954. lastpage->next=nullptr;
  1955. #ifdef RE_NO_NEWARRAY
  1956. lastpage->stack=static_cast<StateStackItem*>
  1957. (CreateArray(sizeof(StateStackItem), STACK_PAGE_SIZE,
  1958. StateStackItem::OnCreate));
  1959. #else
  1960. lastpage->stack=new StateStackItem[STACK_PAGE_SIZE];
  1961. #endif // RE_NO_NEWARRAY
  1962. stack=lastpage->stack;
  1963. }
  1964. stackcount=0;
  1965. }
  1966. st=&stack[stackcount];
  1967. }
  1968. inline int RegExp::PopState()
  1969. {
  1970. stackcount--;
  1971. #ifdef RELIB
  1972. stackusage--;
  1973. if (stackusage<0)return 0;
  1974. #endif
  1975. if (stackcount<0)
  1976. {
  1977. if (!lastpage->prev)
  1978. return 0;
  1979. lastpage=lastpage->prev;
  1980. stack=lastpage->stack;
  1981. stackcount=STACK_PAGE_SIZE-1;
  1982. }
  1983. st=&stack[stackcount];
  1984. return 1;
  1985. }
  1986. inline StateStackItem *RegExp::GetState()
  1987. {
  1988. int tempcount=stackcount;
  1989. #ifdef RELIB
  1990. if (!stackusage)return 0;
  1991. #endif
  1992. StateStackPage* temppage=lastpage;
  1993. StateStackItem* tempstack=lastpage->stack;
  1994. tempcount--;
  1995. if (tempcount<0)
  1996. {
  1997. if (!temppage->prev)
  1998. return 0;
  1999. temppage=temppage->prev;
  2000. tempstack=temppage->stack;
  2001. tempcount=STACK_PAGE_SIZE-1;
  2002. }
  2003. return &tempstack[tempcount];
  2004. }
  2005. inline StateStackItem *RegExp::FindStateByPos(PREOpCode pos,int op)
  2006. {
  2007. #ifdef RELIB
  2008. int tempusage=stackusage;
  2009. #endif
  2010. int tempcount=stackcount;
  2011. StateStackPage* temppage=lastpage;
  2012. StateStackItem* tempstack=lastpage->stack;
  2013. do
  2014. {
  2015. tempcount--;
  2016. #ifdef RELIB
  2017. tempusage--;
  2018. if (tempusage<0)return 0;
  2019. #endif
  2020. if (tempcount<0)
  2021. {
  2022. if (!temppage->prev)
  2023. return 0;
  2024. temppage=temppage->prev;
  2025. tempstack=temppage->stack;
  2026. tempcount=STACK_PAGE_SIZE-1;
  2027. }
  2028. }
  2029. while (tempstack[tempcount].pos!=pos || tempstack[tempcount].op!=op);
  2030. return &tempstack[tempcount];
  2031. }
  2032. inline int RegExp::StrCmp(const prechar& str,const prechar _st,const prechar ed)
  2033. {
  2034. const prechar save=str;
  2035. if (ignorecase)
  2036. {
  2037. while (_st<ed)
  2038. {
  2039. if (TOLOWER(*str)!=TOLOWER(*_st)) {str=save; return 0;}
  2040. str++;
  2041. _st++;
  2042. }
  2043. }
  2044. else
  2045. {
  2046. while (_st<ed)
  2047. {
  2048. if (*str!=*_st) {str=save; return 0;}
  2049. str++;
  2050. _st++;
  2051. }
  2052. }
  2053. return 1;
  2054. }
  // Shorthand for the opcode that `op` currently points at.
  2055. #define OP (*op)
  /*
   * MINSKIP(cmp) - statement-block macro; `cmp` is an expression supplied by
   * the caller and is evaluated as part of every loop condition.
   *
   * The expansion uses these names from the expansion site: `op`, `str`,
   * `strend`, `st`, and (for the opSymbolClass case only) `cl`.
   *
   * It switches on the opcode that follows the current one (op->next->op).
   * For opSymbol, opNotSymbol, opSymbolIgnoreCase, opNotSymbolIgnoreCase,
   * opType, opNotType and opSymbolClass it first tests whether the current
   * character *str fails that following opcode's check. If it does, it loops
   * while `str<strend`, `cmp` holds and the post-decremented `st->max` is
   * non-zero; each iteration advances `str` by one and then breaks if str[1]
   * also fails the same check. Any other following opcode leaves `str` and
   * `st->max` untouched.
   *
   * NOTE(review): str[1] is read after the increment without a separate
   * comparison against `strend`; presumably the caller's buffer makes that
   * read valid - confirm.
   */
  2056. #define MINSKIP(cmp) \
  2057. { int jj; \
  2058. switch(op->next->op) \
  2059. { \
  2060. case opSymbol: \
  2061. { \
  2062. jj=op->next->symbol; \
  2063. if(*str!=jj) \
  2064. while(str<strend && cmp && st->max--)\
  2065. {\
  2066. str++;\
  2067. if(str[1]!=jj)break;\
  2068. } \
  2069. break; \
  2070. } \
  2071. case opNotSymbol: \
  2072. { \
  2073. jj=op->next->symbol; \
  2074. if(*str==jj) \
  2075. while(str<strend && cmp && st->max--)\
  2076. {\
  2077. str++;\
  2078. if(str[1]==jj)break;\
  2079. } \
  2080. break; \
  2081. } \
  2082. case opSymbolIgnoreCase: \
  2083. { \
  2084. jj=op->next->symbol; \
  2085. if(TOLOWER(*str)!=jj) \
  2086. while(str<strend && cmp && st->max--)\
  2087. {\
  2088. str++;\
  2089. if(TOLOWER(str[1])!=jj)break;\
  2090. } \
  2091. break; \
  2092. } \
  2093. case opNotSymbolIgnoreCase: \
  2094. { \
  2095. jj=op->next->symbol; \
  2096. if(TOLOWER(*str)==jj) \
  2097. while(str<strend && cmp && st->max--)\
  2098. {\
  2099. str++;\
  2100. if(TOLOWER(str[1])==jj)break;\
  2101. } \
  2102. break; \
  2103. } \
  2104. case opType: \
  2105. { \
  2106. jj=op->next->type; \
  2107. if(!(ISTYPE(*str,jj))) \
  2108. while(str<strend && cmp && st->max--)\
  2109. {\
  2110. str++;\
  2111. if(!(ISTYPE(str[1],jj)))break;\
  2112. } \
  2113. break; \
  2114. } \
  2115. case opNotType: \
  2116. { \
  2117. jj=op->next->type; \
  2118. if((ISTYPE(*str,jj))) \
  2119. while(str<strend && cmp && st->max--)\
  2120. {\
  2121. str++;\
  2122. if((ISTYPE(str[1],jj)))break;\
  2123. } \
  2124. break; \
  2125. } \
  2126. case opSymbolClass: \
  2127. { \
  2128. cl=op->next->symbolclass; \
  2129. if(!GetBit(cl,*str)) \
  2130. while(str<strend && cmp && st->max--)\
  2131. {\
  2132. str++;\
  2133. if(!GetBit(cl,str[1]))break;\
  2134. } \
  2135. break; \
  2136. } \
  2137. } \
  2138. }
  #ifdef RELIB
  // Recursively tears down a match list: for every item, the nested `sublist`
  // is processed first and the item's `sublist` pointer is then reset to
  // nullptr; finally Clean() is called on the list itself.
  //
  // A null `ml` is accepted and ignored. This function writes nullptr into
  // `sublist` fields itself, so a null list pointer is a value it can be
  // handed on recursion; without the guard `ml->Count()` would dereference it.
  //
  // NOTE(review): the sublist objects are not deleted here - presumably they
  // are owned and released elsewhere; confirm.
  static void KillMatchList(MatchList *ml)
  {
      if (!ml)
          return;

      for (int i=0; i<ml->Count(); i++)
      {
          KillMatchList((*ml)[i].sublist);
          (*ml)[i].sublist=nullptr;
      }

      ml->Clean();
  }
  #endif
  2150. int RegExp::InnerMatch(const prechar str,const prechar strend,PMatch match,int& matchcount
  2151. #ifdef NAMEDBRACKETS
  2152. ,PMatchHash hmatch
  2153. #endif
  2154. )
  2155. {
  2156. // register prechar str=start;
  2157. int i,j;
  2158. int minimizing;
  2159. PREOpCode op,tmp=nullptr;
  2160. PMatch m;
  2161. #ifdef UNICODE
  2162. UniSet *cl;
  2163. #else
  2164. prechar cl;
  2165. #endif
  2166. #ifdef RELIB
  2167. SMatchListItem ml;
  2168. #endif
  2169. int inrangebracket=0;
  2170. if (errorcode==errNotCompiled)return 0;
  2171. if (matchcount<maxbackref)return SetError(errNotEnoughMatches,maxbackref);
  2172. #ifdef NAMEDBRACKETS
  2173. if (havenamedbrackets && !hmatch)return SetError(errNoStorageForNB,0);
  2174. #endif
  2175. #ifdef RELIB
  2176. if (reclevel<=1)
  2177. {
  2178. #endif
  2179. stackcount=0;
  2180. lastpage=firstpage;
  2181. stack=lastpage->stack;
  2182. st=&stack[0];
  2183. #ifdef RELIB
  2184. }
  2185. #endif
  2186. StateStackItem *ps;
  2187. errorcode=errNone;
  2188. /*for(i=0;i<matchcount;i++)
  2189. {
  2190. match[i].start=-1;
  2191. match[i].end=-1;
  2192. }*/
  2193. if (bracketscount<matchcount)matchcount=bracketscount;
  2194. memset(match,-1,sizeof(*match)*matchcount);
  2195. for (op=code; op; op=op->next)
  2196. {
  2197. //dpf(("op:%s,\tpos:%d,\tstr:%d\n",ops[OP.op],pos,str-start));
  2198. dpf(("=================\n"));
  2199. dpf(("S:%s\n%*s\n",start,str-start+3,"^"));
  2200. dpf(("R:%s\n%*s\n",resrc,OP.srcpos+3,"^"));
  2201. if (str<=strend)
  2202. switch (OP.op)
  2203. {
  2204. case opLineStart:
  2205. {
  2206. if (str==start || str[-1]==0x0d || str[-1]==0x0a)continue;
  2207. break;
  2208. }
  2209. case opLineEnd:
  2210. {
  2211. if (str==strend)continue;
  2212. if (str[0]==0x0d || str[0]==0x0a)
  2213. {
  2214. if (str[0]==0x0d)str++;
  2215. if (str[0]==0x0a)str++;
  2216. continue;
  2217. }
  2218. break;
  2219. }
  2220. case opDataStart:
  2221. {
  2222. if (str==start)continue;
  2223. break;
  2224. }
  2225. case opDataEnd:
  2226. {
  2227. if (str==strend)continue;
  2228. break;
  2229. }
  2230. case opWordBound:
  2231. {
  2232. if ((str==start && ISWORD(*str))||
  2233. (!(ISWORD(str[-1])) && ISWORD(*str)) ||
  2234. (!(ISWORD(*str)) && ISWORD(str[-1])) ||
  2235. (str==strend && ISWORD(str[-1])))continue;
  2236. break;
  2237. }
  2238. case opNotWordBound:
  2239. {
  2240. if (!((str==start && ISWORD(*str))||
  2241. (!(ISWORD(str[-1])) && ISWORD(*str)) ||
  2242. (!(ISWORD(*str)) && ISWORD(str[-1])) ||
  2243. (str==strend && ISWORD(str[-1]))))continue;
  2244. break;
  2245. }
  2246. case opType:
  2247. {
  2248. if (ISTYPE(*str,OP.type))
  2249. {
  2250. str++;
  2251. continue;
  2252. }
  2253. break;
  2254. }
  2255. case opNotType:
  2256. {
  2257. if (!(ISTYPE(*str,OP.type)))
  2258. {
  2259. str++;
  2260. continue;
  2261. }
  2262. break;
  2263. }
  2264. case opCharAny:
  2265. {
  2266. if (*str!=0x0d && *str!=0x0a)
  2267. {
  2268. str++;
  2269. continue;
  2270. }
  2271. break;
  2272. }
  2273. case opCharAnyAll:
  2274. {
  2275. str++;
  2276. continue;
  2277. }
  2278. case opSymbol:
  2279. {
  2280. if (*str==OP.symbol)
  2281. {
  2282. str++;
  2283. continue;
  2284. }
  2285. break;
  2286. }
  2287. case opNotSymbol:
  2288. {
  2289. if (*str!=OP.symbol)
  2290. {
  2291. str++;
  2292. continue;
  2293. }
  2294. break;
  2295. }
  2296. case opSymbolIgnoreCase:
  2297. {
  2298. if (TOLOWER(*str)==OP.symbol)
  2299. {
  2300. str++;
  2301. continue;
  2302. }
  2303. break;
  2304. }
  2305. case opNotSymbolIgnoreCase:
  2306. {
  2307. if (TOLOWER(*str)!=OP.symbol)
  2308. {
  2309. str++;
  2310. continue;
  2311. }
  2312. break;
  2313. }
  2314. case opSymbolClass:
  2315. {
  2316. if (GetBit(OP.symbolclass,*str))
  2317. {
  2318. str++;
  2319. continue;
  2320. }
  2321. break;
  2322. }
  2323. case opOpenBracket:
  2324. {
  2325. if (OP.bracket.index>=0 && OP.bracket.index<matchcount)
  2326. {
  2327. //if (inrangebracket) Mantis#1388
  2328. {
  2329. st->op=opOpenBracket;
  2330. st->pos=op;
  2331. st->min=match[OP.bracket.index].start;
  2332. st->max=match[OP.bracket.index].end;
  2333. PushState();
  2334. }
  2335. match[OP.bracket.index].start=(int)(str-start);
  2336. }
  2337. if (OP.bracket.nextalt)
  2338. {
  2339. st->op=opAlternative;
  2340. st->pos=OP.bracket.nextalt;
  2341. st->savestr=str;
  2342. PushState();
  2343. }
  2344. continue;
  2345. }
  2346. #ifdef NAMEDBRACKETS
  2347. case opNamedBracket:
  2348. {
  2349. if (hmatch)
  2350. {
  2351. PMatch m2;
  2352. if (!hmatch->Exists((char*)OP.nbracket.name))
  2353. {
  2354. tag_Match sm;
  2355. sm.start=-1;
  2356. sm.end=-1;
  2357. m2=hmatch->SetItem((char*)OP.nbracket.name,sm);
  2358. }
  2359. else
  2360. {
  2361. m2=hmatch->GetPtr((char*)OP.nbracket.name);

Large files are truncated, but you can click here to view the full file