RegExp.cpp | searchcode

/unicode_far/RegExp.cpp

Large files are truncated, but you can click here to view the full file

/*
  Copyright (c) 2000 Konstantin Stupnik
  Copyright (c) 2008 Far Group
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
  1. Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
  3. The name of the authors may not be used to endorse or promote products
     derived from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

  Regular expressions support library.
  The syntax and semantics of regexps are very close to
  the syntax and semantics of Perl regexps.
*/

#include "headers.hpp"
#pragma hdrstop

#include "RegExp.hpp"

#ifndef RE_FAR_MODE

#ifndef UNICODE
#ifndef RE_EXTERNAL_CTYPE
#include <ctype.h>
#endif
#else
#ifndef __LINUX
#include <windows.h>
#endif
#endif
#ifndef RE_NO_STRING_H
#include <string.h>
#endif

#else

#define malloc xf_malloc
#define free xf_free

#endif

#ifdef RE_DEBUG
#include <stdio.h>
#ifdef dpf
#undef dpf
#endif
#define dpf(x) printf x

// Printable opcode names for RE_DEBUG traces.
// The order mirrors the numeric values of enum REOp ("opNone" stands for
// value 0, which the enum itself does not name), so entries must be added
// or removed here in lockstep with the enum, including the conditional
// NAMEDBRACKETS and RELIB entries.
// The elements point at string literals, hence const char*.
const char *ops[]=
{
	"opNone",
	"opLineStart",
	"opLineEnd",
	"opDataStart",
	"opDataEnd",
	"opWordBound",
	"opNotWordBound",
	"opType",
	"opNotType",
	"opCharAny",
	"opCharAnyAll",
	"opSymbol",
	"opNotSymbol",
	"opSymbolIgnoreCase",
	"opNotSymbolIgnoreCase",
	"opSymbolClass",
	"opOpenBracket",
	"opClosingBracket",
	"opAlternative",
	"opBackRef",
#ifdef NAMEDBRACKETS
	"opNamedBracket",
	"opNamedBackRef",
#endif
	"opRangesBegin",
	"opRange",
	"opMinRange",
	"opSymbolRange",
	"opSymbolMinRange",
	"opNotSymbolRange",
	"opNotSymbolMinRange",
	"opAnyRange",
	"opAnyMinRange",
	"opTypeRange",
	"opTypeMinRange",
	"opNotTypeRange",
	"opNotTypeMinRange",
	"opClassRange",
	"opClassMinRange",
	"opBracketRange",
	"opBracketMinRange",
	"opBackRefRange",
	"opBackRefMinRange",
#ifdef NAMEDBRACKETS
	"opNamedRefRange",
	"opNamedRefMinRange",
#endif
	"opRangesEnd",
	"opAssertionsBegin",
	"opLookAhead",
	"opNotLookAhead",
	"opLookBehind",
	"opNotLookBehind",
	"opAsserionsEnd",
	"opNoReturn",
#ifdef RELIB
	"opLibCall",
#endif
	"opRegExpEnd",
};

#else
#define dpf(x)
#endif

// Definitions of the static character tables of RegExp. They exist only in
// the non-UNICODE build with RE_STATIC_LOCALE defined.
#ifndef UNICODE
#ifdef RE_STATIC_LOCALE
#ifdef RE_EXTERNAL_CTYPE
// With RE_EXTERNAL_CTYPE only the pointers are defined here.
// NOTE(review): presumably the host application points them at its own
// tables before use - confirm against RegExp.hpp / the callers.
prechar RegExp::lc;
prechar RegExp::uc;
prechar RegExp::chartypes;
#else
// 256 bytes of backing storage per table, declared as int arrays and
// accessed through rechar* views.
int RegExp::ilc[256/sizeof(int)];
int RegExp::iuc[256/sizeof(int)];
int RegExp::ichartypes[256/sizeof(int)];
rechar* RegExp::lc=(rechar*)RegExp::ilc;
rechar* RegExp::uc=(rechar*)RegExp::iuc;
rechar* RegExp::chartypes=(rechar*)RegExp::ichartypes;
#endif
// Bitmap storage for the character classes (filled by InitLocale when it is
// compiled in), again an int array viewed as rechar*.
int RegExp::icharbits[256/sizeof(int)];
rechar* RegExp::charbits=(rechar*)RegExp::icharbits;
#endif
#endif

#ifdef UNICODE
// Character classification and case conversion for the UNICODE build.
// Every macro argument is parenthesised so that an arbitrary expression can
// be passed without changing how the expansion parses.
#ifndef __LINUX

// Windows: digits/spaces come from the C runtime, everything else from the
// Win32 character API.
#define ISDIGIT(c) iswdigit(c)
#define ISSPACE(c) iswspace(c)
#define ISWORD(c)  (IsCharAlphaNumeric(c) || (c)=='_')
#define ISLOWER(c) IsCharLower(c)
#define ISUPPER(c) IsCharUpper(c)
#define ISALPHA(c) IsCharAlpha(c)
// CharUpper/CharLower are handed the character value cast to a pointer and
// return the converted character the same way.
#define TOUPPER(c) ((rechar)(DWORD_PTR)CharUpper((LPTSTR)(DWORD_PTR)(c)))
#define TOLOWER(c) ((rechar)(DWORD_PTR)CharLower((LPTSTR)(DWORD_PTR)(c)))

#else

// Other platforms: plain <wctype.h> functions.
#define ISDIGIT(c) iswdigit(c)
#define ISSPACE(c) iswspace(c)
#define ISWORD(c)  (iswalnum(c) || (c)=='_')
#define ISLOWER(c) iswlower(c)
#define ISUPPER(c) iswupper(c)
#define ISALPHA(c) iswalpha(c)
#define TOUPPER(c) towupper(c)
#define TOLOWER(c) towlower(c)

#endif

// Tests a character against a single TYPE_* class.
#define ISTYPE(c,t) isType(c,t)

// Reports whether chr belongs to the character class selected by type
// (one of the TYPE_* constants). The result is non-zero when it does;
// an unrecognised type yields 0.
int isType(rechar chr,int type)
{
	int matched=false;

	switch (type)
	{
		case TYPE_DIGITCHAR:
			matched=ISDIGIT(chr);
			break;
		case TYPE_SPACECHAR:
			matched=ISSPACE(chr);
			break;
		case TYPE_WORDCHAR:
			matched=ISWORD(chr);
			break;
		case TYPE_LOWCASE:
			matched=ISLOWER(chr);
			break;
		case TYPE_UPCASE:
			matched=ISUPPER(chr);
			break;
		case TYPE_ALPHACHAR:
			matched=ISALPHA(chr);
			break;
	}

	return matched;
}

// Length of a zero-terminated rechar string, not counting the terminator.
int ushlen(const rechar* str)
{
	int count=0;

	while (str[count])
		++count;

	return count;
}

// From here on strlen() in this file means the rechar-aware version above.
#define strlen ushlen

// Character set used for [...] classes in the UNICODE build.
//
// Explicit members are kept in a sparse bitmap: the high byte of the
// character code selects one of 256 lazily allocated pages, each page is a
// 32-byte (256-bit) bitmap indexed by the low byte. On top of that the set
// can include whole TYPE_* classes (types), complements of TYPE_* classes
// (nottypes) and can be inverted as a whole (negative).
struct UniSet
{
	// Bitmap pages; a null entry means that no bit of that page is set.
	unsigned char* high[256];
	// Mask of TYPE_* classes whose members belong to the set.
	char types;
	// Mask of TYPE_* classes whose NON-members belong to the set.
	char nottypes;
	// Non-zero inverts the outcome of every membership test.
	char negative;

	UniSet()
	{
		ClearArray(high);
		types=0;
		nottypes=0;
		negative=0;
	}

	// Deep copy: every allocated page of src is duplicated.
	UniSet(const UniSet& src)
	{
		for (int i=0; i<256; i++)
		{
			if (src.high[i])
			{
				high[i]=new unsigned char[32];
				memcpy(high[i],src.high[i],32);
			}
			else
			{
				high[i]=nullptr;
			}
		}

		types=src.types;
		nottypes=src.nottypes;
		negative=src.negative;
	}

	// Deep assignment; pages already owned are reused where src has one too
	// and released where it has none.
	UniSet& operator=(const UniSet& src)
	{
		if (this != &src)
		{
			for (int i=0; i<256; i++)
			{
				if (src.high[i])
				{
					if (!high[i])
						high[i]=new unsigned char[32];

					memcpy(high[i],src.high[i],32);
				}
				else
				{
					// delete [] on a null pointer is a no-op
					delete [] high[i];
					high[i]=nullptr;
				}
			}

			types=src.types;
			nottypes=src.nottypes;
			negative=src.negative;
		}

		return (*this);
	}

	// Returns the set to the freshly constructed (empty) state.
	void Reset()
	{
		for (int i=0; i<256; i++)
		{
			delete [] high[i];
			high[i]=nullptr;
		}

		types=0;
		nottypes=0;
		negative=0;
	}

	// Proxy returned by the non-const operator[] so that
	// "set[chr]=value" and "!set[chr]" work.
	struct Setter
	{
		UniSet& set;
		rechar idx;
		Setter(UniSet& s,rechar chr):set(s),idx(chr)
		{
		}
		void operator=(int val)
		{
			if (val)
				set.SetBit(idx);
			else
				set.ClearBit(idx);
		}
		bool operator!()const
		{
			return !set.GetBit(idx);
		}
	};

	bool operator[](rechar idx)const
	{
		return GetBit(idx);
	}
	Setter operator[](rechar idx)
	{
		return Setter(*this,idx);
	}

	~UniSet()
	{
		for (int i=0; i<256; i++)
			delete [] high[i];
	}

	// True when chr belongs to the single TYPE_* class t.
	bool CheckType(int t, rechar chr) const
	{
		switch (t)
		{
			case TYPE_DIGITCHAR:if (ISDIGIT(chr))return true; else break;
			case TYPE_SPACECHAR:if (ISSPACE(chr))return true; else break;
			case TYPE_WORDCHAR: if (ISWORD(chr)) return true; else break;
			case TYPE_LOWCASE:  if (ISLOWER(chr))return true; else break;
			case TYPE_UPCASE:   if (ISUPPER(chr))return true; else break;
			case TYPE_ALPHACHAR:if (ISALPHA(chr))return true; else break;
		}

		return false;
	}

	// Membership test. A character is "in" when it matches one of the
	// included classes, misses one of the excluded classes, or has its bit
	// set in the bitmap; the final answer is inverted when negative is set.
	bool GetBit(rechar chr) const
	{
		const bool in=negative?false:true;
		const bool out=negative?true:false;

		if (types)
		{
			// walk the mask bits downwards, starting at TYPE_ALPHACHAR
			for (int t=TYPE_ALPHACHAR; t; t>>=1)
			{
				if ((types&t) && CheckType(t,chr))
					return in;
			}
		}

		if (nottypes)
		{
			for (int t=TYPE_ALPHACHAR; t; t>>=1)
			{
				if ((nottypes&t) && !CheckType(t,chr))
					return in;
			}
		}

		unsigned char h=(chr&0xff00)>>8;

		if (!high[h])
			return out;

		if (high[h][(chr&0xff)>>3]&(1<<(chr&7)))
			return in;

		return out;
	}

	// Adds chr to the bitmap, allocating its page on first use.
	void SetBit(rechar  chr)
	{
		unsigned char h=(chr&0xff00)>>8;

		if (!high[h])
		{
			high[h]=new unsigned char[32];
			memset(high[h],0,32);
		}

		high[h][(chr&0xff)>>3]|=1<<(chr&7);
	}

	// Removes chr from the bitmap. A page that was never allocated already
	// reads as all zeroes in GetBit, so there is nothing to clear and no
	// reason to allocate one.
	void ClearBit(rechar  chr)
	{
		unsigned char h=(chr&0xff00)>>8;

		if (high[h])
			high[h][(chr&0xff)>>3]&=~(1<<(chr&7));
	}

};

// Function-style wrappers that forward to the UniSet member functions of the
// same name; cls is used with "->", so it has to be a pointer expression.
// After this point GetBit/SetBit followed by '(' is a two-argument macro call.
#define GetBit(cls,chr) cls->GetBit(chr)
#define SetBit(cls,chr) cls->SetBit(chr)

#else
// Character classification and case conversion for the non-UNICODE build:
// plain lookups in the chartypes / uc / lc tables, indexed by the character.
// NOTE(review): c is used directly as an array index, so it presumably has
// to be an unsigned character value - confirm the definition of rechar.
#define ISDIGIT(c) ((chartypes[c]&TYPE_DIGITCHAR))
#define ISSPACE(c) ((chartypes[c]&TYPE_SPACECHAR))
#define ISWORD(c)  ((chartypes[c]&TYPE_WORDCHAR))
#define ISLOWER(c) ((chartypes[c]&TYPE_LOWCASE))
#define ISUPPER(c) ((chartypes[c]&TYPE_UPCASE))
#define ISALPHA(c) ((chartypes[c]&TYPE_ALPHACHAR))
#define TOUPPER(c) uc[c]
#define TOLOWER(c) lc[c]

// t is parenthesised so that a combined mask such as A|B is applied as a
// whole instead of being parsed as (chartypes[c]&A)|B.
#define ISTYPE(c,t) (chartypes[c]&(t))

#endif //UNICODE



enum REOp
{
	opLineStart=0x1,        // ^
	opLineEnd,              // $
	opDataStart,            // \A and ^ in single line mode
	opDataEnd,              // \Z and $ in signle line mode

	opWordBound,            // \b
	opNotWordBound,         // \B

	opType,                 // \d\s\w\l\u\e
	opNotType,              // \D\S\W\L\U\E

	opCharAny,              // .
	opCharAnyAll,           // . in single line mode

	opSymbol,               // single char
	opNotSymbol,            // [^c] negative charclass with one char
	opSymbolIgnoreCase,     // symbol with IGNORE_CASE turned on
	opNotSymbolIgnoreCase,  // [^c] with ignore case set.

	opSymbolClass,          // [chars]

	opOpenBracket,          // (

	opClosingBracket,       // )

	opAlternative,          // |

	opBackRef,              // \1

#ifdef NAMEDBRACKETS
	opNamedBracket,         // (?{name}
	opNamedBackRef,         // \p{name}
#endif


	opRangesBegin,          // for op type check

	opRange,                // generic range
	opMinRange,             // generic minimizing range

	opSymbolRange,          // quantifier applied to single char
	opSymbolMinRange,       // minimizing quantifier

	opNotSymbolRange,       // [^x]
	opNotSymbolMinRange,

	opAnyRange,             // .
	opAnyMinRange,

	opTypeRange,            // \w, \d, \s
	opTypeMinRange,

	opNotTypeRange,         // \W, \D, \S
	opNotTypeMinRange,

	opClassRange,           // for char classes
	opClassMinRange,

	opBracketRange,         // for brackets
	opBracketMinRange,

	opBackRefRange,         // for backrefs
	opBackRefMinRange,

#ifdef NAMEDBRACKETS
	opNamedRefRange,
	opNamedRefMinRange,
#endif

	opRangesEnd,            // end of ranges

	opAssertionsBegin,

	opLookAhead,
	opNotLookAhead,

	opLookBehind,
	opNotLookBehind,

	opAsserionsEnd,

	opNoReturn,

#ifdef RELIB
	opLibCall,
#endif
	opRegExpEnd
};

// One instruction of the compiled regular expression.
// The instructions live in one array and are additionally chained through
// next/prev (Compile links neighbouring array elements). The anonymous union
// at the end holds the operand; which member is valid depends on op.
struct REOpCode
{
	// Opcode, a value of enum REOp.
	int op;
	// Neighbouring instructions in the program.
	REOpCode *next,*prev;
#ifdef RE_DEBUG
	// Position in the pattern source this instruction was compiled from.
	int    srcpos;
#endif
#ifdef RE_NO_NEWARRAY
	// Per-element callbacks for RegExp::CreateArray / RegExp::DeleteArray,
	// used instead of a constructor and destructor.
	static void OnCreate(void *ptr);
	static void OnDelete(void *ptr);
#else
	REOpCode()
	{
		ClearStruct(*this);
	}
	// Releases the operand owned by the instruction, if any.
	~REOpCode();
#endif

	// Operand of an opening or closing bracket.
	struct SBracket
	{
		REOpCode* nextalt;      // first opAlternative inside the bracket, if any
		int index;              // capture index; -1 for a non-capturing bracket
		REOpCode* pairindex;    // the matching bracket instruction
	};

	// Operand of a quantified instruction: the operand of the quantified
	// item plus the repetition bounds.
	struct SRange
	{
		union
		{
			SBracket bracket;
			int op;
			rechar symbol;
#ifdef UNICODE
			UniSet *symbolclass;
#else
			prechar symbolclass;
#endif
			REOpCode* nextalt;
			int refindex;
#ifdef NAMEDBRACKETS
			prechar refname;
#endif
			int type;
		};
		int min,max;
	};

	// Operand of a named bracket; same shape as SBracket with a name
	// instead of the index.
	struct SNamedBracket
	{
		REOpCode* nextalt;
		prechar name;
		REOpCode* pairindex;
	};

	// Operand of a look-ahead / look-behind assertion.
	struct SAssert
	{
		REOpCode* nextalt;
		int length;             // fixed length of a look-behind body
		REOpCode* pairindex;    // the matching closing bracket
	};

	// Operand of opAlternative.
	struct SAlternative
	{
		REOpCode* nextalt;      // next alternative of the same group
		REOpCode* endindex;     // closing bracket of the enclosing group
	};


	union
	{
		SRange range;
		SBracket bracket;
#ifdef NAMEDBRACKETS
		SNamedBracket nbracket;
#endif
		SAssert assert;
		SAlternative alternative;
		rechar symbol;
#ifdef UNICODE
		UniSet *symbolclass;
#else
		prechar symbolclass;
#endif
		int refindex;
#ifdef NAMEDBRACKETS
		prechar refname;
#endif
#ifdef RELIB
		prechar rename;
#endif

		int type;
	};
};

#ifdef RE_NO_NEWARRAY
void StateStackItem::OnCreate(void *ptr)
{
	memset(ptr,0,sizeof(StateStackItem));
}

void REOpCode::OnCreate(void *ptr)
{
	memset(ptr,0,sizeof(REOpCode));
}

// Element finaliser handed to RegExp::DeleteArray: releases the operand owned
// by the instruction stored at ptr, if its opcode owns one.
//
// In the UNICODE build a character class is a UniSet object created with
// "new UniSet()", so it has to be released with delete (which also frees the
// bitmap pages in ~UniSet) rather than with free().
void REOpCode::OnDelete(void *ptr)
{
	REOpCode &o=*static_cast<REOpCode*>(ptr);

	switch (o.op)
	{
		case opSymbolClass:
#ifdef UNICODE
			delete o.symbolclass;
#else

			if (o.symbolclass)
				free(o.symbolclass);

#endif
			break;
		case opClassRange:
		case opClassMinRange:
#ifdef UNICODE
			delete o.range.symbolclass;
#else

			if (o.range.symbolclass)
				free(o.range.symbolclass);

#endif
			break;
#ifdef NAMEDBRACKETS
		case opNamedBracket:

			if (o.nbracket.name)
				free(o.nbracket.name);

			break;
		case opNamedBackRef:

			if (o.refname)
				free(o.refname);

			break;
#endif
#ifdef RELIB
		case opLibCall:

			if (o.rename)
				free(o.rename);

			break;
#endif
	}
}

// Allocates an array of total elements of size bytes each with malloc and
// runs Create (when given) on every element.
//
// Memory layout of the allocation:
//   unsigned int  element size
//   unsigned int  element count
//   elements...   <- the returned pointer
// DeleteArray relies on this header to find the count and the block start.
//
// Returns nullptr when size or total is zero, when the requested byte size
// does not fit into size_t, or when the allocation fails.
void *RegExp::CreateArray(const unsigned int size, const unsigned int total,
                          ON_CREATE_FUNC Create)
{
	if (!total || !size)
		return nullptr;

	const size_t header=2*sizeof(unsigned int);
	const size_t maxbytes=static_cast<size_t>(-1);

	// header+size*total must not wrap around, otherwise the block would be
	// smaller than the elements written into it
	if (total>(maxbytes-header)/size)
		return nullptr;

	const size_t payload=static_cast<size_t>(size)*total;
	unsigned char *record=static_cast<unsigned char*>(malloc(header+payload));

	if (!record)
		return nullptr;

	unsigned char *array=record+header;
	*reinterpret_cast<int*>(record)=size;
	*reinterpret_cast<int*>(record+sizeof(unsigned int))=total;

	if (Create)
	{
		for (unsigned int f=0; f<total; ++f)
			Create(array+static_cast<size_t>(size)*f);
	}

	return array;
}

// Counterpart of CreateArray: runs Delete (when given) on every element,
// frees the whole block and sets *array to nullptr.
// The element size and count are read from the two unsigned-int-sized header
// fields stored immediately in front of the elements.
// A null array or a null *array is ignored.
void RegExp::DeleteArray(void **array, ON_DELETE_FUNC Delete)
{
	if (!array || !*array)
		return;

	unsigned char *items=static_cast<unsigned char*>(*array);
	unsigned char *header=items-2*sizeof(unsigned int);

	if (Delete)
	{
		const unsigned int itemsize=*reinterpret_cast<int*>(header);
		const unsigned int count=*reinterpret_cast<int*>(header+sizeof(unsigned int));

		for (unsigned int n=0; n!=count; ++n)
			Delete(items+itemsize*n);
	}

	free(header);
	*array=nullptr;
}
#else  // RE_NO_NEWARRAY
// Releases the operand owned by the instruction. Only some opcodes own one:
// character classes (a UniSet object in the UNICODE build, a rechar array
// otherwise) and the name strings of named brackets / references / library
// calls.
REOpCode::~REOpCode()
{
	switch (op)
	{
		case opSymbolClass:
#ifdef UNICODE
			delete symbolclass;
#else
			delete [] symbolclass;
#endif
			break;

		case opClassRange:
		case opClassMinRange:
#ifdef UNICODE
			delete range.symbolclass;
#else
			delete [] range.symbolclass;
#endif
			break;

#ifdef NAMEDBRACKETS
		case opNamedBracket:
			delete [] nbracket.name;
			break;

		case opNamedBackRef:
			delete [] refname;
			break;
#endif
#ifdef RELIB
		case opLibCall:
			delete [] rename;
			break;
#endif
	}
}
#endif // RE_NO_NEWARRAY



// Puts the object into its initial state and compiles expr with the given
// options. Called by the (expr, options) constructor.
void RegExp::Init(const prechar expr,int options)
{
	// nothing compiled yet, no search window
	code=nullptr;
	start=nullptr;
	end=nullptr;
	trimend=nullptr;
	// no bracket handler installed
	brhandler=nullptr;
	brhdata=nullptr;
#ifdef NAMEDBRACKETS
	havenamedbrackets=0;
#endif
#ifndef UNICODE
#ifndef RE_STATIC_LOCALE
#ifndef RE_EXTERNAL_CTYPE
	// per-object character tables
	InitLocale();
#endif //RE_EXTERNAL_CTYPE
#endif //RE_STATIC_LOCALE
#endif //UNICODE
	// the state stack starts as the single built-in page
	stack=&initstack[0];
	st=&stack[0];
	initstackpage.stack=stack;
	initstackpage.next=nullptr;
	initstackpage.prev=nullptr;
	firstpage=lastpage=&initstackpage;
#ifdef UNICODE
	firstptr=new UniSet();
	// from here on "first" in this file refers to the set behind firstptr
#define first (*firstptr)
#endif
	Compile((const RECHAR*)expr,options);
}

// Default constructor: creates an object without a compiled expression
// (errorcode is errNotCompiled until Compile succeeds).
RegExp::RegExp():
	code(nullptr),
#ifdef NAMEDBRACKETS
	havenamedbrackets(0),
#endif
	// the state stack starts out on the built-in page;
	// st is initialised from stack, so stack has to be initialised first
	stack(&initstack[0]),
	st(&stack[0]),
	// default (non-OP_CPPMODE) delimiter and escape characters
	slashChar('/'),
	backslashChar('\\'),
	firstpage(&initstackpage),
	lastpage(&initstackpage),
#ifdef UNICODE
	// owned; released in the destructor
	firstptr(new UniSet()),
#endif
	errorcode(errNotCompiled),
	start(nullptr),
	end(nullptr),
	trimend(nullptr),
#ifdef RE_DEBUG
	resrc(nullptr),
#endif
	brhandler(nullptr),
	brhdata(nullptr)
{
#ifndef UNICODE
#ifndef RE_STATIC_LOCALE
#ifndef RE_EXTERNAL_CTYPE
	// per-object character tables
	InitLocale();
#endif
#endif
#endif//UNICODE
	// the built-in page is the only page of the stack
	initstackpage.stack=stack;
	firstpage->next=nullptr;
	firstpage->prev=nullptr;
}

// Constructs the object and immediately compiles expr with the given
// options; everything except the defaults set here is initialised by Init.
RegExp::RegExp(const RECHAR* expr,int options):
	slashChar('/'),
	backslashChar('\\')
#ifdef RE_DEBUG
	,resrc(nullptr)
#endif
{
	Init((const prechar)expr,options);
}

// Releases everything the object owns: the debug copy of the source, the
// compiled program, the state stack and (UNICODE build) the set behind
// firstptr.
RegExp::~RegExp()
{
#ifdef RE_DEBUG
#ifdef RE_NO_NEWARRAY

	if (resrc)
		free(resrc);

#else
	delete [] resrc;
#endif // RE_NO_NEWARRAY
#endif
#ifdef RE_NO_NEWARRAY

	if (code)
		DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);

#else

	if (code)
	{
		delete [] code;
		code=nullptr;
	}

#endif
	CleanStack();
#ifdef UNICODE
	delete firstptr;
#endif
}

#ifndef UNICODE
#ifndef RE_EXTERNAL_CTYPE
// Fills the character tables of the non-UNICODE build from the C runtime
// <ctype.h> functions:
//   lc / uc     - lower / upper case mapping of every character code
//   chartypes   - TYPE_* flags of every character code
//   charbits    - one 32-byte (256-bit) bitmap per class, in the order
//                 digit, space, word, lower case, upper case, alpha
void RegExp::InitLocale()
{
	for (int ch=0; ch<256; ch++)
	{
		lc[ch]=tolower(ch);
		uc[ch]=toupper(ch);

		char flags=0;

		if (isalnum(ch) || ch=='_')
			flags|=TYPE_WORDCHAR;

		if (isalpha(ch))
			flags|=TYPE_ALPHACHAR;

		if (isdigit(ch))
			flags|=TYPE_DIGITCHAR;

		if (isspace(ch))
			flags|=TYPE_SPACECHAR;

		// a character has a case only if the opposite case differs from it
		if (lc[ch]==ch && uc[ch]!=ch)
			flags|=TYPE_LOWCASE;

		if (uc[ch]==ch && lc[ch]!=ch)
			flags|=TYPE_UPCASE;

		chartypes[ch]=flags;
	}

	// NOTE(review): sizeof(charbits) is the size of the whole table only if
	// charbits is declared as an array in the class; where it is a pointer
	// (as in the RE_STATIC_LOCALE definitions) it is just the pointer size -
	// confirm against RegExp.hpp.
	memset(charbits,0,sizeof(charbits));

	for (int ch=0; ch<256; ch++)
	{
		const int byteidx=ch>>3;      // byte inside a 32-byte bitmap
		const int bitmask=1<<(ch&7);  // bit inside that byte

		if (chartypes[ch]&TYPE_DIGITCHAR)
			charbits[byteidx]|=bitmask;

		if (chartypes[ch]&TYPE_SPACECHAR)
			charbits[32+byteidx]|=bitmask;

		if (chartypes[ch]&TYPE_WORDCHAR)
			charbits[64+byteidx]|=bitmask;

		if (chartypes[ch]&TYPE_LOWCASE)
			charbits[96+byteidx]|=bitmask;

		if (chartypes[ch]&TYPE_UPCASE)
			charbits[128+byteidx]|=bitmask;

		if (chartypes[ch]&TYPE_ALPHACHAR)
			charbits[160+byteidx]|=bitmask;
	}
}
#endif
#endif



// First pass over the pattern: validates bracket nesting and a few syntax
// details and computes how many REOpCode slots InnerCompile needs.
//
//   src, srclength - pattern text (without Perl-style delimiters)
//
// Returns the number of opcodes to allocate (three of them are for the
// implicit outer brackets and the terminator), or 0 on error; the error is
// reported through SetError / errorcode / errorpos.
// Side effect: bracketscount is set to the number of capturing brackets
// plus one.
//
// The count must never be smaller than what InnerCompile really emits, so
// the scanning rules here have to follow InnerCompile:
//  * inside \Q...\E every character is one literal, and a backslash that is
//    not part of "\E" produces two literals (the backslash and the character
//    after it);
//  * inside [...] a backslash escapes the next character, so "\]" does not
//    close the class.
int RegExp::CalcLength(const prechar src,int srclength)
{
	int length=3;//global brackets
	int brackets[MAXDEPTH];
	int count=0;
	int i,save;
	bracketscount=1;
	int inquote=0;

	for (i=0; i<srclength; i++,length++)
	{
		if (inquote)
		{
			// ordinary quoted character: exactly one opcode
			if (src[i]!=backslashChar)
				continue;

			i++;

			if (src[i]=='E')
			{
				// end of the quoted run
				inquote=0;
			}
			else
			{
				// literal backslash followed by a literal character:
				// InnerCompile emits two opcodes for this pair
				length++;
			}

			continue;
		}

		if (src[i]==backslashChar)
		{
			i++;

			if (src[i]=='Q')inquote=1;

			if (src[i]=='E')inquote=0;

			if (src[i]=='x')
			{
				i++;
				if(isxdigit(src[i]))
				{
					// up to three more hex digits belong to the same escape
					for(int j=1,k=i;j<4 && k+j<srclength;j++)
					{
						if(isxdigit(src[k+j]))
						{
							i++;
						}
						else
						{
							break;
						}
					}
				}
				else return SetError(errSyntax,i);
			}

#ifdef NAMEDBRACKETS

			if (src[i]=='p')
			{
				i++;

				if (src[i]!='{')
					return SetError(errSyntax,i);

				i++;
				int save2=i;

				while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
					i++;

				if (i>=srclength)
					return SetError(errBrackets,save2);

				if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
					return SetError(errSyntax,i);
			}

#endif
			continue;
		}

		switch (src[i])
		{
			case '(':
			{
				brackets[count]=i;
				count++;

				if (count==MAXDEPTH)return SetError(errMaxDepth,i);

				if (src[i+1]=='?')
				{
					i+=2;
#ifdef NAMEDBRACKETS

					if (src[i]=='{')
					{
						save=i;
						i++;

						while (i<srclength && (ISWORD(src[i]) || ISSPACE(src[i])) && src[i]!='}')
							i++;

						if (i>=srclength)
							return SetError(errBrackets,save);

						if (src[i]!='}' && !(ISWORD(src[i]) || ISSPACE(src[i])))
							return SetError(errSyntax,i);
					}

#endif
				}
				else
				{
					bracketscount++;
				}

				break;
			}
			case ')':
			{
				count--;

				if (count<0)return SetError(errBrackets,i);

				break;
			}
			case '{':
			case '*':
			case '+':
			case '?':
			{
				// a quantifier is merged into the preceding opcode
				length--;

				if (src[i]=='{')
				{
					save=i;

					while (i<srclength && src[i]!='}')i++;

					if (i>=srclength)return SetError(errBrackets,save);
				}

				// trailing '?' marks the minimizing form
				if (src[i+1]=='?')i++;

				break;
			}
			case '[':
			{
				save=i;

				while (i<srclength && src[i]!=']')
				{
					// an escaped character, "\]" included, never closes the class
					if (src[i]==backslashChar)i++;

					i++;
				}

				if (i>=srclength)return SetError(errBrackets,save);

				break;
			}
#ifdef RELIB
			case '%':
			{
				i++;
				save=i;

				while (i<srclength && src[i]!='%')i++;

				if (i>=srclength)return SetError(errBrackets,save-1);

				if (save==i)return SetError(errSyntax,save);
			} break;
#endif
		}
	}

	if (count)
	{
		// at least one bracket was never closed; report the outermost one
		errorpos=brackets[0];
		errorcode=errBrackets;
		return 0;
	}

	return length;
}

int RegExp::Compile(const RECHAR* src,int options)
{
	int srcstart=0,srclength/*=0*/,relength;

	if (options&OP_CPPMODE)
	{
		slashChar='\\';
		backslashChar='/';
	}
	else
	{
		slashChar='/';
		backslashChar='\\';
	}

	havefirst=0;
#ifdef RE_NO_NEWARRAY
	DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
#else

	if (code)delete [] code;

	code=nullptr;
#endif

	if (options&OP_PERLSTYLE)
	{
		if (src[0]!=slashChar)return SetError(errSyntax,0);

		srcstart=1;
		srclength=1;

		while (src[srclength] && src[srclength]!=slashChar)
		{
			if (src[srclength]==backslashChar && src[srclength+1])
			{
				srclength++;
			}

			srclength++;
		}

		if (!src[srclength])
		{
			return SetError(errSyntax,srclength-1);
		}

		int i=srclength+1;
		srclength--;

		while (src[i])
		{
			switch (src[i])
			{
				case 'i':options|=OP_IGNORECASE; break;
				case 's':options|=OP_SINGLELINE; break;
				case 'm':options|=OP_MULTILINE; break;
				case 'x':options|=OP_XTENDEDSYNTAX; break;
				case 'o':options|=OP_OPTIMIZE; break;
				default:return SetError(errOptions,i);
			}

			i++;
		}
	}
	else
	{
		srclength=(int)strlen(src);
	}

	ignorecase=options&OP_IGNORECASE?1:0;
	relength=CalcLength((const prechar)src+srcstart,srclength);

	if (!relength)
	{
		return 0;
	}

#ifdef RE_NO_NEWARRAY
	code=static_cast<REOpCode*>
	     (CreateArray(sizeof(REOpCode), relength, REOpCode::OnCreate));
#else
	code=new REOpCode[relength];
	memset(code,0,sizeof(REOpCode)*relength);
#endif

	for (int i=0; i<relength; i++)
	{
		code[i].next=i<relength-1?code+i+1:0;
		code[i].prev=i>0?code+i-1:0;
	}

	int result=InnerCompile((const prechar)src+srcstart,srclength,options);

	if (!result)
	{
#ifdef RE_NO_NEWARRAY
		DeleteArray(reinterpret_cast<void**>(&code),REOpCode::OnDelete);
#else
		delete [] code;
		code=nullptr;
#endif
	}
	else
	{
		errorcode=errNone;
		minlength=0;

		if (options&OP_OPTIMIZE)Optimize();
	}

	return result;
}

// Parses the run of decimal digits that starts at src[i] and returns its
// value (0 when src[i] is not a digit). On return i indexes the first
// character after the digits.
int RegExp::GetNum(const prechar src,int& i)
{
	int value=0;

	for (; ISDIGIT(src[i]); i++)
		value=value*10+(src[i]-'0');

	return value;
}

// Computes the number of characters matched by the opcode sequence that
// starts at from and ends at to (inclusive), provided that number is fixed.
// Returns -1 when the length can vary: alternatives of different length,
// quantifiers with min!=max, back references or library calls.
// InnerCompile uses it to obtain the length of a look-behind body.
static int CalcPatternLength(PREOpCode from,PREOpCode to)
{
	int total=0;       // length accumulated over the whole sequence
	int branch=0;      // length of the alternative being scanned
	int branchlen=-1;  // length every alternative has to have; -1 = none seen

	for (; from->prev!=to; from=from->next)
	{
		switch (from->op)
		{
				// match no characters
			case opLineStart:
			case opLineEnd:
			case opDataStart:
			case opDataEnd:
			case opWordBound:
			case opNotWordBound:
			case opClosingBracket:
			case opAsserionsEnd:
			case opNoReturn:
				break;

				// match exactly one character
			case opType:
			case opNotType:
			case opCharAny:
			case opCharAnyAll:
			case opSymbol:
			case opNotSymbol:
			case opSymbolIgnoreCase:
			case opNotSymbolIgnoreCase:
			case opSymbolClass:
				total++;
				branch++;
				break;

				// a group contributes the length of its body
#ifdef NAMEDBRACKETS
			case opNamedBracket:
#endif
			case opOpenBracket:
			{
				const int inner=CalcPatternLength(from->next,from->bracket.pairindex->prev);

				if (inner==-1)
					return -1;

				total+=inner;
				branch+=inner;
				// resume after the closing bracket
				from=from->bracket.pairindex;
				break;
			}

				// every alternative must have the same length
			case opAlternative:

				if (branchlen!=-1 && branch!=branchlen)
					return -1;

				branchlen=branch;
				branch=0;
				break;

				// length depends on the subject text
			case opBackRef:
			case opBackRefRange:
			case opBackRefMinRange:
#ifdef NAMEDBRACKETS
			case opNamedBackRef:
			case opNamedRefRange:
			case opNamedRefMinRange:
#endif
#ifdef RELIB
			case opLibCall:
#endif
				return -1;

				// quantified single-character items: fixed only for {n}
			case opRangesBegin:
			case opRange:
			case opMinRange:
			case opSymbolRange:
			case opSymbolMinRange:
			case opNotSymbolRange:
			case opNotSymbolMinRange:
			case opAnyRange:
			case opAnyMinRange:
			case opTypeRange:
			case opTypeMinRange:
			case opNotTypeRange:
			case opNotTypeMinRange:
			case opClassRange:
			case opClassMinRange:

				if (from->range.min!=from->range.max)
					return -1;

				total+=from->range.min;
				branch+=from->range.min;
				break;

				// quantified group: repetitions times the body length
			case opBracketRange:
			case opBracketMinRange:
			{
				if (from->range.min!=from->range.max)
					return -1;

				const int inner=CalcPatternLength(from->next,from->bracket.pairindex->prev);

				if (inner==-1)
					return -1;

				total+=from->range.min*inner;
				branch+=from->range.min*inner;
				from=from->bracket.pairindex;
				break;
			}

				// assertions match no characters; skip their body
			case opRangesEnd:
			case opAssertionsBegin:
			case opLookAhead:
			case opNotLookAhead:
			case opLookBehind:
			case opNotLookBehind:
				from=from->assert.pairindex;
				break;
		}
	}

	// no alternatives at this level: the plain sum is the answer
	if (branchlen==-1)
		return total;

	// otherwise the last alternative has to agree with the others
	return branchlen==branch?branchlen:-1;
}

int RegExp::InnerCompile(const prechar src,int srclength,int options)
{
	int i,j;
	PREOpCode brackets[MAXDEPTH];
	// current brackets depth
	// one place reserved for surrounding 'main' brackets
	int brdepth=1;
	// compiling interior of lookbehind
	// used to apply restrictions of lookbehind
	int lookbehind=0;
	// counter of normal brackets
	int brcount=0;
	// counter of closed brackets
	// used to check correctness of backreferences
	bool closedbrackets[MAXDEPTH];
	// quoting is active
	int inquote=0;
	maxbackref=0;
#ifdef UNICODE
	UniSet *tmpclass;
#else
	rechar tmpclass[32];
	int *itmpclass=(int*)tmpclass;
#endif
	code->op=opOpenBracket;
	code->bracket.index=0;
#ifdef NAMEDBRACKETS
	MatchHash h;
	SMatch m;
#endif
	int pos=1;
	register PREOpCode op;//=code;
	brackets[0]=code;
#ifdef RE_DEBUG
#ifdef RE_NO_NEWARRAY
	resrc=static_cast<rechar*>(malloc(sizeof(rechar)*(srclength+4)));
#else
	resrc=new rechar[srclength+4];
#endif // RE_NO_NEWARRAY
	resrc[0]='(';
	resrc[1]=0;
	memcpy(resrc+1,src,srclength*sizeof(rechar));
	resrc[srclength+1]=')';
	resrc[srclength+2]=27;
	resrc[srclength+3]=0;
#endif
	havelookahead=0;

	for (i=0; i<srclength; i++)
	{
		op=code+pos;
		pos++;
#ifdef RE_DEBUG
		op->srcpos=i+1;
#endif

		if (inquote && src[i]!=backslashChar)
		{
			op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
			op->symbol=ignorecase?TOLOWER(src[i]):src[i];

			if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;

			continue;
		}

		if (src[i]==backslashChar)
		{
			i++;

			if (inquote && src[i]!='E')
			{
				op->op=opSymbol;
				op->symbol=backslashChar;
				op=code+pos;
				pos++;
				op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
				op->symbol=ignorecase?TOLOWER(src[i]):src[i];

				if (ignorecase && TOUPPER(op->symbol)==op->symbol)op->op=opSymbol;

				continue;
			}

			op->op=opType;

			switch (src[i])
			{
				case 'Q':inquote=1; pos--; continue;
				case 'E':inquote=0; pos--; continue;
				case 'b':op->op=opWordBound; continue;
				case 'B':op->op=opNotWordBound; continue;
				case 'D':op->op=opNotType;
				case 'd':op->type=TYPE_DIGITCHAR; continue;
				case 'S':op->op=opNotType;
				case 's':op->type=TYPE_SPACECHAR; continue;
				case 'W':op->op=opNotType;
				case 'w':op->type=TYPE_WORDCHAR; continue;
				case 'U':op->op=opNotType;
				case 'u':op->type=TYPE_UPCASE; continue;
				case 'L':op->op=opNotType;
				case 'l':op->type=TYPE_LOWCASE; continue;
				case 'I':op->op=opNotType;
				case 'i':op->type=TYPE_ALPHACHAR; continue;
				case 'A':op->op=opDataStart; continue;
				case 'Z':op->op=opDataEnd; continue;
				case 'n':op->op=opSymbol; op->symbol='\n'; continue;
				case 'r':op->op=opSymbol; op->symbol='\r'; continue;
				case 't':op->op=opSymbol; op->symbol='\t'; continue;
				case 'f':op->op=opSymbol; op->symbol='\f'; continue;
				case 'e':op->op=opSymbol; op->symbol=27; continue;
				case 'O':op->op=opNoReturn; continue;
#ifdef NAMEDBRACKETS
				case 'p':
				{
					op->op=opNamedBackRef;
					i++;

					if (src[i]!='{')return SetError(errSyntax,i);

					int len=0; i++;

					while (src[i+len]!='}')len++;

					if (len>0)
					{
#ifdef RE_NO_NEWARRAY
						op->refname=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
#else
						op->refname=new rechar[len+1];
#endif
						memcpy(op->refname,src+i,len*sizeof(rechar));
						op->refname[len]=0;

						if (!h.Exists((char*)op->refname))
						{
							return SetError(errReferenceToUndefinedNamedBracket,i);
						}

						i+=len;
					}
					else
					{
						return SetError(errSyntax,i);
					}
				} continue;
#endif
				case 'x':
				{
					i++;

					if (i>=srclength)return SetError(errSyntax,i-1);

					if(isxdigit(src[i]))
					{
						int c=TOLOWER(src[i])-'0';

						if (c>9)c-='a'-'0'-10;

						op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
						op->symbol=c;
						for(int j=1,k=i;j<4 && k+j<srclength;j++)
						{
							if(isxdigit(src[k+j]))
							{
								i++;
								c=TOLOWER(src[k+j])-'0';
								if (c>9)c-='a'-'0'-10;
								op->symbol<<=4;
								op->symbol|=c;
							}
							else
							{
								break;
							}
						}
						if (ignorecase)
						{
							op->symbol=TOLOWER(op->symbol);
							if (TOUPPER(op->symbol)==TOLOWER(op->symbol))
							{
								op->op=opSymbol;
							}
						}
					}
					else return SetError(errSyntax,i);

					continue;
				}
				default:
				{
					if (ISDIGIT(src[i]))
					{
						int save=i;
						op->op=opBackRef;
						op->refindex=GetNum(src,i); i--;

						if (op->refindex<=0 || op->refindex>brcount || !closedbrackets[op->refindex])
						{
							return SetError(errInvalidBackRef,save-1);
						}

						if (op->refindex>maxbackref)maxbackref=op->refindex;
					}
					else
					{
						if (options&OP_STRICT && ISALPHA(src[i]))
						{
							return SetError(errInvalidEscape,i-1);
						}

						op->op=ignorecase?opSymbolIgnoreCase:opSymbol;
						op->symbol=ignorecase?TOLOWER(src[i]):src[i];

						if (TOLOWER(op->symbol)==TOUPPER(op->symbol))
						{
							op->op=opSymbol;
						}
					}
				}
			}

			continue;
		}

		switch (src[i])
		{
			case '.':
			{
				if (options&OP_SINGLELINE)
				{
					op->op=opCharAnyAll;
				}
				else
				{
					op->op=opCharAny;
				}

				continue;
			}
			case '^':
			{
				if (options&OP_MULTILINE)
				{
					op->op=opLineStart;
				}
				else
				{
					op->op=opDataStart;
				}

				continue;
			}
			case '$':
			{
				if (options&OP_MULTILINE)
				{
					op->op=opLineEnd;
				}
				else
				{
					op->op=opDataEnd;
				}

				continue;
			}
			case '|':
			{
				if (brackets[brdepth-1]->op==opAlternative)
				{
					brackets[brdepth-1]->alternative.nextalt=op;
				}
				else
				{
					if (brackets[brdepth-1]->op==opOpenBracket)
					{
						brackets[brdepth-1]->bracket.nextalt=op;
					}
					else
					{
						brackets[brdepth-1]->assert.nextalt=op;
					}
				}

				if (brdepth==MAXDEPTH)return SetError(errMaxDepth,i);

				brackets[brdepth++]=op;
				op->op=opAlternative;
				continue;
			}
			case '(':
			{
				op->op=opOpenBracket;

				if (src[i+1]=='?')
				{
					i+=2;

					switch (src[i])
					{
						case ':':op->bracket.index=-1; break;
						case '=':op->op=opLookAhead; havelookahead=1; break;
						case '!':op->op=opNotLookAhead; havelookahead=1; break;
						case '<':
						{
							i++;

							if (src[i]=='=')
							{
								op->op=opLookBehind;
							}
							else if (src[i]=='!')
							{
								op->op=opNotLookBehind;
							}
							else return SetError(errSyntax,i);

							lookbehind++;
						} break;
#ifdef NAMEDBRACKETS
						case '{':
						{
							op->op=opNamedBracket;
							havenamedbrackets=1;
							int len=0;
							i++;

							while (src[i+len]!='}')len++;

							if (len>0)
							{
#ifdef RE_NO_NEWARRAY
								op->nbracket.name=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
#else
								op->nbracket.name=new rechar[len+1];
#endif
								memcpy(op->nbracket.name,src+i,len*sizeof(rechar));
								op->nbracket.name[len]=0;
								//h.SetItem((char*)op->nbracket.name,m);
							}
							else
							{
								op->op=opOpenBracket;
								op->bracket.index=-1;
							}

							i+=len;
						} break;
#endif
						default:
						{
							return SetError(errSyntax,i);
						}
					}
				}
				else
				{
					brcount++;
					closedbrackets[brcount]=false;
					op->bracket.index=brcount;
				}

				brackets[brdepth]=op;
				brdepth++;
				continue;
			}
			case ')':
			{
				op->op=opClosingBracket;
				brdepth--;

				while (brackets[brdepth]->op==opAlternative)
				{
					brackets[brdepth]->alternative.endindex=op;
					brdepth--;
				}

				switch (brackets[brdepth]->op)
				{
					case opOpenBracket:
					{
						op->bracket.pairindex=brackets[brdepth];
						brackets[brdepth]->bracket.pairindex=op;
						op->bracket.index=brackets[brdepth]->bracket.index;

						if (op->bracket.index!=-1)
						{
							closedbrackets[op->bracket.index]=true;
						}

						break;
					}
#ifdef NAMEDBRACKETS
					case opNamedBracket:
					{
						op->nbracket.pairindex=brackets[brdepth];
						brackets[brdepth]->nbracket.pairindex=op;
						op->nbracket.name=brackets[brdepth]->nbracket.name;
						h.SetItem((char*)op->nbracket.name,m);
						break;
					}
#endif
					case opLookBehind:
					case opNotLookBehind:
					{
						lookbehind--;
						int l=CalcPatternLength(brackets[brdepth]->next,op->prev);

						if (l==-1)return SetError(errVariableLengthLookBehind,i);

						brackets[brdepth]->assert.length=l;
					}// there is no break and this is correct!
					case opLookAhead:
					case opNotLookAhead:
					{
						op->assert.pairindex=brackets[brdepth];
						brackets[brdepth]->assert.pairindex=op;
						break;
					}
				}

				continue;
			}
			case '[':
			{
				i++;
				int negative=0;

				if (src[i]=='^')
				{
					negative=1;
					i++;
				}

				int lastchar=0;
				int classsize=0;
				op->op=opSymbolClass;
				//op->symbolclass=new rechar[32];
				//memset(op->symbolclass,0,32);
#ifdef UNICODE
				op->symbolclass=new UniSet();
				tmpclass=op->symbolclass;
           #define IF_U(t)
#else

				for (j=0; j<8; j++)itmpclass[j]=0;
				int classindex=0;
           #define IF_U(t) t
#endif

				for (; src[i]!=']'; i++)
				{
					if (src[i]==backslashChar)
					{
						i++;
						int isnottype=0;
						int type=0;
						lastchar=0;

						switch (src[i])
						{
							case 'D':isnottype=1;
							case 'd':type=TYPE_DIGITCHAR; IF_U(classindex=0); break;
							case 'W':isnottype=1;
							case 'w':type=TYPE_WORDCHAR; IF_U(classindex=64); break;
							case 'S':isnottype=1;
							case 's':type=TYPE_SPACECHAR; IF_U(classindex=32); break;
							case 'L':isnottype=1;
							case 'l':type=TYPE_LOWCASE; IF_U(lassindex=96); break;
							case 'U':isnottype=1;
							case 'u':type=TYPE_UPCASE; IF_U(classindex=128); break;
							case 'I':isnottype=1;
							case 'i':type=TYPE_ALPHACHAR; IF_U(classindex=160); break;
							case 'n':lastchar='\n'; break;
							case 'r':lastchar='\r'; break;
							case 't':lastchar='\t'; break;
							case 'f':lastchar='\f'; break;
							case 'e':lastchar=27; break;
							case 'x':
							{
								i++;

								if (i>=srclength)return SetError(errSyntax,i-1);

								if (isxdigit(src[i]))
								{
									int c=TOLOWER(src[i])-'0';

									if (c>9)c-='a'-'0'-10;

									lastchar=c;

									for(int j=1,k=i;j<4 && k+j<srclength;j++)
									{
										if (isxdigit(src[k+j]))
										{
											i++;
											c=TOLOWER(src[k+j])-'0';

											if (c>9)c-='a'-'0'-10;

											lastchar<<=4;
											lastchar|=c;
										}
										else
										{
											break;
										}
									}
									dpf(("Last char=%c(%02x)\n",lastchar,lastchar));
								}
								else return SetError(errSyntax,i);

								break;
							}
							default:
							{
								if (options&OP_STRICT && ISALPHA(src[i]))
								{
									return SetError(errInvalidEscape,i-1);
								}

								lastchar=src[i];
							}
						}

						if (type)
						{
#ifdef UNICODE

							if (isnottype)
							{
								tmpclass->nottypes|=type;
							}
							else
							{
								tmpclass->types|=type;
							}

#else
							isnottype=isnottype?0xffffffff:0;
							int *b=(int*)(charbits+classindex);

							for (j=0; j<8; j++)
							{
								itmpclass[j]|=b[j]^isnottype;
							}

#endif
							classsize=257;
							//for(int j=0;j<32;j++)op->symbolclass[j]|=charbits[classindex+j]^isnottype;
							//classsize+=charsizes[classindex>>5];
							//int setbit;
							/*for(int j=0;j<256;j++)
							{
							  setbit=(chartypes[j]^isnottype)&type;
							  if(setbit)
							  {
							    if(ignorecase)
							    {
							      SetBit(op->symbolclass,lc[j]);
							      SetBit(op->symbolclass,uc[j]);
							    }else
							    {
							      SetBit(op->symbolclass,j);
							    }
							    classsize++;
							  }
							}*/
						}
						else
						{
							if (options&OP_IGNORECASE)
							{
								SetBit(tmpclass,TOLOWER(lastchar));
								SetBit(tmpclass,TOUPPER(lastchar));
							}
							else
							{
								SetBit(tmpclass,lastchar);
							}

							classsize++;
						}

						continue;
					}

					if (src[i]=='-')
					{
						if (lastchar && src[i+1]!=']')
						{
							int to=src[i+1];

							if (to==backslashChar)
							{
								to=src[i+2];

								if (to=='x')
								{
									i+=2;
									to=TOLOWER(src[i+1]);

									if(isxdigit(to))
									{
										to-='0';

										if (to>9)to-='a'-'0'-10;

										for(int j=1,k=(i+1);j<4 && k+j<srclength;j++)
										{
											int c=TOLOWER(src[k+j]);
											if(isxdigit(c))
											{
												i++;
												c-='0';

												if (c>9)c-='a'-'0'-10;

												to<<=4;
												to|=c;
											}
											else
											{
												break;
											}
										}
									}
									else return SetError(errSyntax,i);
								}
								else
								{
									SetBit(tmpclass,'-');
									classsize++;
									continue;
								}
							}

							i++;
							dpf(("from %d to %d\n",lastchar,to));

							for (j=lastchar; j<=to; j++)
							{
								if (ignorecase)
								{
									SetBit(tmpclass,TOLOWER(j));
									SetBit(tmpclass,TOUPPER(j));
								}
								else
								{
									SetBit(tmpclass,j);
								}

								classsize++;
							}

							continue;
						}
					}

					lastchar=src[i];

					if (ignorecase)
					{
						SetBit(tmpclass,TOLOWER(lastchar));
						SetBit(tmpclass,TOUPPER(lastchar));
					}
					else
					{
						SetBit(tmpclass,lastchar);
					}

					classsize++;
				}

				if (negative && classsize>1)
				{
#ifdef UNICODE
					tmpclass->negative=negative;
#else

					for (int jj=0; jj<8; jj++)itmpclass[jj]^=0xffffffff;

#endif
					//for(int j=0;j<32;j++)op->symbolclass[j]^=0xff;
				}

				if (classsize==1)
				{
#ifdef UNICODE
					delete op->symbolclass;
					op->symbolclass=0;
					tmpclass=0;
#endif
					op->op=negative?opNotSymbol:opSymbol;

					if (ignorecase)
					{
						op->op+=2;
						op->symbol=TOLOWER(lastchar);
					}
					else
					{
						op->symbol=lastchar;
					}
				}

#ifdef UNICODE

				if (tmpclass)tmpclass->negative=negative;

#else
				else if (classsize==256 && !negative)
				{
					op->op=options&OP_SINGLELINE?opCharAnyAll:opCharAny;
				}
				else
				{
#ifdef RE_NO_NEWARRAY
					op->symbolclass=static_cast<rechar*>(malloc(sizeof(rechar)*32));
#else
				op->symbolclass=new rechar[32];
#endif

					for (j=0; j<8; j++)((int*)op->symbolclass)[j]=itmpclass[j];
				}

#endif
				continue;
			}
			case '+':
			case '*':
			case '?':
			case '{':
			{
				int min=0,max=0;

				switch (src[i])
				{
					case '+':min=1; max=-2; break;
					case '*':min=0; max=-2; break;
					case '?':
					{
						//if(src[i+1]=='?') return SetError(errInvalidQuantifiersCombination,i);
						min=0; max=1;
						break;
					}
					case '{':
					{
						i++;
						int save=i;
						min=GetNum(src,i);
						max=min;

						if (min<0)return SetError(errInvalidRange,save);

//            i++;
						if (src[i]==',')
						{
							if (src[i+1]=='}')
							{
								i++;
								max=-2;
							}
							else
							{
								i++;
								max=GetNum(src,i);

//                i++;
								if (max<min)return SetError(errInvalidRange,save);
							}
						}

						if (src[i]!='}')return SetError(errInvalidRange,save);
					}
				}

				pos--;
				op=code+pos-1;

				if (min==1 && max==1)continue;

				op->range.min=min;
				op->range.max=max;

				switch (op->op)
				{
					case opLineStart:
					case opLineEnd:
					case opDataStart:
					case opDataEnd:
					case opWordBound:
					case opNotWordBound:
					{
						return SetError(errInvalidQuantifiersCombination,i);
//            op->range.op=op->op;
//            op->op=opRange;
//            continue;
					}
					case opCharAny:
					case opCharAnyAll:
					{
						op->range.op=op->op;
						op->op=opAnyRange;
						break;
					}
					case opType:
					{
						op->op=opTypeRange;
						break;
					}
					case opNotType:
					{
						op->op=opNotTypeRange;
						break;
					}
					case opSymbolIgnoreCase:
					case opSymbol:
					{
						op->op=opSymbolRange;
						break;
					}
					case opNotSymbol:
					case opNotSymbolIgnoreCase:
					{
						op->op=opNotSymbolRange;
						break;
					}
					case opSymbolClass:
					{
						op->op=opClassRange;
						break;
					}
					case opBackRef:
					{
						op->op=opBackRefRange;
						break;
					}
#ifdef NAMEDBRACKETS
					case opNamedBackRef:
					{
						op->op=opNamedRefRange;
					} break;
#endif
					case opClosingBracket:
					{
						op=op->bracket.pairindex;

						if (op->op!=opOpenBracket)return SetError(errInvalidQuantifiersCombination,i);

						op->range.min=min;
						op->range.max=max;
						op->op=opBracketRange;
						break;
					}
					default:
					{
						dpf(("OP=%d\n",op->op));
						return SetError(errInvalidQuantifiersCombination,i);
					}
				}//switch(code.op)

				if (src[i+1]=='?')
				{
					op->op++;
					i++;
				}

				continue;
			}// case +*?{
			case ' ':
			case '\t':
			case '\n':
			case '\r':
			{
				if (options&OP_XTENDEDSYNTAX)
				{
					pos--;
					continue;
				}
			}
#ifdef RELIB
			case '%':
			{
				i++;
				int len=0;

				while (src[i+len]!='%')len++;

				op->op=opLibCall;
#ifdef RE_NO_NEWARRAY
				op->rename=static_cast<rechar*>(malloc(sizeof(rechar)*(len+1)));
#else
				op->rename=new rechar[len+1];
#endif
				memcpy(op->rename,src+i,len*sizeof(rechar));
				op->rename[len]=0;
				i+=len;
				continue;
			}
#endif
			default:
			{
				op->op=options&OP_IGNORECASE?opSymbolIgnoreCase:opSymbol;

				if (ignorecase)
				{
					op->symbol=TOLOWER(src[i]);
				}
				else
				{
					op->symbol=src[i];
				}
			}
		}//switch(src[i])
	}//for()

	op=code+pos;
	pos++;
	brdepth--;

	while (brdepth>=0 && brackets[brdepth]->op==opAlternative)
	{
		brackets[brdepth]->alternative.endindex=op;
		brdepth--;
	}

	op->op=opClosingBracket;
	op->bracket.pairindex=code;
	code->bracket.pairindex=op;
#ifdef RE_DEBUG
	op->srcpos=i;
#endif
	op=code+pos;
	//pos++;
	op->op=opRegExpEnd;
#ifdef RE_DEBUG
	op->srcpos=i+1;
#endif
	return 1;
}

inline void RegExp::PushState()
{
	stackcount++;
#ifdef RELIB
	stackusage++;
#endif

	if (stackcount==STACK_PAGE_SIZE)
	{
		if (lastpage->next)
		{
			lastpage=lastpage->next;
			stack=lastpage->stack;
		}
		else
		{
			lastpage->next=new StateStackPage;
			lastpage->next->prev=lastpage;
			lastpage=lastpage->next;
			lastpage->next=nullptr;
#ifdef RE_NO_NEWARRAY
			lastpage->stack=static_cast<StateStackItem*>
			                (CreateArray(sizeof(StateStackItem), STACK_PAGE_SIZE,
			                             StateStackItem::OnCreate));
#else
			lastpage->stack=new StateStackItem[STACK_PAGE_SIZE];
#endif // RE_NO_NEWARRAY
			stack=lastpage->stack;
		}

		stackcount=0;
	}

	st=&stack[stackcount];
}
inline int RegExp::PopState()
{
	stackcount--;
#ifdef RELIB
	stackusage--;

	if (stackusage<0)return 0;

#endif

	if (stackcount<0)
	{
		if (!lastpage->prev)
			return 0;

		lastpage=lastpage->prev;
		stack=lastpage->stack;
		stackcount=STACK_PAGE_SIZE-1;
	}

	st=&stack[stackcount];
	return 1;
}


// Returns a pointer to the slot just below the current stack position
// without changing any stack bookkeeping. Yields a null pointer when that
// slot would lie before the first page (and, under RELIB, when the usage
// counter is zero).
inline StateStackItem *RegExp::GetState()
{
#ifdef RELIB

	if (!stackusage)
		return nullptr;

#endif
	const int below=stackcount-1;

	if (below>=0)
		return &lastpage->stack[below];

	// The wanted slot is the last one of the preceding page, if there is one.
	StateStackPage* const earlier=lastpage->prev;

	if (!earlier)
		return nullptr;

	return &earlier->stack[STACK_PAGE_SIZE-1];
}

// Walks the state stack backwards from the slot below the current position,
// crossing to previous pages as needed, and returns the first item whose
// `pos` and `op` fields both equal the arguments. Returns a null pointer when
// the walk runs out of pages (or, under RELIB, out of used slots) first.
// The stack itself is not modified.
inline StateStackItem *RegExp::FindStateByPos(PREOpCode pos,int op)
{
#ifdef RELIB
	int remaining=stackusage;
#endif
	StateStackPage* page=lastpage;
	StateStackItem* items=page->stack;
	int idx=stackcount;

	for (;;)
	{
		--idx;
#ifdef RELIB

		if (--remaining<0)
			return nullptr;

#endif

		if (idx<0)
		{
			// Ran off the front of this page: resume at the end of the previous one.
			page=page->prev;

			if (!page)
				return nullptr;

			items=page->stack;
			idx=STACK_PAGE_SIZE-1;
		}

		StateStackItem& candidate=items[idx];

		if (candidate.pos==pos && candidate.op==op)
			return &candidate;
	}
}


inline int RegExp::StrCmp(const prechar& str,const prechar _st,const prechar ed)
{
	const prechar save=str;

	if (ignorecase)
	{
		while (_st<ed)
		{
			if (TOLOWER(*str)!=TOLOWER(*_st)) {str=save; return 0;}

			str++;
			_st++;
		}
	}
	else
	{
		while (_st<ed)
		{
			if (*str!=*_st) {str=save; return 0;}

			str++;
			_st++;
		}
	}

	return 1;
}

// Shorthand for the object the local pointer `op` refers to, so fields can be
// written as OP.op, OP.symbol, etc. Requires a variable named `op` in scope
// wherever the macro is expanded.
#define OP (*op)


// MINSKIP(cmp): statement-block macro that advances `str` depending on the
// opcode that follows the current one (op->next->op).
//
// It relies on these names being in scope at the expansion site: `op`, `str`,
// `strend`, `st`, and `cl` (used only for the opSymbolClass case; the macro
// does not declare it). `cmp` is an expression pasted verbatim into the loop
// condition `str<strend && cmp && st->max--`, so it is re-evaluated on every
// iteration and is not parenthesised by the macro.
//
// For each handled successor opcode (opSymbol, opNotSymbol,
// opSymbolIgnoreCase, opNotSymbolIgnoreCase, opType, opNotType,
// opSymbolClass) the operand is loaded into `jj` (or `cl`). If the current
// character *str does not satisfy the successor's test, `str` is incremented
// while the loop condition holds; each iteration post-decrements st->max, and
// the loop is left early when the character at str[1] also fails the test.
// Any other successor opcode leaves `str` unchanged.
//
// NOTE(review): inside the loops the early-exit test reads str[1] after `str`
// has already been incremented, i.e. one character ahead of the new position
// — confirm this look-ahead (and its bound against strend) is intended.
#define MINSKIP(cmp) \
	{ int jj; \
		switch(op->next->op) \
		{ \
			case opSymbol: \
			{ \
				jj=op->next->symbol; \
				if(*str!=jj) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(str[1]!=jj)break;\
					} \
				break; \
			} \
			case opNotSymbol: \
			{ \
				jj=op->next->symbol; \
				if(*str==jj) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(str[1]==jj)break;\
					} \
				break; \
			} \
			case opSymbolIgnoreCase: \
			{ \
				jj=op->next->symbol; \
				if(TOLOWER(*str)!=jj) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(TOLOWER(str[1])!=jj)break;\
					} \
				break; \
			} \
			case opNotSymbolIgnoreCase: \
			{ \
				jj=op->next->symbol; \
				if(TOLOWER(*str)==jj) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(TOLOWER(str[1])==jj)break;\
					} \
				break; \
			} \
			case opType: \
			{ \
				jj=op->next->type; \
				if(!(ISTYPE(*str,jj))) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(!(ISTYPE(str[1],jj)))break;\
					} \
				break; \
			} \
			case opNotType: \
			{ \
				jj=op->next->type; \
				if((ISTYPE(*str,jj))) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if((ISTYPE(str[1],jj)))break;\
					} \
				break; \
			} \
			case opSymbolClass: \
			{ \
				cl=op->next->symbolclass; \
				if(!GetBit(cl,*str)) \
					while(str<strend && cmp && st->max--)\
					{\
						str++;\
						if(!GetBit(cl,str[1]))break;\
					} \
				break; \
			} \
		} \
	}

#ifdef RELIB
// Recursively clears a match list.
//
// For every item of `ml` the item's sublist is processed first (depth-first),
// then the item's `sublist` pointer is reset to nullptr; finally Clean() is
// called on `ml` itself.
//
// ml: list to clear. A null pointer is accepted and ignored. This matters
//     because the function itself stores nullptr into `sublist`, so items
//     without a sublist would otherwise make the recursive call dereference
//     a null pointer via ml->Count().
static void KillMatchList(MatchList *ml)
{
	if (!ml)
		return;

	for (int i=0; i<ml->Count(); i++)
	{
		KillMatchList((*ml)[i].sublist);
		(*ml)[i].sublist=nullptr;
	}

	ml->Clean();
}

#endif


int RegExp::InnerMatch(const prechar str,const prechar strend,PMatch match,int& matchcount
#ifdef NAMEDBRACKETS
                       ,PMatchHash hmatch
#endif
                      )
{
//  register prechar str=start;
	int i,j;
	int minimizing;
	PREOpCode op,tmp=nullptr;
	PMatch m;
#ifdef UNICODE
	UniSet *cl;
#else
	prechar cl;
#endif
#ifdef RELIB
	SMatchListItem ml;
#endif
	int inrangebracket=0;

	if (errorcode==errNotCompiled)return 0;

	if (matchcount<maxbackref)return SetError(errNotEnoughMatches,maxbackref);

#ifdef NAMEDBRACKETS

	if (havenamedbrackets && !hmatch)return SetError(errNoStorageForNB,0);

#endif
#ifdef RELIB

	if (reclevel<=1)
	{
#endif
		stackcount=0;
		lastpage=firstpage;
		stack=lastpage->stack;
		st=&stack[0];
#ifdef RELIB
	}

#endif
	StateStackItem *ps;
	errorcode=errNone;

	/*for(i=0;i<matchcount;i++)
	{
	  match[i].start=-1;
	  match[i].end=-1;
	}*/
	if (bracketscount<matchcount)matchcount=bracketscount;

	memset(match,-1,sizeof(*match)*matchcount);

	for (op=code; op; op=op->next)
	{
		//dpf(("op:%s,\tpos:%d,\tstr:%d\n",ops[OP.op],pos,str-start));
		dpf(("=================\n"));
		dpf(("S:%s\n%*s\n",start,str-start+3,"^"));
		dpf(("R:%s\n%*s\n",resrc,OP.srcpos+3,"^"));

		if (str<=strend)
			switch (OP.op)
			{
				case opLineStart:
				{
					if (str==start || str[-1]==0x0d || str[-1]==0x0a)continue;

					break;
				}
				case opLineEnd:
				{
					if (str==strend)continue;

					if (str[0]==0x0d || str[0]==0x0a)
					{
						if (str[0]==0x0d)str++;

						if (str[0]==0x0a)str++;

						continue;
					}

					break;
				}
				case opDataStart:
				{
					if (str==start)continue;

					break;
				}
				case opDataEnd:
				{
					if (str==strend)continue;

					break;
				}
				case opWordBound:
				{
					if ((str==start && ISWORD(*str))||
					        (!(ISWORD(str[-1])) && ISWORD(*str)) ||
					        (!(ISWORD(*str)) && ISWORD(str[-1])) ||
					        (str==strend && ISWORD(str[-1])))continue;

					break;
				}
				case opNotWordBound:
				{
					if (!((str==start && ISWORD(*str))||
					        (!(ISWORD(str[-1])) && ISWORD(*str)) ||
					        (!(ISWORD(*str)) && ISWORD(str[-1])) ||
					        (str==strend && ISWORD(str[-1]))))continue;

					break;
				}
				case opType:
				{
					if (ISTYPE(*str,OP.type))
					{
						str++;
						continue;
					}

					break;
				}
				case opNotType:
				{
					if (!(ISTYPE(*str,OP.type)))
					{
						str++;
						continue;
					}

					break;
				}
				case opCharAny:
				{
					if (*str!=0x0d && *str!=0x0a)
					{
						str++;
						continue;
					}

					break;
				}
				case opCharAnyAll:
				{
					str++;
					continue;
				}
				case opSymbol:
				{
					if (*str==OP.symbol)
					{
						str++;
						continue;
					}

					break;
				}
				case opNotSymbol:
				{
					if (*str!=OP.symbol)
					{
						str++;
						continue;
					}

					break;
				}
				case opSymbolIgnoreCase:
				{
					if (TOLOWER(*str)==OP.symbol)
					{
						str++;
						continue;
					}

					break;
				}
				case opNotSymbolIgnoreCase:
				{
					if (TOLOWER(*str)!=OP.symbol)
					{
						str++;
						continue;
					}

					break;
				}
				case opSymbolClass:
				{
					if (GetBit(OP.symbolclass,*str))
					{
						str++;
						continue;
					}

					break;
				}
				case opOpenBracket:
				{
					if (OP.bracket.index>=0 && OP.bracket.index<matchcount)
					{
						//if (inrangebracket) Mantis#1388
						{
							st->op=opOpenBracket;
							st->pos=op;
							st->min=match[OP.bracket.index].start;
							st->max=match[OP.bracket.index].end;
							PushState();
						}

						match[OP.bracket.index].start=(int)(str-start);
					}

					if (OP.bracket.nextalt)
					{
						st->op=opAlternative;
						st->pos=OP.bracket.nextalt;
						st->savestr=str;
						PushState();
					}

					continue;
				}
#ifdef NAMEDBRACKETS
				case opNamedBracket:
				{
					if (hmatch)
					{
						PMatch m2;

						if (!hmatch->Exists((char*)OP.nbracket.name))
						{
							tag_Match sm;
							sm.start=-1;
							sm.end=-1;
							m2=hmatch->SetItem((char*)OP.nbracket.name,sm);
						}
						else
						{
							m2=hmatch->GetPtr((char*)OP.nbracket.name);
			…
Large files files are truncated, but you can click here to view the full file