sphinxjson.cpp | searchcode

/src/sphinxjson.cpp

https://bitbucket.org/ecairn/sphinx-official
C++ | 1159 lines | 1019 code | 93 blank | 47 comment | 87 complexity | b8e8b75cca260aae436996c19e7cba9b MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0

//
// $Id$
//

//
// Copyright (c) 2011-2015, Andrew Aksyonoff
// Copyright (c) 2011-2015, Sphinx Technologies Inc
// All rights reserved
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//

#include "sphinxjson.h"
#include "sphinxint.h"

#if USE_WINDOWS
#include <io.h> // for isatty() in llsphinxjson.c
#endif

//////////////////////////////////////////////////////////////////////////

/// parser view on a generic node
///
/// Used as the parser's semantic value type (YYSTYPE), so instances get copied
/// around a lot. Every member is given a defined default so that such copies
/// never read indeterminate values; which members are meaningful still depends
/// on m_eType.
struct JsonNode_t
{
	ESphJsonType	m_eType;		///< node type
	int64_t			m_iValue;		///< integer value, only used for JSON_INT32 and JSON_INT64
	double			m_fValue;		///< floating point value, only used for JSON_DOUBLE
	int				m_iStart;		///< string value, start index (inclusive) into m_pBuf, only used for JSON_STRING
	int				m_iEnd;			///< string value, end index (exclusive) into m_pBuf, only used for JSON_STRING
	int				m_iHandle;		///< subobject value, index into m_dNodes storage (negative means "no children")
	int				m_iKeyStart;	///< node name, start index (inclusive) into m_pBuf
	int				m_iKeyEnd;		///< node name, end index (exclusive) into m_pBuf

	/// create a typeless node (JSON_TOTAL) with zeroed values and no subobject handle
	JsonNode_t ()
		: m_eType ( JSON_TOTAL )
		, m_iValue ( 0 )
		, m_fValue ( 0.0 )
		, m_iStart ( 0 )
		, m_iEnd ( 0 )
		, m_iHandle ( -1 )
		, m_iKeyStart ( 0 )
		, m_iKeyEnd ( 0 )
	{}
};
#define YYSTYPE JsonNode_t

// must be included after YYSTYPE declaration
#include "yysphinxjson.h"

/// actually, JSON-to-SphinxBSON converter helper, but who cares
class JsonParser_c : ISphNoncopyable
{
public:
	void *				m_pScanner;
	const char *		m_pLastToken;
	CSphVector<BYTE> &	m_dBuffer;
	CSphString &		m_sError;
	bool				m_bAutoconv;
	bool				m_bToLowercase;
	char *				m_pBuf;
	CSphVector < CSphVector<JsonNode_t> >	m_dNodes;
	CSphVector<JsonNode_t>					m_dEmpty;

public:
	JsonParser_c ( CSphVector<BYTE> & dBuffer, bool bAutoconv, bool bToLowercase, CSphString & sError )
		: m_pScanner ( NULL )
		, m_pLastToken ( NULL )
		, m_dBuffer ( dBuffer )
		, m_sError ( sError )
		, m_bAutoconv ( bAutoconv )
		, m_bToLowercase ( bToLowercase )
	{
		// reserve 4 bytes for Bloom mask
		StoreInt ( 0 );
	}

protected:
	BYTE * BufAlloc ( int iLen )
	{
		int iPos = m_dBuffer.GetLength();
		m_dBuffer.Resize ( m_dBuffer.GetLength()+iLen );
		return m_dBuffer.Begin()+iPos;
	}

	void StoreInt ( int v )
	{
		BYTE * p = BufAlloc ( 4 );
		*p++ = BYTE(DWORD(v));
		*p++ = BYTE(DWORD(v) >> 8);
		*p++ = BYTE(DWORD(v) >> 16);
		*p++ = BYTE(DWORD(v) >> 24);
	}

	void StoreBigint ( int64_t v )
	{
		StoreInt ( (DWORD)( v & 0xffffffffUL ) );
		StoreInt ( (int)( v>>32 ) );
	}

	int PackLen ( DWORD v )
	{
		if ( v<=251 )
			return 1;
		else if ( v<65536 )
			return 3;
		else if ( v<16777216 )
			return 4;
		else
			return 5;
	}

	void PackInt ( DWORD v )
	{
		assert ( v<16777216 ); // strings over 16M bytes and arrays over 16M entries are not supported
		if ( v<252 )
		{
			m_dBuffer.Add ( BYTE(v) );
		} else if ( v<65536 )
		{
			m_dBuffer.Add ( 252 );
			m_dBuffer.Add ( BYTE ( v & 255 ) );
			m_dBuffer.Add ( BYTE ( v>>8 ) );
		} else
		{
			m_dBuffer.Add ( 253 );
			m_dBuffer.Add ( BYTE ( v & 255 ) );
			m_dBuffer.Add ( BYTE ( ( v>>8 ) & 255 ) );
			m_dBuffer.Add ( BYTE ( v>>16 ) );
		}
	}

	void PackStr ( const char * s, int iLen )
	{
		iLen = Min ( iLen, 0xffffff );
		PackInt ( iLen );
		if ( iLen )
		{
			BYTE * p = BufAlloc ( iLen );
			memcpy ( p, s, iLen );
		}
	}

	int JsonUnescape ( char ** pEscaped, int iLen )
	{
		assert ( pEscaped );
		char * s = *pEscaped;

		// skip heading and trailing quotes
		if ( ( s[0]=='\'' && s[iLen-1]=='\'' ) || ( s[0]=='"' && s[iLen-1]=='"' ) )
		{
			s++;
			iLen -= 2;
		}

		char * sMax = s+iLen;
		char * d = s;
		char * pStart = d;
		char sBuf[8] = { 0 };

		while ( s<sMax )
		{
			if ( s[0]=='\\' )
			{
				switch ( s[1] )
				{
				case 'b': *d++ = '\b'; break;
				case 'n': *d++ = '\n'; break;
				case 'r': *d++ = '\r'; break;
				case 't': *d++ = '\t'; break;
				case 'f': *d++ = '\f'; break; // formfeed (rfc 4627)
				case 'u':
					// convert 6-byte sequences \u four-hex-digits (rfc 4627) to UTF-8
					if ( s+6<=sMax && isxdigit ( s[2] ) && isxdigit ( s[3] ) && isxdigit ( s[4] ) && isxdigit ( s[5] ) )
					{
						memcpy ( sBuf, s+2, 4 );
						d += sphUTF8Encode ( (BYTE*)d, (int)strtol ( sBuf, NULL, 16 ) );
						s += 4;
					} else
						*d++ = s[1];
					break;
				default:
					*d++ = s[1];
				}
				s += 2;
			} else
				*d++ = *s++;
		}

		*pEscaped = pStart;
		return d - pStart;
	}

	void PackNodeStr ( const JsonNode_t & tNode )
	{
		int iLen = tNode.m_iEnd-tNode.m_iStart;
		char *s = m_pBuf + tNode.m_iStart;
		iLen = JsonUnescape ( &s, iLen );
		PackStr ( s, iLen );
	}

	int KeyUnescape ( char ** ppKey, int iLen )
	{
		char * s = *ppKey;
		iLen = JsonUnescape ( &s, iLen );
		if ( m_bToLowercase )
			for ( int i=0; i<iLen; i++ )
				s[i] = (char)tolower ( s[i] ); // OPTIMIZE! not sure if significant, but known to be hell slow
		*ppKey = s;
		return iLen;
	}

	void StoreMask ( int iOfs, DWORD uMask )
	{
		for ( int i=0; i<4; i++ )
		{
			m_dBuffer[iOfs+i] = BYTE ( uMask & 0xff );
			uMask >>= 8;
		}
	}

	/// reserve a single byte for a yet-unknown length, to be written later with PackSize()
	/// returns its offset, to be used by PackSize() to both calculate and stored the length
	int ReserveSize()
	{
		int iOfs = m_dBuffer.GetLength();
		m_dBuffer.Resize ( iOfs+1 );
		return iOfs;
	}

	/// compute current length from the offset reserved with ReserveSize(), and pack the value back there
	/// in most cases that single byte is enough; if not, we make room by memmove()ing the data
	void PackSize ( int iOfs )
	{
		int iSize = m_dBuffer.GetLength()-iOfs-1;
		int iPackLen = PackLen ( iSize );

		if ( iPackLen!=1 )
		{
			m_dBuffer.Resize ( iOfs+iPackLen+iSize );
			memmove ( m_dBuffer.Begin()+iOfs+iPackLen, m_dBuffer.Begin()+iOfs+1, iSize );
		}

		m_dBuffer.Resize ( iOfs );
		PackInt ( iSize );
		m_dBuffer.Resize ( iOfs+iPackLen+iSize );
	}

public:
	void Finalize()
	{
		m_dBuffer.Add ( JSON_EOF );
	}

	void NumericFixup ( JsonNode_t & tNode )
	{
		// auto-convert string values, if necessary
		if ( tNode.m_eType==JSON_STRING && m_bAutoconv )
			if ( !sphJsonStringToNumber ( m_pBuf+tNode.m_iStart+1, tNode.m_iEnd-tNode.m_iStart-2, tNode.m_eType, tNode.m_iValue, tNode.m_fValue ) )
				return;

		// parser and converter emits int64 values, fix them up to int32
		if ( tNode.m_eType==JSON_INT64 )
		{
			int iVal = int(tNode.m_iValue);
			if ( tNode.m_iValue==int64_t(iVal) )
				tNode.m_eType = JSON_INT32;
		}
	}

	bool WriteNode ( JsonNode_t & tNode, const char * sKey=NULL, int iKeyLen=0 )
	{
		// convert int64 to int32, strings to numbers if needed
		NumericFixup ( tNode );

		ESphJsonType eType = tNode.m_eType;

		// note m_iHandle may be uninitialized on simple nodes
		CSphVector<JsonNode_t> & dNodes = ( ( eType==JSON_MIXED_VECTOR || eType==JSON_OBJECT ) && tNode.m_iHandle>=0 )
			? m_dNodes[ tNode.m_iHandle ]
			: m_dEmpty;

		// process mixed vector, convert to generic vector if possible
		if ( eType==JSON_MIXED_VECTOR )
		{
			ARRAY_FOREACH ( i, dNodes )
				NumericFixup ( dNodes[i] );

			ESphJsonType eBase = dNodes.GetLength()>0 ? dNodes[0].m_eType : JSON_EOF;
			bool bGeneric = ARRAY_ALL ( bGeneric, dNodes, dNodes[_all].m_eType==eBase );

			if ( bGeneric )
				switch ( eBase )
			{
				case JSON_INT32:	eType = JSON_INT32_VECTOR; break;
				case JSON_INT64:	eType = JSON_INT64_VECTOR; break;
				case JSON_DOUBLE:	eType = JSON_DOUBLE_VECTOR; break;
				case JSON_STRING:	eType = JSON_STRING_VECTOR; break;
				default:			break; // type matches across all entries, but we do not have a special format for that type
			}
		}

		// check for the root (bson v1), note sKey shouldn't be set
		if ( eType==JSON_OBJECT && m_dBuffer.GetLength()==4 && !sKey )
			eType = JSON_ROOT;

		// write node type
		if ( eType!=JSON_ROOT )
			m_dBuffer.Add ( (BYTE)eType );

		// write key if given
		if ( sKey )
			PackStr ( sKey, iKeyLen );

		switch ( eType )
		{
		// basic types
		case JSON_INT32:	StoreInt ( (int)tNode.m_iValue ); break;
		case JSON_INT64:	StoreBigint ( tNode.m_iValue ); break;
		case JSON_DOUBLE:	StoreBigint ( sphD2QW ( tNode.m_fValue ) ); break;
		case JSON_STRING:	PackNodeStr ( tNode ); break;

		// literals
		case JSON_TRUE:
		case JSON_FALSE:
		case JSON_NULL:
			// no content
			break;

		// associative arrays
		case JSON_ROOT:
		case JSON_OBJECT:
			{
				DWORD uMask = 0;
				int iOfs = 0;

				if ( eType==JSON_OBJECT )
				{
					iOfs = ReserveSize();
					StoreInt ( uMask );
				}

				ARRAY_FOREACH ( i, dNodes )
				{
					char * sObjKey = m_pBuf + dNodes[i].m_iKeyStart;
					int iLen = KeyUnescape ( &sObjKey, dNodes[i].m_iKeyEnd-dNodes[i].m_iKeyStart );
					WriteNode ( dNodes[i], sObjKey, iLen );
					uMask |= sphJsonKeyMask ( sObjKey, iLen );
				}
				m_dBuffer.Add ( JSON_EOF );

				if ( eType==JSON_OBJECT )
				{
					StoreMask ( iOfs+1, uMask );
					PackSize ( iOfs ); // MUST be in this order, because PackSize() might move the data!
				} else
				{
					assert ( eType==JSON_ROOT );
					StoreMask ( 0, uMask );
				}
				break;
			}

		// mixed array
		case JSON_MIXED_VECTOR:
			{
				int iOfs = ReserveSize();
				PackInt ( dNodes.GetLength() );
				ARRAY_FOREACH ( i, dNodes )
					WriteNode ( dNodes[i] );
				PackSize ( iOfs );
				break;
			}

		// optimized (generic) arrays
		case JSON_INT32_VECTOR:
			PackInt ( dNodes.GetLength() );
			ARRAY_FOREACH ( i, dNodes )
				StoreInt ( (int)dNodes[i].m_iValue );
			break;
		case JSON_INT64_VECTOR:
			PackInt ( dNodes.GetLength() );
			ARRAY_FOREACH ( i, dNodes )
				StoreBigint ( dNodes[i].m_iValue );
			break;
		case JSON_DOUBLE_VECTOR:
			PackInt ( dNodes.GetLength() );
			ARRAY_FOREACH ( i, dNodes )
				StoreBigint ( sphD2QW ( dNodes[i].m_fValue ) );
			break;
		case JSON_STRING_VECTOR:
			{
				int iOfs = ReserveSize();
				PackInt ( dNodes.GetLength() );
				ARRAY_FOREACH ( i, dNodes )
					PackNodeStr ( dNodes[i] );
				PackSize ( iOfs );
				break;
			}
		default:
			assert ( 0 && "internal error: unhandled type" );
			return false;
		}
		return true;
	}

	void DebugIndent ( int iLevel )
	{
		for ( int i=0; i<iLevel; i++ )
			printf ( "    " );
	}

	void DebugDump ( ESphJsonType eType, const BYTE ** ppData, int iLevel )
	{
		DebugIndent ( iLevel );

		const BYTE * p = *ppData;

		switch ( eType )
		{
		case JSON_INT32: printf ( "JSON_INT32 %d\n", sphJsonLoadInt ( &p ) ); break;
		case JSON_INT64: printf ( "JSON_INT64 "INT64_FMT"\n", sphJsonLoadBigint ( &p ) ); break;
		case JSON_DOUBLE: printf ( "JSON_DOUBLE %lf\n", sphQW2D ( sphJsonLoadBigint ( &p ) ) ); break;
		case JSON_STRING:
			{
				int iLen = sphJsonUnpackInt ( &p );
				CSphString sVal;
				sVal.SetBinary ( (const char*)p, iLen );
				printf ( "JSON_STRING \"%s\"\n", sVal.cstr() );
				p += iLen;
				break;
			}

		case JSON_TRUE:		printf ( "JSON_TRUE\n" ); break;
		case JSON_FALSE:	printf ( "JSON_FALSE\n" ); break;
		case JSON_NULL:		printf ( "JSON_NULL\n" ); break;
		case JSON_EOF:		printf ( "JSON_EOF\n" ); break;

		// associative arrays
		case JSON_ROOT:
		case JSON_OBJECT:
			{
				if ( eType==JSON_OBJECT )
					sphJsonUnpackInt ( &p );

				DWORD uMask = sphGetDword(p);
				printf ( "%s (bloom mask: 0x%08x)\n", eType==JSON_OBJECT ? "JSON_OBJECT" : "JSON_ROOT", uMask );
				p += 4; // skip bloom table
				for ( ;; )
				{
					ESphJsonType eInnerType = (ESphJsonType) *p++;
					if ( eInnerType==JSON_EOF )
						break;
					const int iStrLen = sphJsonUnpackInt ( &p );
					CSphString sVal;
					sVal.SetBinary ( (const char*)p, iStrLen );
					DebugIndent ( iLevel+1 );
					printf ( "\"%s\"", sVal.cstr() );
					p += iStrLen;
					DebugDump ( eInnerType, &p, iLevel+1 );
				}
				break;
			}

		case JSON_MIXED_VECTOR:
			{
				int iTotalLen = sphJsonUnpackInt ( &p );
				int iLen = sphJsonUnpackInt ( &p );
				printf ( "JSON_MIXED_VECTOR [%d] (%d bytes)\n", iLen, iTotalLen );
				for ( int i=0; i<iLen; i++ )
				{
					ESphJsonType eInnerType = (ESphJsonType)*p++;
					DebugDump ( eInnerType, &p, iLevel+1 );
				}
				break;
			}

		// optimized arrays ( note they can't be empty )
		case JSON_STRING_VECTOR:
			{
				sphJsonUnpackInt ( &p );
				int iLen = sphJsonUnpackInt ( &p );
				printf ( "JSON_STRING_VECTOR (%d) [", iLen );
				for ( int i=0; i<iLen; i++ )
				{
					int iStrLen = sphJsonUnpackInt ( &p );
					CSphString sVal;
					sVal.SetBinary ( (const char*)p, iStrLen );
					printf ( "\"%s\"%s", sVal.cstr(), i<iLen-1 ? "," : "]\n" );
					p += iStrLen;
				}
			break;
			}
		case JSON_INT32_VECTOR:
			{
				int iLen = sphJsonUnpackInt ( &p );
				printf ( "JSON_INT32_VECTOR (%d) [", iLen );
				for ( int i=0; i<iLen; i++ )
					printf ( "%d%s", sphJsonLoadInt ( &p ), i<iLen-1 ? "," : "]\n" );
				break;
			}
		case JSON_INT64_VECTOR:
			{
				int iLen = sphJsonUnpackInt ( &p );
				printf ( "JSON_INT64_VECTOR (%d) [", iLen );
				for ( int i=0; i<iLen; i++ )
					printf ( INT64_FMT"%s", sphJsonLoadBigint ( &p ), i<iLen-1 ? "," : "]\n" );
				break;
			}
		case JSON_DOUBLE_VECTOR:
			{
				int iLen = sphJsonUnpackInt ( &p );
				printf ( "JSON_DOUBLE_VECTOR (%d) [", iLen );
				for ( int i=0; i<iLen; i++ )
					printf ( "%lf%s", sphQW2D ( sphJsonLoadBigint ( &p ) ), i<iLen-1 ? "," : "]\n" );
				break;
			}

		default:
			printf ( "UNKNOWN\n" );
			break;
		}
		*ppData = p;
	}

	void DebugDump ( const BYTE * p )
	{
		CSphVector<BYTE> dOut;
		sphJsonFormat ( dOut, m_dBuffer.Begin() );
		dOut.Add ( '\0' );
		printf ( "sphJsonFormat: %s\n", (char*)dOut.Begin() );

		printf ( "Blob size: %d bytes\n", m_dBuffer.GetLength() );
		ESphJsonType eType = sphJsonFindFirst ( &p );
		DebugDump ( eType, &p, 0 );
		printf ( "\n" );
	}
};

// unused parameter, simply to avoid type clash between all my yylex() functions
#define YY_NO_UNISTD_H 1
#define YYLEX_PARAM pParser->m_pScanner, pParser
#define YY_DECL int yylex ( YYSTYPE * lvalp, void * yyscanner, JsonParser_c * pParser )

#include "llsphinxjson.c"

/// parser error callback; stores "<message> near '<last token>'" into the parser's error string
void yyerror ( JsonParser_c * pParser, const char * sMessage )
{
	yy2lex_unhold ( pParser->m_pScanner );

	// m_pLastToken starts out as NULL, and passing NULL for %s is undefined behavior
	const char * sToken = pParser->m_pLastToken ? pParser->m_pLastToken : "";
	pParser->m_sError.SetSprintf ( "%s near '%s'", sMessage, sToken );
}

#include "yysphinxjson.c"

/// parse JSON text into a Sphinx BSON blob
///
/// dData receives the resulting blob; it gets reset when parsing fails.
/// sData is the source text; it MUST be terminated with two zero bytes, and
/// it gets modified in place (strings are unescaped within the buffer).
/// bAutoconv enables string-to-number conversion of values; bToLowercase
/// lowercases object keys. sError receives the message on failure.
/// returns true on success, false on error
bool sphJsonParse ( CSphVector<BYTE> & dData, char * sData, bool bAutoconv, bool bToLowercase, CSphString & sError )
{
	int iLen = (int) strlen ( sData );
	if ( sData[iLen+1]!=0 )
	{
		sError = "internal error: input data passed to sphJsonParse() must be terminated with a double zero";
		return false;
	}

	JsonParser_c tParser ( dData, bAutoconv, bToLowercase, sError );
	if ( yy2lex_init ( &tParser.m_pScanner )!=0 )
	{
		sError = "internal error: yylex_init() failed";
		return false;
	}

	tParser.m_pBuf = sData; // sphJsonParse() is intentionally destructive, no need to copy data here

	// scan the caller's buffer in place; the 2 extra bytes are the double zero terminator checked above
	YY_BUFFER_STATE tLexerBuffer = yy2_scan_buffer ( sData, iLen+2, tParser.m_pScanner );
	if ( !tLexerBuffer )
	{
		// the scanner was already created at this point, so release it
		yy2lex_destroy ( tParser.m_pScanner );
		sError = "internal error: yy_scan_buffer() failed";
		return false;
	}

	int iRes = yyparse ( &tParser );
	yy2_delete_buffer ( tLexerBuffer, tParser.m_pScanner );
	yy2lex_destroy ( tParser.m_pScanner );

	tParser.Finalize();

	// do not hand out a partially built blob
	if ( iRes!=0 )
		dData.Reset();

	return iRes==0;
}

//////////////////////////////////////////////////////////////////////////

/// compute the Bloom mask of a key
/// two bit indexes are taken from the key CRC32 (bits 0..4 and bits 8..12),
/// and the respective single-bit values are added together
/// NOTE(review): the values are added, not ORed; when both indexes are equal
/// the result is one different bit (or, presumably, zero for index 31 once
/// truncated to a 32-bit DWORD). Stored masks are computed by this very
/// function, so do not "fix" that without checking format compatibility
DWORD sphJsonKeyMask ( const char * sKey, int iLen )
{
	const DWORD uCrc = sphCRC32 ( sKey, iLen );
	const DWORD uLoIndex = uCrc & 31;
	const DWORD uHiIndex = ( uCrc>>8 ) & 31;
	return ( 1UL<<uLoIndex ) + ( 1UL<<uHiIndex );
}


/// return the size of the node data, in bytes (node type byte and key not included)
/// returns -1 if size is unreachable (for remote agents), ie. when the size
/// is stored within the data, but pData is NULL
/// returns 0 for the types that carry no data
int sphJsonNodeSize ( ESphJsonType eType, const BYTE *pData )
{
	// fixed-size scalars need no data access at all
	if ( eType==JSON_INT32 )
		return 4;
	if ( eType==JSON_INT64 || eType==JSON_DOUBLE )
		return 8;

	const BYTE * pCur = pData;
	int iUnit = 0; // bytes per unit of the packed length prefix
	switch ( eType )
	{
	case JSON_INT32_VECTOR:
		iUnit = 4; // prefix is the entries count
		break;

	case JSON_INT64_VECTOR:
	case JSON_DOUBLE_VECTOR:
		iUnit = 8; // prefix is the entries count
		break;

	case JSON_STRING:
	case JSON_STRING_VECTOR:
	case JSON_MIXED_VECTOR:
	case JSON_OBJECT:
		iUnit = 1; // prefix is the payload size in bytes
		break;

	case JSON_ROOT:
		{
			// root has no size prefix, so walk over all the members
			if ( !pCur )
				return -1;
			pCur += 4; // skip filter
			for ( ESphJsonType eNode = (ESphJsonType) *pCur++; eNode!=JSON_EOF; eNode = (ESphJsonType) *pCur++ )
			{
				// skip key and node
				int iKeyLen = sphJsonUnpackInt ( &pCur );
				pCur += iKeyLen;
				sphJsonSkipNode ( eNode, &pCur );
			}
			return pCur - pData;
		}

	default:
		return 0;
	}

	// everything that got here starts with a packed length prefix
	if ( !pCur )
		return -1;
	int iCount = sphJsonUnpackInt ( &pCur );
	return pCur - pData + iCount * iUnit;
}


/// advance *ppData past the data of a node of a given type
void sphJsonSkipNode ( ESphJsonType eType, const BYTE ** ppData )
{
	*ppData += sphJsonNodeSize ( eType, *ppData );
}


/// return the number of elements in a node
/// that is 1 for numeric scalars, entries count for arrays, members count
/// for objects, and 0 for anything else (strings included)
int sphJsonFieldLength ( ESphJsonType eType, const BYTE * pData )
{
	const BYTE * pCur = pData;
	switch ( eType )
	{
	case JSON_INT32:
	case JSON_INT64:
	case JSON_DOUBLE:
		return 1;

	case JSON_STRING_VECTOR:
	case JSON_MIXED_VECTOR:
		sphJsonUnpackInt ( &pCur ); // skip size in bytes, entries count follows
		// fall through
	case JSON_INT32_VECTOR:
	case JSON_INT64_VECTOR:
	case JSON_DOUBLE_VECTOR:
		return sphJsonUnpackInt ( &pCur );

	case JSON_OBJECT:
		sphJsonUnpackInt ( &pCur ); // skip size
		// fall through
	case JSON_ROOT:
		{
			pCur += 4; // skip filter
			int iMembers = 0;
			for ( ESphJsonType eNode = (ESphJsonType) *pCur++; eNode!=JSON_EOF; eNode = (ESphJsonType) *pCur++ )
			{
				// skip key, then value
				int iKeyLen = sphJsonUnpackInt ( &pCur );
				pCur += iKeyLen;
				sphJsonSkipNode ( eNode, &pCur );
				iMembers++;
			}
			return iMembers;
		}

	default:
		return 0;
	}
}


/// detect the type of the top-level node of a blob
/// for JSON_ROOT, *ppData is left pointing to the Bloom mask; for any other
/// type, *ppData gets advanced past the mask and the type byte
ESphJsonType sphJsonFindFirst ( const BYTE ** ppData )
{
	const BYTE * pBlob = *ppData;

	// non-zero bloom mask? that is JSON_ROOT (basically a JSON_OBJECT without node header)
	if ( sphGetDword(pBlob)!=0 )
		return JSON_ROOT;

	// zero mask? must be followed by the type byte (typically JSON_EOF)
	*ppData = pBlob + 5;
	return (ESphJsonType)( pBlob[4] );
}


/// look up an object member by key
/// eType and *ppValue describe the object (JSON_OBJECT or JSON_ROOT) to search in;
/// uMask is the Bloom mask of the key, used to quickly reject missing keys
/// on success, returns the member type, and points *ppValue to the member data
/// otherwise, returns JSON_EOF, and leaves *ppValue intact
ESphJsonType sphJsonFindByKey ( ESphJsonType eType, const BYTE ** ppValue, const void * pKey, int iLen, DWORD uMask )
{
	const bool bObject = ( eType==JSON_OBJECT );
	if ( !bObject && eType!=JSON_ROOT )
		return JSON_EOF;

	const BYTE * pCur = *ppValue;
	if ( bObject )
		sphJsonUnpackInt ( &pCur ); // skip size

	// all the bits of the key mask must be set in the object mask
	const DWORD uObjectMask = sphGetDword(pCur);
	if ( ( uObjectMask & uMask )!=uMask )
		return JSON_EOF;
	pCur += 4;

	for ( ESphJsonType eNode = (ESphJsonType) *pCur++; eNode!=JSON_EOF; eNode = (ESphJsonType) *pCur++ )
	{
		const int iKeyLen = sphJsonUnpackInt ( &pCur );
		const BYTE * pNodeKey = pCur;
		pCur += iKeyLen;

		if ( iKeyLen==iLen && memcmp ( pNodeKey, pKey, iKeyLen )==0 )
		{
			*ppValue = pCur;
			return eNode;
		}

		sphJsonSkipNode ( eNode, &pCur );
	}

	return JSON_EOF;
}


/// look up an array entry by its (0-based) index
/// on success, returns the entry type, and points *ppValue to the entry data
/// returns JSON_EOF (and leaves *ppValue intact) when the node is not an array,
/// or when the index is out of bounds
ESphJsonType sphJsonFindByIndex ( ESphJsonType eType, const BYTE ** ppValue, int iIndex )
{
	if ( iIndex<0 )
		return JSON_EOF;

	const BYTE * pCur = *ppValue;

	// fixed-width entries, can jump straight to the entry needed
	if ( eType==JSON_INT32_VECTOR || eType==JSON_INT64_VECTOR || eType==JSON_DOUBLE_VECTOR )
	{
		int iEntrySize = 8;
		ESphJsonType eEntry = JSON_DOUBLE;
		if ( eType==JSON_INT32_VECTOR )
		{
			iEntrySize = 4;
			eEntry = JSON_INT32;
		} else if ( eType==JSON_INT64_VECTOR )
			eEntry = JSON_INT64;

		const int iEntries = sphJsonUnpackInt ( &pCur );
		if ( iIndex>=iEntries )
			return JSON_EOF;

		*ppValue = pCur + iIndex*iEntrySize;
		return eEntry;
	}

	// variable-width entries, need to skip over the preceding ones
	if ( eType==JSON_STRING_VECTOR )
	{
		sphJsonUnpackInt ( &pCur ); // skip size in bytes
		const int iEntries = sphJsonUnpackInt ( &pCur );
		if ( iIndex>=iEntries )
			return JSON_EOF;

		for ( int iSkip=iIndex; iSkip>0; iSkip-- )
		{
			const int iStrLen = sphJsonUnpackInt ( &pCur );
			pCur += iStrLen;
		}
		*ppValue = pCur;
		return JSON_STRING;
	}

	if ( eType==JSON_MIXED_VECTOR )
	{
		sphJsonUnpackInt ( &pCur ); // skip size in bytes
		const int iEntries = sphJsonUnpackInt ( &pCur );
		if ( iIndex>=iEntries )
			return JSON_EOF;

		for ( int iSkip=iIndex; iSkip>0; iSkip-- )
		{
			const ESphJsonType eSkipped = (ESphJsonType)*pCur++;
			sphJsonSkipNode ( eSkipped, &pCur );
		}

		// every entry is prefixed with its own type byte
		*ppValue = pCur+1;
		return (ESphJsonType)*pCur;
	}

	return JSON_EOF;
}

//////////////////////////////////////////////////////////////////////////

/// append a length-prefixed string at p to dOut
/// with bQuote, the string gets enclosed in double quotes, and \b \n \r \t \f,
/// double quote, backslash, and slash get backslash-escaped; otherwise,
/// the bytes are copied as is
/// returns the pointer past the end of the string data
static const BYTE * JsonFormatStr ( CSphVector<BYTE> & dOut, const BYTE * p, bool bQuote=true )
{
	int iLeft = sphJsonUnpackInt ( &p );
	dOut.Reserve ( dOut.GetLength()+iLeft );

	// raw mode, plain copy
	if ( !bQuote )
	{
		for ( ; iLeft!=0; iLeft--, p++ )
			dOut.Add ( *p );
		return p;
	}

	dOut.Add ( '"' );
	for ( ; iLeft!=0; iLeft--, p++ )
	{
		// control chars that have a short escape form
		char cShort = 0;
		switch ( *p )
		{
			case '\b': cShort = 'b'; break;
			case '\n': cShort = 'n'; break;
			case '\r': cShort = 'r'; break;
			case '\t': cShort = 't'; break;
			case '\f': cShort = 'f'; break; // formfeed (rfc 4627)
			default: break;
		}

		if ( cShort )
		{
			dOut.Add ( '\\' );
			dOut.Add ( cShort );
			continue;
		}

		// chars that are escaped with themselves
		if ( *p=='"' || *p=='\\' || *p=='/' )
			dOut.Add ( '\\' );
		dOut.Add ( *p );
	}
	dOut.Add ( '"' );
	return p;
}


/// append a zero-terminated string to dOut (the terminator itself is not appended)
void JsonAddStr ( CSphVector<BYTE> & dOut, const char * pStr )
{
	for ( const char * pCur = pStr; *pCur; pCur++ )
		dOut.Add ( *pCur );
}


/// format an entire blob as JSON text, appending to dOut
/// NULL blob produces no output; an empty root produces "{}"
void sphJsonFormat ( CSphVector<BYTE> & dOut, const BYTE * pData )
{
	if ( pData==NULL )
		return;

	const ESphJsonType eRoot = sphJsonFindFirst ( &pData );
	if ( eRoot!=JSON_EOF )
	{
		sphJsonFieldFormat ( dOut, pData, eRoot );
		return;
	}

	// that was an empty root
	JsonAddStr ( dOut, "{}" );
}


const BYTE * sphJsonFieldFormat ( CSphVector<BYTE> & dOut, const BYTE * pData, ESphJsonType eType, bool bQuoteString )
{
	const BYTE * p = pData;

	// format value
	switch ( eType )
	{
		case JSON_INT32:
		{
			int iOff = dOut.GetLength();
			dOut.Resize ( iOff+32 );
			int iLen = snprintf ( (char *)dOut.Begin()+iOff, 32, "%d", sphJsonLoadInt ( &p ) ); // NOLINT
			dOut.Resize ( iOff+iLen );
			break;
		}
		case JSON_INT64:
		{
			int iOff = dOut.GetLength();
			dOut.Resize ( iOff+32 );
			int iLen = snprintf ( (char *)dOut.Begin()+iOff, 32, INT64_FMT, sphJsonLoadBigint ( &p ) ); // NOLINT
			dOut.Resize ( iOff+iLen );
			break;
		}
		case JSON_DOUBLE:
		{
			int iOff = dOut.GetLength();
			dOut.Resize ( iOff+32 );
			int iLen = snprintf ( (char *)dOut.Begin()+iOff, 32, "%lf", sphQW2D ( sphJsonLoadBigint ( &p ) ) ); // NOLINT
			dOut.Resize ( iOff+iLen );
			break;
		}
		case JSON_STRING:
			p = JsonFormatStr ( dOut, p, bQuoteString );
			break;
		case JSON_STRING_VECTOR:
		{
			int iLen = sphJsonUnpackInt ( &p );
			dOut.Reserve ( dOut.GetLength()+iLen );
			int iVals = sphJsonUnpackInt ( &p );
			dOut.Add ( '[' );
			for ( int i=0; i<iVals; i++ )
			{
				if ( i>0 )
					dOut.Add ( ',' );
				p = JsonFormatStr ( dOut, p );
			}
			dOut.Add ( ']' );
			break;
		}
		case JSON_INT32_VECTOR:
		case JSON_INT64_VECTOR:
		case JSON_DOUBLE_VECTOR:
		{
			int iVals = sphJsonUnpackInt ( &p );
			dOut.Add ( '[' );
			for ( int i=0; i<iVals; i++ )
			{
				if ( i>0 )
					dOut.Add ( ',' );
				int iOff = dOut.GetLength();
				dOut.Resize ( iOff+32 );
				int iLen = 0;
				char * b = (char *)dOut.Begin()+iOff;
				switch ( eType )
				{
				case JSON_INT32_VECTOR: iLen = snprintf ( b, 32, "%d", sphJsonLoadInt ( &p ) ); break; // NOLINT
				case JSON_INT64_VECTOR: iLen = snprintf ( b, 32, INT64_FMT, sphJsonLoadBigint ( &p ) ); break; // NOLINT
				case JSON_DOUBLE_VECTOR: iLen = snprintf ( b, 32, "%lf", sphQW2D ( sphJsonLoadBigint ( &p ) ) ); break; // NOLINT
				default:
					break;
				}
				dOut.Resize ( iOff+iLen );
			}
			dOut.Add ( ']' );
			break;
		}
		case JSON_MIXED_VECTOR:
			{
				sphJsonUnpackInt ( &p );
				int iVals = sphJsonUnpackInt ( &p );
				dOut.Add ( '[' );
				for ( int i=0; i<iVals; i++ )
				{
					if ( i>0 )
						dOut.Add ( ',' );
					ESphJsonType eNode = (ESphJsonType) *p++;
					p = sphJsonFieldFormat ( dOut, p, eNode, true );
				}
				dOut.Add ( ']' );
				break;
			}
		case JSON_ROOT:
		case JSON_OBJECT:
			{
				if ( eType==JSON_OBJECT )
					sphJsonUnpackInt ( &p );
				p += 4; // skip bloom table
				dOut.Add ( '{' );
				for ( int i=0;;i++ )
				{
					ESphJsonType eNode = (ESphJsonType) *p++;
					if ( eNode==JSON_EOF )
						break;
					if ( i>0 )
						dOut.Add ( ',' );
					p = JsonFormatStr ( dOut, p );
					dOut.Add ( ':' );
					p = sphJsonFieldFormat ( dOut, p, eNode, true );
				}
				dOut.Add ( '}' );
				break;
			}
		case JSON_TRUE:		JsonAddStr ( dOut, bQuoteString ? "true" : "1" ); break;
		case JSON_FALSE:	JsonAddStr ( dOut, bQuoteString ? "false" : "0" ); break;
		case JSON_NULL:		JsonAddStr ( dOut, bQuoteString ? "null" : "" ); break;
		case JSON_EOF:		break;
		case JSON_TOTAL:	break;
	}

	return p;
}


bool sphJsonNameSplit ( const char * sName, CSphString * sColumn, CSphString * sKey )
{
	if ( !sName )
		return false;

	// find either '[' or '.', what comes first
	const char * pSep = sName;
	while ( *pSep && *pSep!='.' && *pSep!='[' )
	{
		// check for invalid characters
		if ( !sphIsAttr( *pSep ) && *pSep!=' ' )
			return false;
		pSep++;
	}

	if ( !*pSep )
		return false;

	int iSep = pSep - sName;
	if ( sColumn )
	{
		sColumn->SetBinary ( sName, iSep );
		sColumn->Trim();
	}

	if ( sKey )
		*sKey = sName + iSep + ( *pSep=='.' ? 1 : 0 );

	return true;
}


/// create an empty key (zero mask, zero length)
JsonKey_t::JsonKey_t ()
{
	m_uMask = 0;
	m_iLen = 0;
}


/// create a key from the first iLen bytes at sKey, and precompute its Bloom mask
JsonKey_t::JsonKey_t ( const char * sKey, int iLen )
	: m_uMask ( sphJsonKeyMask ( sKey, iLen ) )
	, m_iLen ( iLen )
{
	m_sKey.SetBinary ( sKey, iLen );
}


/// store a 32-bit value into 4 bytes at p, least significant byte first
void JsonStoreInt ( BYTE * p, int v )
{
	const DWORD uVal = DWORD(v);
	for ( int iShift=0; iShift<32; iShift+=8 )
		*p++ = BYTE ( uVal >> iShift );
}


/// store a 64-bit value into 8 bytes at p, as two 32-bit halves, low half first
void JsonStoreBigint ( BYTE * p, int64_t v )
{
	const DWORD uLow = (DWORD)( v & 0xffffffffUL );
	const int iHigh = (int)( v>>32 );

	JsonStoreInt ( p, uLow );
	JsonStoreInt ( p+4, iHigh );
}


/// update a numeric JSON field in place, or just check whether that is possible
/// pExpr is evaluated against pRow to locate the field; its 64-bit result packs
/// the offset into pStrings (low 32 bits) and the field type (high 32 bits)
/// iValue is the new value; for eValueType of JSON_DOUBLE, it holds the double
/// bits (as converted by sphQW2D), otherwise it is a plain integer
/// the stored type never changes, so the value gets converted to it; only
/// JSON_INT32, JSON_INT64, and JSON_DOUBLE fields are supported
/// with bUpdate unset, nothing is written, and only the checks are performed
/// returns false on NULL arguments, unsupported field type, or a value that
/// does not fit into a JSON_INT32 field
bool sphJsonInplaceUpdate ( ESphJsonType eValueType, int64_t iValue, ISphExpr * pExpr, BYTE * pStrings, const CSphRowitem * pRow, bool bUpdate )
{
	if ( !( pExpr && pStrings ) )
		return false;

	pExpr->Command ( SPH_EXPR_SET_STRING_POOL, (void*)pStrings );

	CSphMatch tMatch;
	tMatch.m_pStatic = pRow;

	const uint64_t uPacked = pExpr->Int64Eval ( tMatch );
	BYTE * pField = pStrings + ( uPacked & 0xffffffff );
	const ESphJsonType eFieldType = (ESphJsonType)( uPacked >> 32 );
	const bool bFromDouble = ( eValueType==JSON_DOUBLE );

	if ( eFieldType==JSON_INT32 )
	{
		if ( bFromDouble )
			iValue = (int64_t)sphQW2D ( iValue );

		// the value must survive the round-trip through a 32-bit int
		if ( int64_t(int(iValue))!=iValue )
			return false;

		if ( bUpdate )
			JsonStoreInt ( pField, (int)iValue );
		return true;
	}

	if ( eFieldType==JSON_INT64 )
	{
		if ( bUpdate )
			JsonStoreBigint ( pField, bFromDouble ? (int64_t)sphQW2D ( iValue ) : iValue );
		return true;
	}

	if ( eFieldType==JSON_DOUBLE )
	{
		if ( bUpdate )
			JsonStoreBigint ( pField, bFromDouble ? iValue : sphD2QW ( (double)iValue ) );
		return true;
	}

	return false;
}


/// try to convert a string of iLen bytes (not necessarily zero-terminated) to a number
/// leading whitespace is skipped; the rest must look like a number from the first
/// char to the last one, and be shorter than 32 chars
/// on success, returns true, and sets eType to either JSON_DOUBLE (value in fVal;
/// picked when there is a dot or an exponent) or JSON_INT64 (value in iVal)
/// on failure, returns false, and leaves all the output arguments intact
bool sphJsonStringToNumber ( const char * s, int iLen, ESphJsonType & eType, int64_t & iVal, double & fVal )
{
	// skip whitespace
	for ( ; iLen>0; s++, iLen-- )
	{
		const char c = *s;
		if ( !( c==' ' || c=='\n' || c=='\r' || c=='\t' || c=='\f' ) )
			break;
	}

	if ( iLen<=0 )
		return false;

	// check whether the string looks like a numeric
	// the first char must be a minus sign, a dot, or a digit
	const char cFirst = s[0];
	bool bNumeric = ( cFirst=='-' || cFirst=='.' || ( cFirst>='0' && cFirst<='9' ) );
	bool bDot = ( cFirst=='.' );
	bool bExp = false;
	bool bExpSign = false;

	for ( int i=1; bNumeric && i<iLen; i++ )
	{
		const char c = s[i];
		if ( c=='.' )
		{
			// at most one dot
			if ( bDot )
				bNumeric = false;
			bDot = true;

		} else if ( c=='e' || c=='E' )
		{
			// at most one exponent
			if ( bExp )
				bNumeric = false;
			bExp = true;

		} else if ( c=='-' || c=='+' )
		{
			// at most one sign, and only after the exponent
			if ( !bExp || bExpSign )
				bNumeric = false;
			bExpSign = true;

		} else if ( c<'0' || c>'9' )
			bNumeric = false;
	}

	if ( !bNumeric || iLen>=32 )
		return false;

	// convert string to number; strtod/strtoll want a zero-terminated copy
	char sVal[32];
	memcpy ( sVal, s, iLen );
	sVal[iLen] = '\0';
	char * pParsed = NULL;
	const char * pExpectedEnd = sVal+iLen;

	// setting errno to zero is necessary because strtod/strtoll do not indicate
	// whether it was an overflow or a valid input for borderline values
	errno = 0;

	if ( bDot || bExp )
	{
		const double fRes = strtod ( sVal, &pParsed );
		if ( pParsed!=pExpectedEnd || errno==ERANGE )
			return false;

		eType = JSON_DOUBLE;
		fVal = fRes;
		return true;
	}

	const int64_t iRes = strtoll ( sVal, &pParsed, 10 );
	if ( pParsed!=pExpectedEnd || errno==ERANGE )
		return false;

	eType = JSON_INT64;
	iVal = iRes;
	return true;
}

//
// $Id$
//