PageRenderTime 148ms CodeModel.GetById 28ms RepoModel.GetById 6ms app.codeStats 1ms

/ThirdParty/xmlParser/xmlParser.cpp

https://bitbucket.org/Alexab/rdk
C++ | 3016 lines | 2406 code | 234 blank | 376 comment | 495 complexity | 21cf8a1a54b345101676ce3b649769aa MD5 | raw file

Large files files are truncated, but you can click here to view the full file

  1. /**
  2. ****************************************************************************
  3. * <P> XML.c - implementation file for basic XML parser written in ANSI C++
  4. * for portability. It works by using recursion and a node tree for breaking
  5. * down the elements of an XML document. </P>
  6. *
  7. * @version V2.43
  8. * @author Frank Vanden Berghen
  9. *
  10. * NOTE:
  11. *
  12. * If you add "#define STRICT_PARSING", on the first line of this file
  13. * the parser will see the following XML-stream:
  14. * <a><b>some text</b><b>other text </a>
 * as an error. Otherwise, this string will be equivalent to:
  16. * <a><b>some text</b><b>other text</b></a>
  17. *
  18. * NOTE:
  19. *
  20. * If you add "#define APPROXIMATE_PARSING" on the first line of this file
  21. * the parser will see the following XML-stream:
  22. * <data name="n1">
  23. * <data name="n2">
  24. * <data name="n3" />
  25. * as equivalent to the following XML-stream:
  26. * <data name="n1" />
  27. * <data name="n2" />
  28. * <data name="n3" />
 * This can be useful for badly-formed XML-streams but prevents the use
  30. * of the following XML-stream (problem is: tags at contiguous levels
  31. * have the same names):
  32. * <data name="n1">
  33. * <data name="n2">
  34. * <data name="n3" />
  35. * </data>
  36. * </data>
  37. *
  38. * NOTE:
  39. *
  40. * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
  41. * the "openFileHelper" function will always display error messages inside the
  42. * console instead of inside a message-box-window. Message-box-windows are
  43. * available on windows 9x/NT/2000/XP/Vista only.
  44. *
  45. * Copyright (c) 2002, Business-Insight
  46. * <a href="http://www.Business-Insight.com">Business-Insight</a>
  47. * All rights reserved.
  48. * See the file "AFPL-license.txt" about the licensing terms
  49. *
  50. ****************************************************************************
  51. */
  52. #define _XMLPARSER_NO_MESSAGEBOX_
  53. #ifndef _CRT_SECURE_NO_DEPRECATE
  54. #define _CRT_SECURE_NO_DEPRECATE
  55. #endif
  56. #include "xmlParser.h"
  57. #ifdef _XMLWINDOWS
  58. //#ifdef _DEBUG
  59. //#define _CRTDBG_MAP_ALLOC
  60. //#include <crtdbg.h>
  61. //#endif
  62. #define WIN32_LEAN_AND_MEAN
  63. #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
// to have "MessageBoxA" to display error messages for openFileHelper
  65. #endif
  66. //#include <memory.h>
  67. #include <assert.h>
  68. #include <stdio.h>
  69. #include <string.h>
  70. #include <stdlib.h>
  71. XMLCSTR XMLNode::getVersion() { return _CXML("v2.43"); }
  72. void freeXMLString(XMLSTR t){if(t)free(t);}
// File-level parser option flags, all enabled (1) by default.
// openFileHelper() forwards them to setGlobalOptions() together with the
// character encoding it guessed from the file.
RDK_XML_DEBUG_STATIC char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1;
  74. inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }
// Table of the "clear tags" recognized by the library: each entry gives the
// opening marker, the length of that marker in characters, and the closing marker.
// You can modify the initialization of "XMLClearTags" below to change the set
// of recognized clear tags, subject to these constraints:
//  - the second column must be the length of the string in the first column;
//  - the "<!DOCTYPE" declaration must be the second entry in the list;
//  - the "<!--" declaration must be the third entry in the list;
//  - every opening string must start with the '<' character;
//  - the list is terminated by an all-NULL entry.
typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
RDK_XML_DEBUG_STATIC ALLXMLClearTag XMLClearTags[] =
{
{ _CXML("<![CDATA["),9, _CXML("]]>") },
{ _CXML("<!DOCTYPE"),9, _CXML(">") },
{ _CXML("<!--") ,4, _CXML("-->") },
{ _CXML("<PRE>") ,5, _CXML("</PRE>") },
// { _CXML("<Script>") ,8, _CXML("</Script>")},
{ NULL ,0, NULL }
};
// Table of the named character entities recognized by the library. Each entry
// holds the escaped form (s), the length of that escaped form in characters (l)
// and the single character it stands for (c). The list ends with an entry whose
// s member is NULL.
// You can modify the initialization of "XMLEntities" below to change the
// recognized entities; keep the second column equal to the length of the first.
// Additionally, the numeric syntaxes "&#xA0;" and "&#160;" are recognized.
typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
const RDK_XML_DEBUG_STATIC XMLCharacterEntity XMLEntities[] =
{
{ _CXML("&amp;" ), 5, _CXML('&' )},
{ _CXML("&lt;" ), 4, _CXML('<' )},
{ _CXML("&gt;" ), 4, _CXML('>' )},
{ _CXML("&quot;"), 6, _CXML('\"')},
{ _CXML("&apos;"), 6, _CXML('\'')},
{ NULL , 0, '\0' }
};
// Indentation character (a tab) used when an XMLNode is rendered to a string
// with formatting enabled (see the "createXMLString" function).
#define INDENTCHAR _CXML('\t')
  110. // The following function parses the XML errors into a user friendly string.
  111. // You can edit this to change the output language of the library to something else.
  112. XMLCSTR XMLNode::getError(XMLError xerror)
  113. {
  114. switch (xerror)
  115. {
  116. case eXMLErrorNone: return _CXML("No error");
  117. case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag");
  118. case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found");
  119. case eXMLErrorEmpty: return _CXML("Error: No XML data");
  120. case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name");
  121. case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name");
  122. case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag");
  123. case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end");
  124. case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found");
  125. case eXMLErrorNoElements: return _CXML("Error: No elements found");
  126. case eXMLErrorFileNotFound: return _CXML("Error: File not found");
  127. case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found");
  128. case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity");
  129. case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
  130. case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars");
  131. case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing");
  132. case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file");
  133. case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4");
  134. case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated");
  135. case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character");
  136. case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small");
  137. };
  138. return _CXML("Unknown");
  139. }
  140. /////////////////////////////////////////////////////////////////////////
  141. // Here start the abstraction layer to be OS-independent //
  142. /////////////////////////////////////////////////////////////////////////
  143. // Here is an abstraction layer to access some common string manipulation functions.
  144. // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
  145. // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
  146. // If you plan to "port" the library to a new system/compiler, all you have to do is
  147. // to edit the following lines.
// myIsTextWideChar: heuristic that reports whether a raw buffer looks like
// wide-character text.
//   b   - start of the buffer to inspect
//   len - size of the buffer in bytes
// Returns TRUE when the buffer is judged to be wide-character text, FALSE otherwise.
// Three variants exist; the preprocessor selects exactly one of them.
#ifdef XML_NO_WIDE_CHAR
// Wide-character support is compiled out: never report wide-character text.
char myIsTextWideChar(const void *b, int len) { return FALSE; }
#else
// NOTE: the "#if 1" forces the portable implementation below on every platform;
// the original condition is kept in the trailing comment.
#if 1//defined (UNDER_CE) || !defined(_XMLWINDOWS)
char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
{
#ifdef sun
// for SPARC processors: wchar_t* buffers must always be aligned, otherwise it's a char* buffer.
if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
#endif
const wchar_t *s=(const wchar_t*)b;
// buffer too small:
if (len<(int)sizeof(wchar_t)) return FALSE;
// odd length test
if (len&1) return FALSE;
/* only checks the first 256 characters */
// from here on "len" counts wchar_t elements, not bytes
len=mmin(256,len/sizeof(wchar_t));
// Check for the special byte order:
// (only the first 16 bits of the buffer are compared against the two signatures)
if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
// checks for ASCII characters in the UNICODE stream
// (accept when more than half of the inspected elements have a value <= 255)
int i,stats=0;
for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
if (stats>len/2) return TRUE;
// Check for UNICODE NULL chars
for (i=0; i<len; i++) if (!s[i]) return TRUE;
return FALSE;
}
#else
// Windows-only variant that delegates to the Win32 IsTextUnicode API
// (currently unreachable because of the "#if 1" above).
char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); }
#endif
#endif
  180. #ifdef _XMLWINDOWS
  181. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
  182. #ifdef _XMLWIDECHAR
  183. wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
  184. {
  185. int i;
  186. if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0);
  187. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0);
  188. if (i<0) return NULL;
  189. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
  190. if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i);
  191. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i);
  192. d[i]=0;
  193. return d;
  194. }
// Windows, wide-character build (_XMLWIDECHAR): the x* helpers forward to the
// wchar_t versions of the C runtime functions.
RDK_XML_DEBUG_STATIC inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
RDK_XML_DEBUG_STATIC inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); }
// case-insensitive comparison of at most l characters
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
// case-sensitive comparison of at most l characters
RDK_XML_DEBUG_STATIC inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
// case-insensitive comparison of whole strings
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  202. #else
  203. char *myWideCharToMultiByte(const wchar_t *s, XMLNode::XMLCharEncoding &characterEncoding)
  204. {
  205. UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8;
  206. int i=(int)WideCharToMultiByte(codePage, // code page
  207. 0, // performance and mapping flags
  208. s, // wide-character string
  209. -1, // number of chars in string
  210. NULL, // buffer for new string
  211. 0, // size of buffer
  212. NULL, // default for unmappable chars
  213. NULL // set when default char used
  214. );
  215. if (i<0) return NULL;
  216. char *d=(char*)malloc(i+1);
  217. WideCharToMultiByte(codePage, // code page
  218. 0, // performance and mapping flags
  219. s, // wide-character string
  220. -1, // number of chars in string
  221. d, // buffer for new string
  222. i, // size of buffer
  223. NULL, // default for unmappable chars
  224. NULL // set when default char used
  225. );
  226. d[i]=0;
  227. return d;
  228. }
// Windows, multi-byte (char) build: the x* helpers forward to the narrow
// versions of the C runtime functions.
RDK_XML_DEBUG_STATIC inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
RDK_XML_DEBUG_STATIC inline int xstrlen(XMLCSTR c) { return (int)strlen(c); }
#ifdef __BORLANDC__
// Borland: case-insensitive comparisons are spelled without a leading underscore.
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); }
#else
// Other Windows compilers: underscore-prefixed case-insensitive comparisons.
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
#endif
RDK_XML_DEBUG_STATIC inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  241. #endif
  242. #else
  243. // for gcc and CC
  244. #ifdef XML_NO_WIDE_CHAR
// Wide-character support compiled out (XML_NO_WIDE_CHAR): no conversion is
// performed and NULL is always returned.
char *myWideCharToMultiByte(const wchar_t *s) { return NULL; }
  246. #else
  247. char *myWideCharToMultiByte(const wchar_t *s)
  248. {
  249. const wchar_t *ss=s;
  250. int i=(int)wcsrtombs(NULL,&ss,0,NULL);
  251. if (i<0) return NULL;
  252. char *d=(char *)malloc(i+1);
  253. wcsrtombs(d,&s,i,NULL);
  254. d[i]=0;
  255. return d;
  256. }
  257. #endif
  258. #ifdef _XMLWIDECHAR
  259. wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
  260. {
  261. const char *ss=s;
  262. int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
  263. if (i<0) return NULL;
  264. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
  265. mbsrtowcs(d,&s,i,NULL);
  266. d[i]=0;
  267. return d;
  268. }
  269. int xstrlen(XMLCSTR c) { return wcslen(c); }
  270. #ifdef sun
// for CC (Sun compiler): wide-string comparisons come from <widec.h>
#include <widec.h>
// case-insensitive comparison of at most l characters
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
// case-sensitive comparison of at most l characters
RDK_XML_DEBUG_STATIC inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
// case-insensitive comparison of whole strings
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
  276. #else
// Non-Sun wide build: case-sensitive comparison of at most l characters via wcsncmp.
RDK_XML_DEBUG_STATIC inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
  278. #ifdef __linux__
// for gcc/linux: case-insensitive wide comparisons are forwarded to
// wcsncasecmp / wcscasecmp
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
  282. #else
  283. #include <wctype.h>
  284. // for gcc/non-linux (MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 4.3.2, HP-UX 11, IRIX 6.5, OSF/1 5.1, Cygwin, mingw)
  285. RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2)
  286. {
  287. wchar_t left,right;
  288. do
  289. {
  290. left=towlower(*c1++); right=towlower(*c2++);
  291. } while (left&&(left==right));
  292. return (int)left-(int)right;
  293. }
  294. RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l)
  295. {
  296. wchar_t left,right;
  297. while(l--)
  298. {
  299. left=towlower(*c1++); right=towlower(*c2++);
  300. if ((!left)||(left!=right)) return (int)left-(int)right;
  301. }
  302. return 0;
  303. }
  304. #endif
  305. #endif
// Wide-string substring search and copy, forwarded to wcsstr / wcscpy.
// xstrstr casts away the constness of the pointer returned by wcsstr.
RDK_XML_DEBUG_STATIC inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  308. RDK_XML_DEBUG_STATIC inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
  309. {
  310. char *filenameAscii=myWideCharToMultiByte(filename);
  311. FILE *f;
  312. if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb");
  313. else f=fopen(filenameAscii,"wb");
  314. free(filenameAscii);
  315. return f;
  316. }
  317. #else
// Non-Windows, multi-byte (char) build: the x* helpers forward to the narrow
// C library functions.
RDK_XML_DEBUG_STATIC inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
RDK_XML_DEBUG_STATIC inline int xstrlen(XMLCSTR c) { return strlen(c); }
// NOTE: in this configuration the "case-insensitive" helpers xstrnicmp and
// xstricmp are mapped onto the case-SENSITIVE strncmp / strcmp; the
// strncasecmp / strcasecmp variants are kept in the trailing comments.
RDK_XML_DEBUG_STATIC inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}//{ return strncasecmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
RDK_XML_DEBUG_STATIC inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcmp(c1,c2); }//{ return strcasecmp(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
RDK_XML_DEBUG_STATIC inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  325. #endif
// Non-Windows stand-in for the Microsoft _strnicmp name.
// NOTE: it forwards to the case-SENSITIVE strncmp; the strncasecmp variant is
// kept in the trailing comment.
RDK_XML_DEBUG_STATIC inline int _strnicmp(const char *c1,const char *c2, int l) { return strncmp(c1,c2,l);}//{ return strncasecmp(c1,c2,l);}
  327. #endif
  328. ///////////////////////////////////////////////////////////////////////////////
  329. // the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions //
  330. ///////////////////////////////////////////////////////////////////////////////
// These 6 functions are not used inside the XMLparser.
// They are only here as "convenience" functions for the user.
  333. // If you don't need them, you can delete them without any trouble.
  334. #ifdef _XMLWIDECHAR
  335. #ifdef _XMLWINDOWS
  336. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
  337. char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)_wtoi(t); return v; }
  338. int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; }
  339. long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; }
  340. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/ return v; }
  341. #else
  342. #ifdef sun
  343. // for CC
  344. #include <widec.h>
  345. char xmltob(XMLCSTR t,char v){ if (t) return (char)wstol(t,NULL,10); return v; }
  346. int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; }
  347. long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; }
  348. #else
  349. // for gcc
  350. char xmltob(XMLCSTR t,char v){ if (t) return (char)wcstol(t,NULL,10); return v; }
  351. int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; }
  352. long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; }
  353. #endif
  354. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/ return v; }
  355. #endif
  356. #else
  357. char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; }
  358. int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; }
  359. long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; }
  360. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; }
  361. #endif
  362. XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v){ if (t) return t; return v; }
  363. XMLCHAR xmltoc(XMLCSTR t,const XMLCHAR v){ if (t&&(*t)) return *t; return v; }
  364. /////////////////////////////////////////////////////////////////////////
  365. // the "openFileHelper" function //
  366. /////////////////////////////////////////////////////////////////////////
  367. // Since each application has its own way to report and deal with errors, you should modify & rewrite
  368. // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
  369. XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
  370. {
  371. // guess the value of the global parameter "characterEncoding"
  372. // (the guess is based on the first 200 bytes of the file).
  373. FILE *f=xfopen(filename,_CXML("rb"));
  374. if (f)
  375. {
  376. char bb[205];
  377. int l=(int)fread(bb,1,200,f);
  378. setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText);
  379. fclose(f);
  380. }
  381. // parse the file
  382. XMLResults pResults;
  383. XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
  384. // display error message (if any)
  385. if (pResults.error != eXMLErrorNone)
  386. {
  387. // create message
  388. char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML("");
  389. if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
  390. sprintf(message,
  391. #ifdef _XMLWIDECHAR
  392. "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
  393. #else
  394. "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
  395. #endif
  396. ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
  397. // display message
  398. #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
  399. MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
  400. #else
  401. printf("%s",message);
  402. #endif
  403. exit(255);
  404. }
  405. return xnode;
  406. }
  407. /////////////////////////////////////////////////////////////////////////
  408. // Here start the core implementation of the XMLParser library //
  409. /////////////////////////////////////////////////////////////////////////
  410. // You should normally not change anything below this point.
  411. #ifndef _XMLWIDECHAR
// Character-length lookup tables (multi-byte build only).
// If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.
// If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).
// If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).
// Each table is indexed by the first byte of a character and gives the length
// of that character in bytes.
// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
//
// UTF-8 table: lead bytes listed as invalid are given a length of 1.
RDK_XML_DEBUG_STATIC const char XML_utf8ByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
// Character-length table for single-byte ("legacy") encodings: every non-zero
// first byte denotes a 1-byte character; entry 0 is 0.
RDK_XML_DEBUG_STATIC const char XML_legacyByteTable[256] =
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
// Character-length table for Shift-JIS: first bytes 0x81-0x9F and 0xE0-0xEF
// start a 2-byte character, every other non-zero byte is a 1-byte character.
RDK_XML_DEBUG_STATIC const char XML_sjisByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
};
// Character-length table for GB2312: first bytes 0xA1-0xF7 start a 2-byte
// character, every other non-zero byte is a 1-byte character.
RDK_XML_DEBUG_STATIC const char XML_gb2312ByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
};
// Character-length table shared by GBK and Big5: first bytes 0x81-0xFE start a
// 2-byte character, every other non-zero byte is a 1-byte character.
RDK_XML_DEBUG_STATIC const char XML_gbk_big5_ByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
};
// Character-length table currently in effect; the string routines in this file
// index it with the first byte of each character.
RDK_XML_DEBUG_STATIC const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
  508. #endif
// Definition of the static member XMLNode::emptyXMLNode (default-constructed).
XMLNode XMLNode::emptyXMLNode;
  510. //XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
  511. //XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
// Enumeration used to decipher what type a token is.
// The trailing comments show the literal text matched by the punctuation tokens.
typedef enum XMLTokenTypeTag
{
eTokenText = 0,
eTokenQuotedText,
eTokenTagStart, /* "<" */
eTokenTagEnd, /* "</" */
eTokenCloseTag, /* ">" */
eTokenEquals, /* "=" */
eTokenDeclaration, /* "<?" */
eTokenShortHandClose, /* "/>" */
eTokenClear,
eTokenError
} XMLTokenType;
// Main structure used for parsing XML.
// A pointer to it is handed to the parsing helpers; for example fromXMLString()
// stores eXMLErrorUnknownCharacterEntity in "error" when it meets an entity it
// cannot decode.
// NOTE(review): the exact role of the remaining fields is not visible in this
// part of the file; the names suggest text pointers and their lengths (lp*/cb*
// pairs) plus position indices - confirm against the parser code.
typedef struct XML
{
XMLCSTR lpXML;
XMLCSTR lpszText;
int nIndex,nIndexMissigEndTag;
enum XMLError error;
XMLCSTR lpEndTag;
int cbEndTag;
XMLCSTR lpNewElement;
int cbNewElement;
int nFirst;
} XML;
// Pairs a pointer into the XMLClearTags table (pClr) with a string pointer (pStr).
// NOTE(review): presumably the payload of a token returned by the tokenizer -
// confirm against the code that fills it in.
typedef struct
{
ALLXMLClearTag *pClr;
XMLCSTR pStr;
} NextToken;
// Enumeration used when parsing attributes: which part of a name="value"
// attribute is being handled (name, equals sign, value).
typedef enum Attrib
{
eAttribName = 0,
eAttribEquals,
eAttribValue
} Attrib;
// Enumeration used when parsing elements to dictate whether we are currently
// inside a tag (eInsideTag) or outside of one (eOutsideTag).
typedef enum XMLStatus
{
eInsideTag = 0,
eOutsideTag
} XMLStatus;
  558. XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
  559. {
  560. if (!d) return eXMLErrorNone;
  561. FILE *f=xfopen(filename,_CXML("wb"));
  562. if (!f) return eXMLErrorCannotOpenWriteFile;
  563. #ifdef _XMLWIDECHAR
  564. unsigned char h[2]={ 0xFF, 0xFE };
  565. if (!fwrite(h,2,1,f))
  566. {
  567. fclose(f);
  568. return eXMLErrorCannotWriteFile;
  569. }
  570. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  571. {
  572. if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f))
  573. {
  574. fclose(f);
  575. return eXMLErrorCannotWriteFile;
  576. }
  577. }
  578. #else
  579. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  580. {
  581. if (characterEncoding==char_encoding_UTF8)
  582. {
  583. // header so that windows recognize the file as UTF-8:
  584. unsigned char h[3]={0xEF,0xBB,0xBF};
  585. if (!fwrite(h,3,1,f))
  586. {
  587. fclose(f);
  588. return eXMLErrorCannotWriteFile;
  589. }
  590. encoding="utf-8";
  591. } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";
  592. if (!encoding) encoding="ISO-8859-1";
  593. if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0)
  594. {
  595. fclose(f);
  596. return eXMLErrorCannotWriteFile;
  597. }
  598. } else
  599. {
  600. if (characterEncoding==char_encoding_UTF8)
  601. {
  602. unsigned char h[3]={0xEF,0xBB,0xBF};
  603. if (!fwrite(h,3,1,f))
  604. {
  605. fclose(f);
  606. return eXMLErrorCannotWriteFile;
  607. }
  608. }
  609. }
  610. #endif
  611. int i;
  612. XMLSTR t=createXMLString(nFormat,&i);
  613. if (!fwrite(t,sizeof(XMLCHAR)*i,1,f))
  614. {
  615. free(t);
  616. fclose(f);
  617. return eXMLErrorCannotWriteFile;
  618. }
  619. if (fclose(f)!=0)
  620. {
  621. free(t);
  622. return eXMLErrorCannotWriteFile;
  623. }
  624. free(t);
  625. return eXMLErrorNone;
  626. }
  627. // Duplicate a given string.
  628. XMLSTR stringDup(XMLCSTR lpszData, int cbData)
  629. {
  630. if (lpszData==NULL) return NULL;
  631. XMLSTR lpszNew;
  632. if (cbData==-1) cbData=(int)xstrlen(lpszData);
  633. lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
  634. if (lpszNew)
  635. {
  636. memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
  637. lpszNew[cbData] = (XMLCHAR)NULL;
  638. }
  639. return lpszNew;
  640. }
  641. XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source)
  642. {
  643. XMLSTR dd=dest;
  644. XMLCHAR ch;
  645. const XMLCharacterEntity *entity;
  646. while ((ch=*source))
  647. {
  648. entity=XMLEntities;
  649. do
  650. {
  651. if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
  652. entity++;
  653. } while(entity->s);
  654. #ifdef _XMLWIDECHAR
  655. *(dest++)=*(source++);
  656. #else
  657. switch(XML_ByteTable[(unsigned char)ch])
  658. {
  659. case 4: *(dest++)=*(source++);
  660. case 3: *(dest++)=*(source++);
  661. case 2: *(dest++)=*(source++);
  662. case 1: *(dest++)=*(source++);
  663. }
  664. #endif
  665. out_of_loop1:
  666. ;
  667. }
  668. *dest=0;
  669. return dd;
  670. }
  671. // private (used while rendering):
  672. int ToXMLStringTool::lengthXMLString(XMLCSTR source)
  673. {
  674. int r=0;
  675. const XMLCharacterEntity *entity;
  676. XMLCHAR ch;
  677. while ((ch=*source))
  678. {
  679. entity=XMLEntities;
  680. do
  681. {
  682. if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
  683. entity++;
  684. } while(entity->s);
  685. #ifdef _XMLWIDECHAR
  686. r++; source++;
  687. #else
  688. ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
  689. #endif
  690. out_of_loop1:
  691. ;
  692. }
  693. return r;
  694. }
  695. ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
  696. void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
  697. XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
  698. {
  699. if (!source)
  700. {
  701. if (buflen<1) { buflen=1; buf=(XMLSTR)malloc(sizeof(XMLCHAR)); }
  702. *buf=0;
  703. return buf;
  704. }
  705. int l=lengthXMLString(source)+1;
  706. if (l>buflen) { freeBuffer(); buflen=l; buf=(XMLSTR)malloc(l*sizeof(XMLCHAR)); }
  707. return toXMLUnSafe(buf,source);
  708. }
  709. // private:
  710. XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
  711. {
  712. // This function is the opposite of the function "toXMLString". It decodes the escape
  713. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  714. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  715. // the XML library will always gives you back "decoded" strings.
  716. //
  717. // in: string (s) and length (lo) of string
  718. // out: new allocated string converted from xml
  719. if (!s) return NULL;
  720. int ll=0,j;
  721. XMLSTR d;
  722. XMLCSTR ss=s;
  723. const XMLCharacterEntity *entity;
  724. while ((lo>0)&&(*s))
  725. {
  726. if (*s==_CXML('&'))
  727. {
  728. if ((lo>2)&&(s[1]==_CXML('#')))
  729. {
  730. s+=2; lo-=2;
  731. if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; }
  732. while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++;
  733. if (*s!=_CXML(';'))
  734. {
  735. pXML->error=eXMLErrorUnknownCharacterEntity;
  736. return NULL;
  737. }
  738. s++; lo--;
  739. } else
  740. {
  741. entity=XMLEntities;
  742. do
  743. {
  744. if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
  745. entity++;
  746. } while(entity->s);
  747. if (!entity->s)
  748. {
  749. pXML->error=eXMLErrorUnknownCharacterEntity;
  750. return NULL;
  751. }
  752. }
  753. } else
  754. {
  755. #ifdef _XMLWIDECHAR
  756. s++; lo--;
  757. #else
  758. j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
  759. #endif
  760. }
  761. ll++;
  762. }
  763. d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
  764. s=d;
  765. while (ll-->0)
  766. {
  767. if (*ss==_CXML('&'))
  768. {
  769. if (ss[1]==_CXML('#'))
  770. {
  771. ss+=2; j=0;
  772. if ((*ss==_CXML('X'))||(*ss==_CXML('x')))
  773. {
  774. ss++;
  775. while (*ss!=_CXML(';'))
  776. {
  777. if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0');
  778. else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10;
  779. else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10;
  780. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  781. ss++;
  782. }
  783. } else
  784. {
  785. while (*ss!=_CXML(';'))
  786. {
  787. if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0');
  788. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  789. ss++;
  790. }
  791. }
  792. #ifndef _XMLWIDECHAR
  793. if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;}
  794. #endif
  795. (*d++)=(XMLCHAR)j; ss++;
  796. } else
  797. {
  798. entity=XMLEntities;
  799. do
  800. {
  801. if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
  802. entity++;
  803. } while(entity->s);
  804. }
  805. } else
  806. {
  807. #ifdef _XMLWIDECHAR
  808. *(d++)=*(ss++);
  809. #else
  810. switch(XML_ByteTable[(unsigned char)*ss])
  811. {
  812. case 4: *(d++)=*(ss++); ll--;
  813. case 3: *(d++)=*(ss++); ll--;
  814. case 2: *(d++)=*(ss++); ll--;
  815. case 1: *(d++)=*(ss++);
  816. }
  817. #endif
  818. }
  819. }
  820. *d=0;
  821. return (XMLSTR)s;
  822. }
  // True when ch is a new-line, a space, a tab or a carriage return.
  // The argument is parenthesised so that any expression can be passed; note that
  // it is evaluated up to four times. The outer parentheses must stay: the parser
  // uses the macro directly as the condition of "if"/"while" statements.
  #define XML_isSPACECHAR(ch) (((ch)==_CXML('\n'))||((ch)==_CXML(' '))||((ch)==_CXML('\t'))||((ch)==_CXML('\r')))
  824. // private:
  825. char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
  826. // !!!! WARNING strange convention&:
  827. // return 0 if equals
  828. // return 1 if different
  829. {
  830. if (!cclose) return 1;
  831. int l=(int)xstrlen(cclose);
  832. if (xstrnicmp(cclose, copen, l)!=0) return 1;
  833. const XMLCHAR c=copen[l];
  834. if (XML_isSPACECHAR(c)||
  835. (c==_CXML('/' ))||
  836. (c==_CXML('<' ))||
  837. (c==_CXML('>' ))||
  838. (c==_CXML('=' ))) return 0;
  839. return 1;
  840. }
  841. // Obtain the next character from the string.
  842. RDK_XML_DEBUG_STATIC inline XMLCHAR getNextChar(XML *pXML)
  843. {
  844. XMLCHAR ch = pXML->lpXML[pXML->nIndex];
  845. #ifdef _XMLWIDECHAR
  846. if (ch!=0) pXML->nIndex++;
  847. #else
  848. pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
  849. #endif
  850. return ch;
  851. }
  852. // Find the next token in a string.
  853. // pcbToken contains the number of characters that have been read.
  854. RDK_XML_DEBUG_STATIC NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
  855. {
  856. NextToken result;
  857. XMLCHAR ch;
  858. XMLCHAR chTemp;
  859. int indexStart,nFoundMatch,nIsText=FALSE;
  860. result.pClr=NULL; // prevent warning
  861. // Find next non-white space character
  862. do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
  863. if (ch)
  864. {
  865. // Cache the current string pointer
  866. result.pStr = &pXML->lpXML[indexStart];
  867. // check for standard tokens
  868. switch(ch)
  869. {
  870. // Check for quotes
  871. case _CXML('\''):
  872. case _CXML('\"'):
  873. // Type of token
  874. *pType = eTokenQuotedText;
  875. chTemp = ch;
  876. // Set the size
  877. nFoundMatch = FALSE;
  878. // Search through the string to find a matching quote
  879. while((ch = getNextChar(pXML)))
  880. {
  881. if (ch==chTemp) { nFoundMatch = TRUE; break; }
  882. if (ch==_CXML('<')) break;
  883. }
  884. // If we failed to find a matching quote
  885. if (nFoundMatch == FALSE)
  886. {
  887. pXML->nIndex=indexStart+1;
  888. nIsText=TRUE;
  889. break;
  890. }
  891. // 4.02.2002
  892. // if (FindNonWhiteSpace(pXML)) pXML->nIndex--;
  893. break;
  894. // Equals (used with attribute values)
  895. case _CXML('='):
  896. *pType = eTokenEquals;
  897. break;
  898. // Close tag
  899. case _CXML('>'):
  900. *pType = eTokenCloseTag;
  901. break;
  902. // Check for tag start and tag end
  903. case _CXML('<'):
  904. {
  905. // First check whether the token is in the clear tag list (meaning it
  906. // does not need formatting).
  907. ALLXMLClearTag *ctag=XMLClearTags;
  908. do
  909. {
  910. if (!xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen))
  911. {
  912. result.pClr=ctag;
  913. pXML->nIndex+=ctag->openTagLen-1;
  914. *pType=eTokenClear;
  915. return result;
  916. }
  917. ctag++;
  918. } while(ctag->lpszOpen);
  919. // Peek at the next character to see if we have an end tag '</',
  920. // or an xml declaration '<?'
  921. chTemp = pXML->lpXML[pXML->nIndex];
  922. // If we have a tag end...
  923. if (chTemp == _CXML('/'))
  924. {
  925. // Set the type and ensure we point at the next character
  926. getNextChar(pXML);
  927. *pType = eTokenTagEnd;
  928. }
  929. // If we have an XML declaration tag
  930. else if (chTemp == _CXML('?'))
  931. {
  932. // Set the type and ensure we point at the next character
  933. getNextChar(pXML);
  934. *pType = eTokenDeclaration;
  935. }
  936. // Otherwise we must have a start tag
  937. else
  938. {
  939. *pType = eTokenTagStart;
  940. }
  941. break;
  942. }
  943. // Check to see if we have a short hand type end tag ('/>').
  944. case _CXML('/'):
  945. // Peek at the next character to see if we have a short end tag '/>'
  946. chTemp = pXML->lpXML[pXML->nIndex];
  947. // If we have a short hand end tag...
  948. if (chTemp == _CXML('>'))
  949. {
  950. // Set the type and ensure we point at the next character
  951. getNextChar(pXML);
  952. *pType = eTokenShortHandClose;
  953. break;
  954. }
  955. // If we haven't found a short hand closing tag then drop into the
  956. // text process
  957. // Other characters
  958. default:
  959. nIsText = TRUE;
  960. }
  961. // If this is a TEXT node
  962. if (nIsText)
  963. {
  964. // Indicate we are dealing with text
  965. *pType = eTokenText;
  966. while((ch = getNextChar(pXML)))
  967. {
  968. if XML_isSPACECHAR(ch)
  969. {
  970. indexStart++; break;
  971. } else if (ch==_CXML('/'))
  972. {
  973. // If we find a slash then this maybe text or a short hand end tag
  974. // Peek at the next character to see it we have short hand end tag
  975. ch=pXML->lpXML[pXML->nIndex];
  976. // If we found a short hand end tag then we need to exit the loop
  977. if (ch==_CXML('>')) { pXML->nIndex--; break; }
  978. } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('=')))
  979. {
  980. pXML->nIndex--; break;
  981. }
  982. }
  983. }
  984. *pcbToken = pXML->nIndex-indexStart;
  985. } else
  986. {
  987. // If we failed to obtain a valid character
  988. *pcbToken = 0;
  989. *pType = eTokenError;
  990. result.pStr=NULL;
  991. }
  992. return result;
  993. }
  994. XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName)
  995. {
  996. if (!d) { free(lpszName); return NULL; }
  997. if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
  998. d->lpszName=lpszName;
  999. return lpszName;
  1000. }
  1001. XMLNode::XMLNode(struct XMLNodeDataTag *p)
  1002. {
  1003. characterEncoding=XMLNode::char_encoding_UTF8;
  1004. emptyXMLClear.lpszValue=0;
  1005. emptyXMLClear.lpszOpenTag=0;
  1006. emptyXMLClear.lpszCloseTag=0;
  1007. emptyXMLAttribute.lpszName=0;
  1008. emptyXMLAttribute.lpszValue=0;
  1009. d=p; (p->ref_count)++;
  1010. }
  1011. XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
  1012. {
  1013. characterEncoding=XMLNode::char_encoding_UTF8;
  1014. emptyXMLClear.lpszValue=0;
  1015. emptyXMLClear.lpszOpenTag=0;
  1016. emptyXMLClear.lpszCloseTag=0;
  1017. emptyXMLAttribute.lpszName=0;
  1018. emptyXMLAttribute.lpszValue=0;
  1019. d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
  1020. d->ref_count=1;
  1021. d->lpszName=NULL;
  1022. d->nChild= 0;
  1023. d->nText = 0;
  1024. d->nClear = 0;
  1025. d->nAttribute = 0;
  1026. d->isDeclaration = isDeclaration;
  1027. d->pParent = pParent;
  1028. d->pChild= NULL;
  1029. d->pText= NULL;
  1030. d->pClear= NULL;
  1031. d->pAttribute= NULL;
  1032. d->pOrder= NULL;
  1033. updateName_WOSD(lpszName);
  1034. }
  1035. XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
  1036. XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
  // Growth step, in elements, handed to the add*_priv functions as their
  // "memoryIncrease" argument while a document is being parsed.
  #define MEMORYINCREASE 50
  1038. RDK_XML_DEBUG_STATIC inline void myFree(void *p) { if (p) free(p); }
  1039. RDK_XML_DEBUG_STATIC inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  1040. {
  1041. if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); }
  1042. if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem);
  1043. // if (!p)
  1044. // {
  1045. // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220);
  1046. // }
  1047. return p;
  1048. }
  1049. // private:
  1050. XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) const
  1051. {
  1052. if (index<0) return -1;
  1053. int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
  1054. }
  1055. // private:
  1056. // update "order" information when deleting a content of a XMLNode
  1057. int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  1058. {
  1059. int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
  1060. memmove(o+i, o+i+1, (n-i)*sizeof(int));
  1061. for (;i<n;i++)
  1062. if ((o[i]&3)==(int)t) o[i]-=4;
  1063. // We should normally do:
  1064. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  1065. // but we skip reallocation because it's too time consuming.
  1066. // Anyway, at the end, it will be free'd completely at once.
  1067. return i;
  1068. }
  1069. void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype)
  1070. {
  1071. // in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
  1072. // out: *_pos is the index inside p
  1073. p=myRealloc(p,(nc+1),memoryIncrease,size);
  1074. int n=d->nChild+d->nText+d->nClear;
  1075. d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
  1076. int pos=*_pos,*o=d->pOrder;
  1077. if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  1078. int i=pos;
  1079. memmove(o+i+1, o+i, (n-i)*sizeof(int));
  1080. while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
  1081. if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  1082. o[i]=o[pos];
  1083. for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
  1084. *_pos=pos=o[pos]>>2;
  1085. memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
  1086. return p;
  1087. }
  1088. // Add a child node to the given element.
  1089. XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos)
  1090. {
  1091. if (!lpszName) return emptyXMLNode;
  1092. d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
  1093. d->pChild[pos].d=NULL;
  1094. d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
  1095. d->nChild++;
  1096. return d->pChild[pos];
  1097. }
  1098. // Add an attribute to an element.
  1099. XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev)
  1100. {
  1101. if (!lpszName) return &emptyXMLAttribute;
  1102. if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }
  1103. int nc=d->nAttribute;
  1104. d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
  1105. XMLAttribute *pAttr=d->pAttribute+nc;
  1106. pAttr->lpszName = lpszName;
  1107. pAttr->lpszValue = lpszValuev;
  1108. d->nAttribute++;
  1109. return pAttr;
  1110. }
  1111. // Add text to the element.
  1112. XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos)
  1113. {
  1114. if (!lpszValue) return NULL;
  1115. if (!d) { myFree(lpszValue); return NULL; }
  1116. d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
  1117. d->pText[pos]=lpszValue;
  1118. d->nText++;
  1119. return lpszValue;
  1120. }
  1121. // Add clear (unformatted) text to the element.
  1122. XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
  1123. {
  1124. if (!lpszValue) return &emptyXMLClear;
  1125. if (!d) { myFree(lpszValue); return &emptyXMLClear; }
  1126. d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
  1127. XMLClear *pNewClear=d->pClear+pos;
  1128. pNewClear->lpszValue = lpszValue;
  1129. if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;
  1130. if (!lpszClose) lpszClose=XMLClearTags->lpszClose;
  1131. pNewClear->lpszOpenTag = lpszOpen;
  1132. pNewClear->lpszCloseTag = lpszClose;
  1133. d->nClear++;
  1134. return pNewClear;
  1135. }
  1136. // private:
  1137. // Parse a clear (unformatted) type node.
  1138. char XMLNode::parseClearTag(void *px, void *_pClear)
  1139. {
  1140. XML *pXML=(XML *)px;
  1141. ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);
  1142. int cbTemp=0;
  1143. XMLCSTR lpszTemp=NULL;
  1144. XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
  1145. RDK_XML_DEBUG_STATIC XMLCSTR docTypeEnd=_CXML("]>");
  1146. // Find the closing tag
  1147. // Seems the <!DOCTYPE need a better treatment so lets handle it
  1148. if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)
  1149. {
  1150. XMLCSTR pCh=lpXML;
  1151. while (*pCh)
  1152. {
  1153. if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }
  1154. else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; }
  1155. #ifdef _XMLWIDECHAR
  1156. pCh++;
  1157. #else
  1158. pCh+=XML_ByteTable[(unsigned char)(*pCh)];
  1159. #endif
  1160. }
  1161. } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);
  1162. if (lpszTemp)
  1163. {
  1164. // Cache the size and increment the index
  1165. cbTemp = (int)(lpszTemp - lpXML);
  1166. pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);
  1167. // Add the clear node to the current element
  1168. addClear_priv(MEMORYINCREASE,cbTemp?stringDup(lpXML,cbTemp):NULL, pClear.lpszOpen, pClear.lpszClose,-1);
  1169. return 0;
  1170. }
  1171. // If we failed to find the end tag
  1172. pXML->error = eXMLErrorUnmatchedEndClearTag;
  1173. return 1;
  1174. }
  1175. void XMLNode::exactMemory(XMLNodeData *d)
  1176. {
  1177. if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));
  1178. if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
  1179. if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
  1180. if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
  1181. if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
  1182. }
  1183. char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr)
  1184. {
  1185. XML *pXML=(XML *)pa;
  1186. XMLCSTR lpszText=pXML->lpszText;
  1187. if (!lpszText) return 0;
  1188. if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;
  1189. int cbText = (int)(tokenPStr - lpszText);
  1190. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1191. if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; }
  1192. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1193. XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);
  1194. if (!lpt) return 1;
  1195. pXML->lpszText=NULL;
  1196. if (removeCommentsInMiddleOfText && d->nText && d->nClear)
  1197. {
  1198. // if the previous insertion was a comment (<!-- -->) AND
  1199. // if the previous previous insertion was a text then, delete the comment and append the text
  1200. int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder;
  1201. if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText))
  1202. {
  1203. int i=o[n]>>2;
  1204. if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen)
  1205. {
  1206. deleteClear(i);
  1207. i=o[n-1]>>2;
  1208. n=xstrlen(d->pText[i]);
  1209. int n2=xstrlen(lpt)+1;
  1210. d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR));
  1211. if (!d->pText[i]) return 1;
  1212. memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR));
  1213. free(lpt);
  1214. return 0;
  1215. }
  1216. }
  1217. }
  1218. addText_priv(MEMORYINCREASE,lpt,-1);
  1219. return 0;
  1220. }
  1221. // private…

Large files are truncated, but you can click here to view the full file