PageRenderTime 73ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 1ms

/libs/base/src/utils/xmlparser/xmlParser.cpp

http://github.com/gamman/MRPT
C++ | 2836 lines | 2262 code | 228 blank | 346 comment | 468 complexity | e8a57226a5cecf99716b251990b28efb MD5 | raw file
Possible License(s): GPL-3.0, BSD-3-Clause

Large files are truncated, but you can click here to view the full file

  1. /* +---------------------------------------------------------------------------+
  2. | The Mobile Robot Programming Toolkit (MRPT) C++ library |
  3. | |
  4. | http://www.mrpt.org/ |
  5. | |
  6. | Copyright (C) 2005-2011 University of Malaga |
  7. | |
  8. | This software was written by the Machine Perception and Intelligent |
  9. | Robotics Lab, University of Malaga (Spain). |
  10. | Contact: Jose-Luis Blanco <jlblanco@ctima.uma.es> |
  11. | |
  12. | This file is part of the MRPT project. |
  13. | |
  14. | MRPT is free software: you can redistribute it and/or modify |
  15. | it under the terms of the GNU General Public License as published by |
  16. | the Free Software Foundation, either version 3 of the License, or |
  17. | (at your option) any later version. |
  18. | |
  19. | MRPT is distributed in the hope that it will be useful, |
  20. | but WITHOUT ANY WARRANTY; without even the implied warranty of |
  21. | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
  22. | GNU General Public License for more details. |
  23. | |
  24. | You should have received a copy of the GNU General Public License |
  25. | along with MRPT. If not, see <http://www.gnu.org/licenses/>. |
  26. | |
  27. +---------------------------------------------------------------------------+ */
  28. #ifndef _CRT_SECURE_NO_DEPRECATE
  29. #define _CRT_SECURE_NO_DEPRECATE
  30. #endif
  31. #undef _UNICODE // JLBC
  32. #include "xmlParser.h"
  33. #ifdef _XMLWINDOWS
  34. //#ifdef _DEBUG
  35. //#define _CRTDBG_MAP_ALLOC
  36. //#include <crtdbg.h>
  37. //#endif
  38. #define WIN32_LEAN_AND_MEAN
  39. #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
  40. // to have "MessageBoxA" to display error messages for openFilHelper
  41. #endif
  42. #include <memory.h>
  43. #include <assert.h>
  44. #include <stdio.h>
  45. #include <string.h>
  46. #include <stdlib.h>
  47. XMLCSTR XMLNode::getVersion() { return _CXML("v2.39"); }
  48. void freeXMLString(XMLSTR t){if(t)free(t);}
  49. static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8;
  50. static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1;
  51. inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }
  52. // You can modify the initialization of the variable "XMLClearTags" below
  53. // to change the clearTags that are currently recognized by the library.
  54. // The number in the second column is the length of the string inside the
  55. // first column. The "<!DOCTYPE" declaration must be the second in the list.
  56. // The "<!--" declaration must be the third in the list.
  57. typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
  58. static ALLXMLClearTag XMLClearTags[] =
  59. {
  60. { _CXML("<![CDATA["),9, _CXML("]]>") },
  61. { _CXML("<!DOCTYPE"),9, _CXML(">") },
  62. { _CXML("<!--") ,4, _CXML("-->") },
  63. { _CXML("<PRE>") ,5, _CXML("</PRE>") },
  64. // { _CXML("<Script>") ,8, _CXML("</Script>")},
  65. { NULL ,0, NULL }
  66. };
  67. // You can modify the initialization of the variable "XMLEntities" below
  68. // to change the character entities that are currently recognized by the library.
  69. // The number in the second column is the length of the string inside the
  70. // first column. Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
  71. typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
  72. static XMLCharacterEntity XMLEntities[] =
  73. {
  74. { _CXML("&amp;" ), 5, _CXML('&' )},
  75. { _CXML("&lt;" ), 4, _CXML('<' )},
  76. { _CXML("&gt;" ), 4, _CXML('>' )},
  77. { _CXML("&quot;"), 6, _CXML('\"')},
  78. { _CXML("&apos;"), 6, _CXML('\'')},
  79. { NULL , 0, '\0' }
  80. };
  81. // When rendering the XMLNode to a string (using the "createXMLString" function),
  82. // you can ask for a beautiful formatting. This formatting is using the
  83. // following indentation character:
  84. #define INDENTCHAR _CXML('\t')
  85. // The following function parses the XML errors into a user friendly string.
  86. // You can edit this to change the output language of the library to something else.
  87. XMLCSTR XMLNode::getError(XMLError xerror)
  88. {
  89. switch (xerror)
  90. {
  91. case eXMLErrorNone: return _CXML("No error");
  92. case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag");
  93. case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found");
  94. case eXMLErrorEmpty: return _CXML("Error: No XML data");
  95. case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name");
  96. case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name");
  97. case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag");
  98. case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end");
  99. case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found");
  100. case eXMLErrorNoElements: return _CXML("Error: No elements found");
  101. case eXMLErrorFileNotFound: return _CXML("Error: File not found");
  102. case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found");
  103. case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity");
  104. case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode.");
  105. case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars");
  106. case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing");
  107. case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file");
  108. case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4");
  109. case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated");
  110. case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character");
  111. case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small");
  112. };
  113. return _CXML("Unknown");
  114. }
  115. /////////////////////////////////////////////////////////////////////////
  116. // Here starts the abstraction layer to be OS-independent //
  117. /////////////////////////////////////////////////////////////////////////
  118. // Here is an abstraction layer to access some common string manipulation functions.
  119. // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
  120. // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
  121. // If you plan to "port" the library to a new system/compiler, all you have to do is
  122. // to edit the following lines.
  123. #ifdef XML_NO_WIDE_CHAR
  124. char myIsTextWideChar(const void *b, int len) { return FALSE; }
  125. #else
  126. #if defined (UNDER_CE) || !defined(_XMLWINDOWS)
  127. char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
  128. {
  129. #ifdef sun
  130. // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer.
  131. if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
  132. #endif
  133. const wchar_t *s=(const wchar_t*)b;
  134. // buffer too small:
  135. if (len<(int)sizeof(wchar_t)) return FALSE;
  136. // odd length test
  137. if (len&1) return FALSE;
  138. /* only checks the first 256 characters */
  139. len=mmin(256,len/sizeof(wchar_t));
  140. // Check for the special byte order:
  141. if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  142. if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
  143. // checks for ASCII characters in the UNICODE stream
  144. int i,stats=0;
  145. for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
  146. if (stats>len/2) return TRUE;
  147. // Check for UNICODE NULL chars
  148. for (i=0; i<len; i++) if (!s[i]) return TRUE;
  149. return FALSE;
  150. }
  151. #else
  152. char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); }
  153. #endif
  154. #endif
  155. #ifdef _XMLWINDOWS
  156. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
  157. #ifdef _XMLWIDECHAR
  158. wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
  159. {
  160. int i;
  161. if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0);
  162. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0);
  163. if (i<0) return NULL;
  164. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
  165. if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i);
  166. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i);
  167. d[i]=0;
  168. return d;
  169. }
  170. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
  171. static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); }
  172. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
  173. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
  174. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
  175. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  176. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  177. #else
  178. char *myWideCharToMultiByte(const wchar_t *s)
  179. {
  180. UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8;
  181. int i=(int)WideCharToMultiByte(codePage, // code page
  182. 0, // performance and mapping flags
  183. s, // wide-character string
  184. -1, // number of chars in string
  185. NULL, // buffer for new string
  186. 0, // size of buffer
  187. NULL, // default for unmappable chars
  188. NULL // set when default char used
  189. );
  190. if (i<0) return NULL;
  191. char *d=(char*)malloc(i+1);
  192. WideCharToMultiByte(codePage, // code page
  193. 0, // performance and mapping flags
  194. s, // wide-character string
  195. -1, // number of chars in string
  196. d, // buffer for new string
  197. i, // size of buffer
  198. NULL, // default for unmappable chars
  199. NULL // set when default char used
  200. );
  201. d[i]=0;
  202. return d;
  203. }
  204. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  205. static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); }
  206. #ifdef __BORLANDC__
  207. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);}
  208. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); }
  209. #else
  210. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
  211. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
  212. #endif
  213. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
  214. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  215. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  216. #endif
  217. #else
  218. // for gcc and CC
  219. #ifdef XML_NO_WIDE_CHAR
  220. char *myWideCharToMultiByte(const wchar_t *s) { return NULL; }
  221. #else
  222. char *myWideCharToMultiByte(const wchar_t *s)
  223. {
  224. const wchar_t *ss=s;
  225. int i=(int)wcsrtombs(NULL,&ss,0,NULL);
  226. if (i<0) return NULL;
  227. char *d=(char *)malloc(i+1);
  228. wcsrtombs(d,&s,i,NULL);
  229. d[i]=0;
  230. return d;
  231. }
  232. #endif
  233. #ifdef _XMLWIDECHAR
  234. wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce)
  235. {
  236. const char *ss=s;
  237. int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
  238. if (i<0) return NULL;
  239. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
  240. mbsrtowcs(d,&s,i,NULL);
  241. d[i]=0;
  242. return d;
  243. }
  244. int xstrlen(XMLCSTR c) { return wcslen(c); }
  245. #ifdef sun
  246. // for CC
  247. #include <widec.h>
  248. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
  249. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
  250. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
  251. #else
  252. // for gcc
  253. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
  254. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
  255. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
  256. #endif
  257. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  258. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  259. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
  260. {
  261. char *filenameAscii=myWideCharToMultiByte(filename);
  262. FILE *f;
  263. if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb");
  264. else f=fopen(filenameAscii,"wb");
  265. free(filenameAscii);
  266. return f;
  267. }
  268. #else
  269. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  270. static inline int xstrlen(XMLCSTR c) { return strlen(c); }
  271. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
  272. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
  273. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
  274. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  275. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  276. #endif
  277. static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}
  278. #endif
  279. ///////////////////////////////////////////////////////////////////////////////
  280. // the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions //
  281. ///////////////////////////////////////////////////////////////////////////////
  282. // These 6 functions are not used inside the XMLparser.
  283. // They are only here as "convenience" functions for the user.
  284. // If you don't need them, you can delete them without any trouble.
  285. #ifdef _XMLWIDECHAR
  286. #ifdef _XMLWINDOWS
  287. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0
  288. char xmltob(XMLCSTR t,int v){ if (t&&(*t)) return (char)_wtoi(t); return v; }
  289. int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; }
  290. long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; }
  291. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
  292. #else
  293. #ifdef sun
  294. // for CC
  295. #include <widec.h>
  296. char xmltob(XMLCSTR t,int v){ if (t) return (char)wstol(t,NULL,10); return v; }
  297. int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; }
  298. long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; }
  299. #else
  300. // for gcc
  301. char xmltob(XMLCSTR t,int v){ if (t) return (char)wcstol(t,NULL,10); return v; }
  302. int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; }
  303. long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; }
  304. #endif
  305. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) wscanf(t, "%f", &v); /*v=_wtof(t);*/ return v; }
  306. #endif
  307. #else
  308. char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; }
  309. int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; }
  310. long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; }
  311. double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; }
  312. #endif
  313. XMLCSTR xmltoa(XMLCSTR t,XMLCSTR v){ if (t) return t; return v; }
  314. XMLCHAR xmltoc(XMLCSTR t,XMLCHAR v){ if (t&&(*t)) return *t; return v; }
  315. /////////////////////////////////////////////////////////////////////////
  316. // the "openFileHelper" function //
  317. /////////////////////////////////////////////////////////////////////////
  318. // Since each application has its own way to report and deal with errors, you should modify & rewrite
  319. // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
  320. XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
  321. {
  322. // guess the value of the global parameter "characterEncoding"
  323. // (the guess is based on the first 200 bytes of the file).
  324. FILE *f=xfopen(filename,_CXML("rb"));
  325. if (f)
  326. {
  327. char bb[205];
  328. int l=(int)fread(bb,1,200,f);
  329. setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText);
  330. fclose(f);
  331. }
  332. // parse the file
  333. XMLResults pResults;
  334. XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
  335. // display error message (if any)
  336. if (pResults.error != eXMLErrorNone)
  337. {
  338. // create message
  339. char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML("");
  340. if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
  341. sprintf(message,
  342. #ifdef _XMLWIDECHAR
  343. "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
  344. #else
  345. "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
  346. #endif
  347. ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
  348. // display message
  349. #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
  350. MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
  351. #else
  352. printf("%s",message);
  353. #endif
  354. // exit(255);
  355. }
  356. return xnode;
  357. }
  358. /////////////////////////////////////////////////////////////////////////
  359. // Here starts the core implementation of the XMLParser library //
  360. /////////////////////////////////////////////////////////////////////////
  361. // You should normally not change anything below this point.
  362. #ifndef _XMLWIDECHAR
  363. // If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.
  364. // If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).
  365. // If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).
  366. // This table is used as lookup-table to know the length of a character (in byte) based on the
  367. // content of the first byte of the character.
  368. // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
  369. static const char XML_utf8ByteTable[256] =
  370. {
  371. // 0 1 2 3 4 5 6 7 8 9 a b c d e f
  372. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
  373. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
  374. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
  375. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
  376. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
  377. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
  378. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
  379. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
  380. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
  381. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
  382. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
  383. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
  384. 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
  385. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
  386. 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
  387. 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
  388. };
  389. static const char XML_legacyByteTable[256] =
  390. {
  391. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  392. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  393. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  394. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  395. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  396. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
  397. };
  398. static const char XML_sjisByteTable[256] =
  399. {
  400. // 0 1 2 3 4 5 6 7 8 9 a b c d e f
  401. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
  402. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
  403. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
  404. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
  405. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
  406. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
  407. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
  408. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
  409. 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes
  410. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
  411. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
  412. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
  413. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0
  414. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0
  415. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes
  416. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0
  417. };
  418. static const char XML_gb2312ByteTable[256] =
  419. {
  420. // 0 1 2 3 4 5 6 7 8 9 a b c d e f
  421. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
  422. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
  423. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
  424. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
  425. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
  426. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
  427. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
  428. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
  429. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80
  430. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
  431. 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes
  432. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
  433. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
  434. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
  435. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
  436. 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0
  437. };
  438. static const char XML_gbk_big5_ByteTable[256] =
  439. {
  440. // 0 1 2 3 4 5 6 7 8 9 a b c d e f
  441. 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
  442. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
  443. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
  444. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
  445. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
  446. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
  447. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
  448. 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70
  449. 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes
  450. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90
  451. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0
  452. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0
  453. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0
  454. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
  455. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0
  456. 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0
  457. };
  458. static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
  459. #endif
  460. XMLNode XMLNode::emptyXMLNode;
  461. XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
  462. XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
  463. // Enumeration used to decipher what type a token is
  464. typedef enum XMLTokenTypeTag
  465. {
  466. eTokenText = 0,
  467. eTokenQuotedText,
  468. eTokenTagStart, /* "<" */
  469. eTokenTagEnd, /* "</" */
  470. eTokenCloseTag, /* ">" */
  471. eTokenEquals, /* "=" */
  472. eTokenDeclaration, /* "<?" */
  473. eTokenShortHandClose, /* "/>" */
  474. eTokenClear,
  475. eTokenError
  476. } XMLTokenType;
  477. // Main structure used for parsing XML
  478. typedef struct XML
  479. {
  480. XMLCSTR lpXML;
  481. XMLCSTR lpszText;
  482. int nIndex,nIndexMissigEndTag;
  483. enum XMLError error;
  484. XMLCSTR lpEndTag;
  485. int cbEndTag;
  486. XMLCSTR lpNewElement;
  487. int cbNewElement;
  488. int nFirst;
  489. } XML;
  490. typedef struct
  491. {
  492. ALLXMLClearTag *pClr;
  493. XMLCSTR pStr;
  494. } NextToken;
  495. // Enumeration used when parsing attributes
  496. typedef enum Attrib
  497. {
  498. eAttribName = 0,
  499. eAttribEquals,
  500. eAttribValue
  501. } Attrib;
  502. // Enumeration used when parsing elements to dictate whether we are currently
  503. // inside a tag
  504. typedef enum Status
  505. {
  506. eInsideTag = 0,
  507. eOutsideTag
  508. } Status;
  509. XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
  510. {
  511. if (!d) return eXMLErrorNone;
  512. FILE *f=xfopen(filename,_CXML("wb"));
  513. if (!f) return eXMLErrorCannotOpenWriteFile;
  514. #ifdef _XMLWIDECHAR
  515. unsigned char h[2]={ 0xFF, 0xFE };
  516. if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
  517. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  518. {
  519. if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f))
  520. return eXMLErrorCannotWriteFile;
  521. }
  522. #else
  523. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  524. {
  525. if (characterEncoding==char_encoding_UTF8)
  526. {
  527. // header so that windows recognize the file as UTF-8:
  528. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  529. encoding="utf-8";
  530. } else if (characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS";
  531. if (!encoding) encoding="ISO-8859-1";
  532. if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
  533. } else
  534. {
  535. if (characterEncoding==char_encoding_UTF8)
  536. {
  537. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  538. }
  539. }
  540. #endif
  541. int i;
  542. XMLSTR t=createXMLString(nFormat,&i);
  543. if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
  544. if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
  545. free(t);
  546. return eXMLErrorNone;
  547. }
  548. // Duplicate a given string.
  549. XMLSTR stringDup(XMLCSTR lpszData, int cbData)
  550. {
  551. if (lpszData==NULL) return NULL;
  552. XMLSTR lpszNew;
  553. if (cbData==-1) cbData=(int)xstrlen(lpszData);
  554. lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
  555. if (lpszNew)
  556. {
  557. memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
  558. lpszNew[cbData] = (XMLCHAR)NULL;
  559. }
  560. return lpszNew;
  561. }
  562. XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source)
  563. {
  564. XMLSTR dd=dest;
  565. XMLCHAR ch;
  566. XMLCharacterEntity *entity;
  567. while ((ch=*source))
  568. {
  569. entity=XMLEntities;
  570. do
  571. {
  572. if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
  573. entity++;
  574. } while(entity->s);
  575. #ifdef _XMLWIDECHAR
  576. *(dest++)=*(source++);
  577. #else
  578. switch(XML_ByteTable[(unsigned char)ch])
  579. {
  580. case 4: *(dest++)=*(source++);
  581. case 3: *(dest++)=*(source++);
  582. case 2: *(dest++)=*(source++);
  583. case 1: *(dest++)=*(source++);
  584. }
  585. #endif
  586. out_of_loop1:
  587. ;
  588. }
  589. *dest=0;
  590. return dd;
  591. }
  592. // private (used while rendering):
  593. int ToXMLStringTool::lengthXMLString(XMLCSTR source)
  594. {
  595. int r=0;
  596. XMLCharacterEntity *entity;
  597. XMLCHAR ch;
  598. while ((ch=*source))
  599. {
  600. entity=XMLEntities;
  601. do
  602. {
  603. if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
  604. entity++;
  605. } while(entity->s);
  606. #ifdef _XMLWIDECHAR
  607. r++; source++;
  608. #else
  609. ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
  610. #endif
  611. out_of_loop1:
  612. ;
  613. }
  614. return r;
  615. }
  616. ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
  617. void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
  618. XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
  619. {
  620. int l=lengthXMLString(source)+1;
  621. if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); }
  622. return toXMLUnSafe(buf,source);
  623. }
  624. // private:
  625. XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
  626. {
  627. // This function is the opposite of the function "toXMLString". It decodes the escape
  628. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  629. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  630. // the XML library will always gives you back "decoded" strings.
  631. //
  632. // in: string (s) and length (lo) of string
  633. // out: new allocated string converted from xml
  634. if (!s) return NULL;
  635. int ll=0,j;
  636. XMLSTR d;
  637. XMLCSTR ss=s;
  638. XMLCharacterEntity *entity;
  639. while ((lo>0)&&(*s))
  640. {
  641. if (*s==_CXML('&'))
  642. {
  643. if ((lo>2)&&(s[1]==_CXML('#')))
  644. {
  645. s+=2; lo-=2;
  646. if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; }
  647. while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++;
  648. if (*s!=_CXML(';'))
  649. {
  650. pXML->error=eXMLErrorUnknownCharacterEntity;
  651. return NULL;
  652. }
  653. s++; lo--;
  654. } else
  655. {
  656. entity=XMLEntities;
  657. do
  658. {
  659. if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
  660. entity++;
  661. } while(entity->s);
  662. if (!entity->s)
  663. {
  664. pXML->error=eXMLErrorUnknownCharacterEntity;
  665. return NULL;
  666. }
  667. }
  668. } else
  669. {
  670. #ifdef _XMLWIDECHAR
  671. s++; lo--;
  672. #else
  673. j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
  674. #endif
  675. }
  676. ll++;
  677. }
  678. d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
  679. s=d;
  680. while (ll-->0)
  681. {
  682. if (*ss==_CXML('&'))
  683. {
  684. if (ss[1]==_CXML('#'))
  685. {
  686. ss+=2; j=0;
  687. if ((*ss==_CXML('X'))||(*ss==_CXML('x')))
  688. {
  689. ss++;
  690. while (*ss!=_CXML(';'))
  691. {
  692. if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0');
  693. else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10;
  694. else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10;
  695. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  696. ss++;
  697. }
  698. } else
  699. {
  700. while (*ss!=_CXML(';'))
  701. {
  702. if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0');
  703. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  704. ss++;
  705. }
  706. }
  707. #ifndef _XMLWIDECHAR
  708. if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;}
  709. #endif
  710. (*d++)=(XMLCHAR)j; ss++;
  711. } else
  712. {
  713. entity=XMLEntities;
  714. do
  715. {
  716. if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
  717. entity++;
  718. } while(entity->s);
  719. }
  720. } else
  721. {
  722. #ifdef _XMLWIDECHAR
  723. *(d++)=*(ss++);
  724. #else
  725. switch(XML_ByteTable[(unsigned char)*ss])
  726. {
  727. case 4: *(d++)=*(ss++); ll--;
  728. case 3: *(d++)=*(ss++); ll--;
  729. case 2: *(d++)=*(ss++); ll--;
  730. case 1: *(d++)=*(ss++);
  731. }
  732. #endif
  733. }
  734. }
  735. *d=0;
  736. return (XMLSTR)s;
  737. }
  738. #define XML_isSPACECHAR(ch) ((ch==_CXML('\n'))||(ch==_CXML(' '))||(ch== _CXML('\t'))||(ch==_CXML('\r')))
  739. // private:
  740. char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
  741. // !!!! WARNING strange convention&:
  742. // return 0 if equals
  743. // return 1 if different
  744. {
  745. if (!cclose) return 1;
  746. int l=(int)xstrlen(cclose);
  747. if (xstrnicmp(cclose, copen, l)!=0) return 1;
  748. const XMLCHAR c=copen[l];
  749. if (XML_isSPACECHAR(c)||
  750. (c==_CXML('/' ))||
  751. (c==_CXML('<' ))||
  752. (c==_CXML('>' ))||
  753. (c==_CXML('=' ))) return 0;
  754. return 1;
  755. }
  756. // Obtain the next character from the string.
  757. static inline XMLCHAR getNextChar(XML *pXML)
  758. {
  759. XMLCHAR ch = pXML->lpXML[pXML->nIndex];
  760. #ifdef _XMLWIDECHAR
  761. if (ch!=0) pXML->nIndex++;
  762. #else
  763. pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
  764. #endif
  765. return ch;
  766. }
// Find the next token in a string.
//
// Skips leading white space, classifies the token starting at the first non-space
// character and advances pXML->nIndex past it.
//
//   pXML     : parser state; lpXML is the input text and nIndex the read position.
//   pcbToken : out, number of characters that have been read for this token
//              (pXML->nIndex minus the token start). It is set to 0 on end of input
//              and is NOT written when a clear tag is recognised (early return below).
//   pType    : out, the kind of token found (eTokenError on end of input).
//
// Returns a NextToken whose pStr points at the first character of the token (NULL on
// end of input) and whose pClr points at the matching clear-tag descriptor, or NULL.
static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
{
    NextToken result;
    XMLCHAR ch;
    XMLCHAR chTemp;
    int indexStart,nFoundMatch,nIsText=FALSE;
    result.pClr=NULL; // prevent warning

    // Find next non-white space character
    // (indexStart is refreshed on every iteration, so it ends up being the index of ch)
    do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);

    if (ch)
    {
        // Cache the current string pointer
        result.pStr = &pXML->lpXML[indexStart];

        // First check whether the token is in the clear tag list (meaning it
        // does not need formatting).
        // The list is walked until an entry with a NULL lpszOpen is reached.
        ALLXMLClearTag *ctag=XMLClearTags;
        do
        {
            if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
            {
                result.pClr=ctag;
                // Skip the rest of the opening tag; getNextChar() above has already
                // advanced the index for its first character.
                pXML->nIndex+=ctag->openTagLen-1;
                *pType=eTokenClear;
                // NOTE: *pcbToken is left untouched on this path.
                return result;
            }
            ctag++;
        } while(ctag->lpszOpen);

        // If we didn't find a clear tag then check for standard tokens
        switch(ch)
        {
        // Check for quotes
        case _CXML('\''):
        case _CXML('\"'):
            // Type of token
            *pType = eTokenQuotedText;
            // Remember which quote character opened the text
            chTemp = ch;

            // Set the size
            nFoundMatch = FALSE;

            // Search through the string to find a matching quote
            // (the search is abandoned at end of input or at the first '<')
            while((ch = getNextChar(pXML)))
            {
                if (ch==chTemp) { nFoundMatch = TRUE; break; }
                if (ch==_CXML('<')) break;
            }

            // If we failed to find a matching quote
            if (nFoundMatch == FALSE)
            {
                // Rewind to just after the opening quote and treat the token as plain text
                pXML->nIndex=indexStart+1;
                nIsText=TRUE;
                break;
            }

            // 4.02.2002
            // if (FindNonWhiteSpace(pXML)) pXML->nIndex--;

            break;

        // Equals (used with attribute values)
        case _CXML('='):
            *pType = eTokenEquals;
            break;

        // Close tag
        case _CXML('>'):
            *pType = eTokenCloseTag;
            break;

        // Check for tag start and tag end
        case _CXML('<'):

            // Peek at the next character to see if we have an end tag '</',
            // or an xml declaration '<?'
            chTemp = pXML->lpXML[pXML->nIndex];

            // If we have a tag end...
            if (chTemp == _CXML('/'))
            {
                // Set the type and ensure we point at the next character
                getNextChar(pXML);
                *pType = eTokenTagEnd;
            }

            // If we have an XML declaration tag
            else if (chTemp == _CXML('?'))
            {

                // Set the type and ensure we point at the next character
                getNextChar(pXML);
                *pType = eTokenDeclaration;
            }

            // Otherwise we must have a start tag
            else
            {
                *pType = eTokenTagStart;
            }
            break;

        // Check to see if we have a short hand type end tag ('/>').
        case _CXML('/'):

            // Peek at the next character to see if we have a short end tag '/>'
            chTemp = pXML->lpXML[pXML->nIndex];

            // If we have a short hand end tag...
            if (chTemp == _CXML('>'))
            {
                // Set the type and ensure we point at the next character
                getNextChar(pXML);
                *pType = eTokenShortHandClose;
                break;
            }

            // If we haven't found a short hand closing tag then drop into the
            // text process
            // (intentional fall-through into the default case)

        // Other characters
        default:
            nIsText = TRUE;
        }

        // If this is a TEXT node
        if (nIsText)
        {
            // Indicate we are dealing with text
            *pType = eTokenText;
            while((ch = getNextChar(pXML)))
            {
                if XML_isSPACECHAR(ch)
                {
                    // The white space has already been consumed; bumping indexStart makes
                    // the length computed below one smaller so it is not counted.
                    // result.pStr was cached earlier and is not affected.
                    indexStart++; break;

                } else if (ch==_CXML('/'))
                {
                    // If we find a slash then this maybe text or a short hand end tag
                    // Peek at the next character to see it we have short hand end tag
                    ch=pXML->lpXML[pXML->nIndex];
                    // If we found a short hand end tag then we need to exit the loop
                    // (the index is moved back so that the '/' is read again by the next call)
                    if (ch==_CXML('>')) { pXML->nIndex--; break; }

                } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('=')))
                {
                    // A delimiter ends the text; step back so it starts the next token
                    pXML->nIndex--; break;
                }
            }
        }
        *pcbToken = pXML->nIndex-indexStart;
    } else
    {
        // If we failed to obtain a valid character
        *pcbToken = 0;
        *pType = eTokenError;
        result.pStr=NULL;
    }

    return result;
}
  907. XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName)
  908. {
  909. if (!d) { free(lpszName); return NULL; }
  910. if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
  911. d->lpszName=lpszName;
  912. return lpszName;
  913. }
  914. // private:
  915. XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
  916. XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
  917. {
  918. d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
  919. d->ref_count=1;
  920. d->lpszName=NULL;
  921. d->nChild= 0;
  922. d->nText = 0;
  923. d->nClear = 0;
  924. d->nAttribute = 0;
  925. d->isDeclaration = isDeclaration;
  926. d->pParent = pParent;
  927. d->pChild= NULL;
  928. d->pText= NULL;
  929. d->pClear= NULL;
  930. d->pAttribute= NULL;
  931. d->pOrder= NULL;
  932. updateName_WOSD(lpszName);
  933. }
  934. XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
  935. XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
  936. #define MEMORYINCREASE 50
  937. static inline void myFree(void *p) { if (p) free(p); }
  938. static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  939. {
  940. if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); }
  941. if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem);
  942. // if (!p)
  943. // {
  944. // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220);
  945. // }
  946. return p;
  947. }
  948. // private:
  949. XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype)
  950. {
  951. if (index<0) return -1;
  952. int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
  953. }
  954. // private:
  955. // update "order" information when deleting a content of a XMLNode
  956. int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  957. {
  958. int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
  959. memmove(o+i, o+i+1, (n-i)*sizeof(int));
  960. for (;i<n;i++)
  961. if ((o[i]&3)==(int)t) o[i]-=4;
  962. // We should normally do:
  963. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  964. // but we skip reallocation because it's too time consuming.
  965. // Anyway, at the end, it will be free'd completely at once.
  966. return i;
  967. }
  968. void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype)
  969. {
  970. // in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
  971. // out: *_pos is the index inside p
  972. p=myRealloc(p,(nc+1),memoryIncrease,size);
  973. int n=d->nChild+d->nText+d->nClear;
  974. d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
  975. int pos=*_pos,*o=d->pOrder;
  976. if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  977. int i=pos;
  978. memmove(o+i+1, o+i, (n-i)*sizeof(int));
  979. while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
  980. if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  981. o[i]=o[pos];
  982. for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
  983. *_pos=pos=o[pos]>>2;
  984. memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
  985. return p;
  986. }
  987. // Add a child node to the given element.
  988. XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos)
  989. {
  990. if (!lpszName) return emptyXMLNode;
  991. d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
  992. d->pChild[pos].d=NULL;
  993. d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
  994. d->nChild++;
  995. return d->pChild[pos];
  996. }
  997. // Add an attribute to an element.
  998. XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev)
  999. {
  1000. if (!lpszName) return &emptyXMLAttribute;
  1001. if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }
  1002. int nc=d->nAttribute;
  1003. d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
  1004. XMLAttribute *pAttr=d->pAttribute+nc;
  1005. pAttr->lpszName = lpszName;
  1006. pAttr->lpszValue = lpszValuev;
  1007. d->nAttribute++;
  1008. return pAttr;
  1009. }
  1010. // Add text to the element.
  1011. XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos)
  1012. {
  1013. if (!lpszValue) return NULL;
  1014. if (!d) { myFree(lpszValue); return NULL; }
  1015. d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
  1016. d->pText[pos]=lpszValue;
  1017. d->nText++;
  1018. return lpszValue;
  1019. }
  1020. // Add clear (unformatted) text to the element.
  1021. XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
  1022. {
  1023. if (!lpszValue) return &emptyXMLClear;
  1024. if (!d) { myFree(lpszValue); return &emptyXMLClear; }
  1025. d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
  1026. XMLClear *pNewClear=d->pClear+pos;
  1027. pNewClear->lpszValue = lpszValue;
  1028. if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;
  1029. if (!lpszClose) lpszClose=XMLClearTags->lpszClose;
  1030. pNewClear->lpszOpenTag = lpszOpen;
  1031. pNewClear->lpszCloseTag = lpszClose;
  1032. d->nClear++;
  1033. return pNewClear;
  1034. }
  1035. // private:
  1036. // Parse a clear (unformatted) type node.
  1037. char XMLNode::parseClearTag(void *px, void *_pClear)
  1038. {
  1039. XML *pXML=(XML *)px;
  1040. ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);
  1041. int cbTemp=0;
  1042. XMLCSTR lpszTemp=NULL;
  1043. XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
  1044. static XMLCSTR docTypeEnd=_CXML("]>");
  1045. // Find the closing tag
  1046. // Seems the <!DOCTYPE need a better treatment so lets handle it
  1047. if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)
  1048. {
  1049. XMLCSTR pCh=lpXML;
  1050. while (*pCh)
  1051. {
  1052. if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }
  1053. else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; }
  1054. #ifdef _XMLWIDECHAR
  1055. pCh++;
  1056. #else
  1057. pCh+=XML_ByteTable[(unsigned char)(*pCh)];
  1058. #endif
  1059. }
  1060. } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);
  1061. if (lpszTemp)
  1062. {
  1063. // Cache the size and increment the index
  1064. cbTemp = (int)(lpszTemp - lpXML);
  1065. pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);
  1066. // Add the clear node to the current element
  1067. addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1);
  1068. return 0;
  1069. }
  1070. // If we failed to find the end tag
  1071. pXML->error = eXMLErrorUnmatchedEndClearTag;
  1072. return 1;
  1073. }
  1074. void XMLNode::exactMemory(XMLNodeData *d)
  1075. {
  1076. if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));
  1077. if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
  1078. if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
  1079. if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
  1080. if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
  1081. }
  1082. char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr)
  1083. {
  1084. XML *pXML=(XML *)pa;
  1085. XMLCSTR lpszText=pXML->lpszText;
  1086. if (!lpszText) return 0;
  1087. if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;
  1088. int cbText = (int)(tokenPStr - lpszText);
  1089. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1090. if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; }
  1091. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1092. XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);
  1093. if (!lpt) return 1;
  1094. pXML->lpszText=NULL;
  1095. if (removeCommentsInMiddleOfText && d->nText && d->nClear)
  1096. {
  1097. // if the previous insertion was a comment (<!-- -->) AND
  1098. // if the previous previous insertion was a text then, delete the comment and append the text
  1099. int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder;
  1100. if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText))
  1101. {
  1102. int i=o[n]>>2;
  1103. if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen)
  1104. {
  1105. deleteClear(i);
  1106. i=o[n-1]>>2;
  1107. n=xstrlen(d->pText[i]);
  1108. int n2=xstrlen(lpt)+1;
  1109. d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR));
  1110. if (!d->pText[i]) return 1;
  1111. memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR));
  1112. free(lpt);
  1113. return 0;
  1114. }
  1115. }
  1116. }
  1117. addText_priv(MEMORYINCREASE,lpt,-1);
  1118. return 0;
  1119. }
  1120. // private:
  1121. // Recursively parse an XML element.
  1122. int XMLNode::ParseXMLElement(void *pa)
  1123. {
  1124. XML *pXML=(XML *)pa;
  1125. int cbToken;
  1126. enum XMLTokenTypeTag xtype;
  1127. NextToken token;
  1128. XMLCSTR lpszTemp=NULL;
  1129. int cbTemp=0;
  1130. char nDeclaration;
  1131. XMLNode pNew;
  1132. enum Status status; // inside or outside a tag
  1133. enum Attrib attrib = eAttribName;
  1134. assert(pXML);
  1135. // If this is the first call to the function
  1136. if (pXML->nFirst)
  1137. {
  1138. // Assume we are outside of a tag definition
  1139. pXML->nFirst = FALSE;
  1140. status = eOutsideTag;
  1141. } else
  1142. {
  1143. // If this is not the first call then we should only be called when inside a tag.
  1144. status = eInsideTag;
  1145. }
  1146. // Iterate through the tokens in the document
  1147. for(;;)
  1148. {
  1149. // Obtain the next token
  1150. token = GetNextToken(pXML, &cbToken, &xtype);
  1151. if (xtype != eTokenError)
  1152. {
  1153. // Check the current status
  1154. switch(status)
  1155. {
  1156. // If we are outside of a tag definition
  1157. case eOutsideTag:
  1158. // Check what type of token we obtained
  1159. switch(xtype)
  1160. {
  1161. // If we have found text or quoted text
  1162. case eTokenText:
  1163. case eTokenCloseTag: /* '>' */
  1164. case eTokenShortHandClose: /* '/>' */
  1165. case eTokenQuotedText:
  1166. case eTokenEquals:
  1167. break;
  1168. // If we found a start tag '<' and declarations '<?'
  1169. case eTokenTagStart:
  1170. case eTokenDeclaration:
  1171. // Cache whether this new element is a declaration or not
  1172. nDeclaration = (xtype == eTokenDeclaration);
  1173. // If we have node text then add this to the element
  1174. if (maybeAddTxT(pXML,token.pStr)) return FALSE;
  1175. // Find the name of the tag

Large files are truncated, but you can click here to view the full file