PageRenderTime 55ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/RayTrace/RayTrace/xmlParser.cpp

https://bitbucket.org/jibarra/cse470-assign5
C++ | 2671 lines | 2073 code | 224 blank | 374 comment | 422 complexity | a40f83767b244a121124c80d5d1f3161 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /**
  2. ****************************************************************************
  3. * <P> XML.c - implementation file for basic XML parser written in ANSI C++
  4. * for portability. It works by using recursion and a node tree for breaking
  5. * down the elements of an XML document. </P>
  6. *
  7. * @version V2.32
  8. * @author Frank Vanden Berghen
  9. *
  10. * NOTE:
  11. *
  12. * If you add "#define STRICT_PARSING", on the first line of this file
  13. * the parser will see the following XML-stream:
  14. * <a><b>some text</b><b>other text </a>
  15. * as an error. Otherwise, this string will be equivalent to:
  16. * <a><b>some text</b><b>other text</b></a>
  17. *
  18. * NOTE:
  19. *
  20. * If you add "#define APPROXIMATE_PARSING" on the first line of this file
  21. * the parser will see the following XML-stream:
  22. * <data name="n1">
  23. * <data name="n2">
  24. * <data name="n3" />
  25. * as equivalent to the following XML-stream:
  26. * <data name="n1" />
  27. * <data name="n2" />
  28. * <data name="n3" />
  29. * This can be useful for badly-formed XML-streams but prevent the use
  30. * of the following XML-stream (problem is: tags at contiguous levels
  31. * have the same names):
  32. * <data name="n1">
  33. * <data name="n2">
  34. * <data name="n3" />
  35. * </data>
  36. * </data>
  37. *
  38. * NOTE:
  39. *
  40. * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
  41. * the "openFileHelper" function will always display error messages inside the
  42. * console instead of inside a message-box-window. Message-box-windows are
  43. * available on windows 9x/NT/2000/XP/Vista only.
  44. *
  45. * BSD license:
  46. * Copyright (c) 2002, Frank Vanden Berghen
  47. * All rights reserved.
  48. * Redistribution and use in source and binary forms, with or without
  49. * modification, are permitted provided that the following conditions are met:
  50. *
  51. * * Redistributions of source code must retain the above copyright
  52. * notice, this list of conditions and the following disclaimer.
  53. * * Redistributions in binary form must reproduce the above copyright
  54. * notice, this list of conditions and the following disclaimer in the
  55. * documentation and/or other materials provided with the distribution.
  56. * * Neither the name of the Frank Vanden Berghen nor the
  57. * names of its contributors may be used to endorse or promote products
  58. * derived from this software without specific prior written permission.
  59. *
  60. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
  61. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  62. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  63. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
  64. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  65. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  66. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  67. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  68. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  69. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  70. *
  71. ****************************************************************************
  72. */
  73. #ifndef _CRT_SECURE_NO_DEPRECATE
  74. #define _CRT_SECURE_NO_DEPRECATE
  75. #endif
  76. #include "xmlParser.h"
  77. #ifdef _XMLWINDOWS
  78. //#ifdef _DEBUG
  79. //#define _CRTDBG_MAP_ALLOC
  80. //#include <crtdbg.h>
  81. //#endif
  82. #define WIN32_LEAN_AND_MEAN
  83. #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
  84. // to have "MessageBoxA" to display error messages for openFileHelper
  85. #endif
  86. #include <memory.h>
  87. #include <assert.h>
  88. #include <stdio.h>
  89. #include <string.h>
  90. #include <stdlib.h>
  91. XMLCSTR XMLNode::getVersion() { return _X("v2.30"); }
  92. void freeXMLString(XMLSTR t){free(t);}
  93. static XMLNode::XMLCharEncoding characterEncoding=XMLNode::encoding_UTF8;
  94. static char guessWideCharChars=1, dropWhiteSpace=1;
  // Returns the smaller of two ints (t2 when they compare equal).
  inline int mmin( const int t1, const int t2 )
  {
      if (t1 < t2) return t1;
      return t2;
  }
  96. // You can modify the initialization of the variable "XMLClearTags" below
  97. // to change the clearTags that are currently recognized by the library.
  98. // The number on the second columns is the length of the string inside the
  99. // first column. The "<!DOCTYPE" declaration must be the second in the list.
  100. typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag;
  101. static ALLXMLClearTag XMLClearTags[] =
  102. {
  103. { _X("<![CDATA["),9, _X("]]>") },
  104. { _X("<!DOCTYPE"),9, _X(">") },
  105. { _X("<PRE>") ,5, _X("</PRE>") },
  106. { _X("<Script>") ,8, _X("</Script>")},
  107. { _X("<!--") ,4, _X("-->") },
  108. { NULL ,0, NULL }
  109. };
  110. // You can modify the initialization of the variable "XMLEntities" below
  111. // to change the character entities that are currently recognized by the library.
  112. // The number on the second columns is the length of the string inside the
  113. // first column. Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
  114. typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
  115. static XMLCharacterEntity XMLEntities[] =
  116. {
  117. { _X("&amp;" ), 5, _X('&' )},
  118. { _X("&lt;" ), 4, _X('<' )},
  119. { _X("&gt;" ), 4, _X('>' )},
  120. { _X("&quot;"), 6, _X('\"')},
  121. { _X("&apos;"), 6, _X('\'')},
  122. { NULL , 0, '\0' }
  123. };
  // Indentation character used when the "createXMLString" function is asked
  // to render an XMLNode with pretty formatting.
  #define INDENTCHAR _X('\t')
  128. // The following function parses the XML errors into a user friendly string.
  129. // You can edit this to change the output language of the library to something else.
  130. XMLCSTR XMLNode::getError(XMLError xerror)
  131. {
  132. switch (xerror)
  133. {
  134. case eXMLErrorNone: return _X("No error");
  135. case eXMLErrorMissingEndTag: return _X("Warning: Unmatched end tag");
  136. case eXMLErrorNoXMLTagFound: return _X("Warning: No XML tag found");
  137. case eXMLErrorEmpty: return _X("Error: No XML data");
  138. case eXMLErrorMissingTagName: return _X("Error: Missing start tag name");
  139. case eXMLErrorMissingEndTagName: return _X("Error: Missing end tag name");
  140. case eXMLErrorUnmatchedEndTag: return _X("Error: Unmatched end tag");
  141. case eXMLErrorUnmatchedEndClearTag: return _X("Error: Unmatched clear tag end");
  142. case eXMLErrorUnexpectedToken: return _X("Error: Unexpected token found");
  143. case eXMLErrorNoElements: return _X("Error: No elements found");
  144. case eXMLErrorFileNotFound: return _X("Error: File not found");
  145. case eXMLErrorFirstTagNotFound: return _X("Error: First Tag not found");
  146. case eXMLErrorUnknownCharacterEntity:return _X("Error: Unknown character entity");
  147. case eXMLErrorCharConversionError: return _X("Error: unable to convert between WideChar and MultiByte chars");
  148. case eXMLErrorCannotOpenWriteFile: return _X("Error: unable to open file for writing");
  149. case eXMLErrorCannotWriteFile: return _X("Error: cannot write into file");
  150. case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _X("Warning: Base64-string length is not a multiple of 4");
  151. case eXMLErrorBase64DecodeTruncatedData: return _X("Warning: Base64-string is truncated");
  152. case eXMLErrorBase64DecodeIllegalCharacter: return _X("Error: Base64-string contains an illegal character");
  153. case eXMLErrorBase64DecodeBufferTooSmall: return _X("Error: Base64 decode output buffer is too small");
  154. };
  155. return _X("Unknown");
  156. }
  157. /////////////////////////////////////////////////////////////////////////
  158. // Here start the abstraction layer to be OS-independent //
  159. /////////////////////////////////////////////////////////////////////////
  160. // Here is an abstraction layer to access some common string manipulation functions.
  161. // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
  162. // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
  163. // If you plan to "port" the library to a new system/compiler, all you have to do is
  164. // to edit the following lines.
  165. #ifdef XML_NO_WIDE_CHAR
  166. char myIsTextWideChar(const void *b, int len) { return FALSE; }
  167. #else
  168. #if defined (UNDER_CE) || !defined(_XMLWINDOWS)
  169. char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
  170. {
  171. #ifdef sun
  172. // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer.
  173. if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE;
  174. #endif
  175. const wchar_t *s=(const wchar_t*)b;
  176. // buffer too small:
  177. if (len<(int)sizeof(wchar_t)) return FALSE;
  178. // odd length test
  179. if (len&1) return FALSE;
  180. /* only checks the first 256 characters */
  181. len=mmin(256,len/sizeof(wchar_t));
  182. // Check for the special byte order:
  183. if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  184. if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
  185. // checks for ASCII characters in the UNICODE stream
  186. int i,stats=0;
  187. for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
  188. if (stats>len/2) return TRUE;
  189. // Check for UNICODE NULL chars
  190. for (i=0; i<len; i++) if (!s[i]) return TRUE;
  191. return FALSE;
  192. }
  193. #else
  194. char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); };
  195. #endif
  196. #endif
  197. #ifdef _XMLWINDOWS
  198. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET,
  199. #ifdef _XMLWIDECHAR
  200. wchar_t *myMultiByteToWideChar(const char *s)
  201. {
  202. int i;
  203. if (characterEncoding==XMLNode::encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0);
  204. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0);
  205. if (i<0) return NULL;
  206. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
  207. if (characterEncoding==XMLNode::encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i);
  208. else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i);
  209. d[i]=0;
  210. return d;
  211. }
  212. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); }
  213. static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); }
  214. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _wcsnicmp(c1,c2,l);}
  215. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
  216. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); }
  217. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  218. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  219. #else
  220. char *myWideCharToMultiByte(const wchar_t *s)
  221. {
  222. UINT codePage=CP_ACP; if (characterEncoding==XMLNode::encoding_UTF8) codePage=CP_UTF8;
  223. int i=(int)WideCharToMultiByte(codePage, // code page
  224. 0, // performance and mapping flags
  225. s, // wide-character string
  226. -1, // number of chars in string
  227. NULL, // buffer for new string
  228. 0, // size of buffer
  229. NULL, // default for unmappable chars
  230. NULL // set when default char used
  231. );
  232. if (i<0) return NULL;
  233. char *d=(char*)malloc(i+1);
  234. WideCharToMultiByte(codePage, // code page
  235. 0, // performance and mapping flags
  236. s, // wide-character string
  237. -1, // number of chars in string
  238. d, // buffer for new string
  239. i, // size of buffer
  240. NULL, // default for unmappable chars
  241. NULL // set when default char used
  242. );
  243. d[i]=0;
  244. return d;
  245. }
  246. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  247. static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); }
  248. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);}
  249. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
  250. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); }
  251. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  252. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  253. #endif
  254. #ifdef __BORLANDC__
  255. static inline int _strnicmp(char *c1, char *c2, int l){ return strnicmp(c1,c2,l);}
  256. #endif
  257. #else
  258. // for gcc and CC
  259. #ifdef XML_NO_WIDE_CHAR
  // Variant built when XML_NO_WIDE_CHAR is defined: no conversion is
  // available, so the answer is always NULL.
  char *myWideCharToMultiByte(const wchar_t *s)
  {
      return NULL;
  }
  261. #else
  // Converts the NUL-terminated wide string s into a newly malloc'ed
  // multi-byte string using wcsrtombs().
  // Returns NULL when s is NULL, when the string cannot be converted, or
  // when the buffer cannot be allocated. Release the result with free().
  char *myWideCharToMultiByte(const wchar_t *s)
  {
      if (!s) return NULL;
      // First pass (NULL destination) only measures the converted length;
      // it works on a copy so that s still points at the start afterwards.
      const wchar_t *ss=s;
      int i=(int)wcsrtombs(NULL,&ss,0,NULL);
      if (i<0) return NULL;
      char *d=(char *)malloc(i+1);
      if (!d) return NULL; // allocation failed: do not write through NULL
      wcsrtombs(d,&s,i,NULL);
      d[i]=0;
      return d;
  }
  272. #endif
  273. #ifdef _XMLWIDECHAR
  // Converts the NUL-terminated multi-byte string s into a newly malloc'ed
  // wide string using mbsrtowcs().
  // Returns NULL when s is NULL, when the string cannot be converted, or
  // when the buffer cannot be allocated. Release the result with free().
  wchar_t *myMultiByteToWideChar(const char *s)
  {
      if (!s) return NULL;
      // First pass (NULL destination) only measures the converted length;
      // it works on a copy so that s still points at the start afterwards.
      const char *ss=s;
      int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
      if (i<0) return NULL;
      wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
      if (!d) return NULL; // allocation failed: do not write through NULL
      mbsrtowcs(d,&s,i,NULL);
      d[i]=0;
      return d;
  }
  284. int xstrlen(XMLCSTR c) { return wcslen(c); }
  285. #ifdef sun
  286. // for CC
  287. #include <widec.h>
  288. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
  289. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);}
  290. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
  291. #else
  292. // for gcc
  293. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
  294. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);}
  295. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
  296. #endif
  297. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  298. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  299. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode)
  300. {
  301. char *filenameAscii=myWideCharToMultiByte(filename);
  302. FILE *f;
  303. if (mode[0]==_X('r')) f=fopen(filenameAscii,"rb");
  304. else f=fopen(filenameAscii,"wb");
  305. free(filenameAscii);
  306. return f;
  307. }
  308. #else
  309. static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  310. static inline int xstrlen(XMLCSTR c) { return strlen(c); }
  311. static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
  312. static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);}
  313. static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
  314. static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  315. static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  316. #endif
  // Non-Windows shim: provides _strnicmp on top of strncasecmp.
  static inline int _strnicmp(const char *c1,const char *c2, int l)
  {
      return strncasecmp(c1,c2,l);
  }
  318. #endif
  319. /////////////////////////////////////////////////////////////////////////
  320. // the "openFileHelper" function //
  321. /////////////////////////////////////////////////////////////////////////
  322. // Since each application has its own way to report and deal with errors, you should modify & rewrite
  323. // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs.
  324. XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
  325. {
  326. // guess the value of the global parameter "characterEncoding"
  327. // (the guess is based on the first 200 bytes of the file).
  328. FILE *f=xfopen(filename,_X("rb"));
  329. if (f)
  330. {
  331. char bb[205];
  332. int l=(int)fread(bb,1,200,f);
  333. setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace);
  334. fclose(f);
  335. }
  336. // parse the file
  337. XMLResults pResults;
  338. XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
  339. // display error message (if any)
  340. if (pResults.error != eXMLErrorNone)
  341. {
  342. // create message
  343. char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_X("");
  344. if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
  345. sprintf(message,
  346. #ifdef _XMLWIDECHAR
  347. "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
  348. #else
  349. "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
  350. #endif
  351. ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
  352. // display message
  353. #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_)
  354. MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
  355. #else
  356. printf("%s",message);
  357. #endif
  358. exit(255);
  359. }
  360. return xnode;
  361. }
  362. /////////////////////////////////////////////////////////////////////////
  363. // Here start the core implementation of the XMLParser library //
  364. /////////////////////////////////////////////////////////////////////////
  365. // You should normally not change anything below this point.
  366. #ifndef _XMLWIDECHAR
  367. // If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte.
  368. // If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes).
  369. // If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes).
  370. // This table is used as lookup-table to know the length of a character (in byte) based on the
  371. // content of the first byte of the character.
  372. // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
  // UTF-8: length in bytes of a character, indexed by its first byte.
  // Entry 0 must stay 0.
  static const char XML_utf8ByteTable[256] =
  {
      /* 0x00 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x10 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x20 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x30 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x40 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x50 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x60 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x70 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // end of ASCII range
      /* 0x80 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // 0x80 to 0xc1 invalid
      /* 0x90 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xa0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xb0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xc0 */ 1,1,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 0xc2 to 0xdf: 2 bytes
      /* 0xd0 */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
      /* 0xe0 */ 3,3,3,3, 3,3,3,3, 3,3,3,3, 3,3,3,3, // 0xe0 to 0xef: 3 bytes
      /* 0xf0 */ 4,4,4,4, 4,1,1,1, 1,1,1,1, 1,1,1,1  // 0xf0 to 0xf4: 4 bytes, 0xf5 and higher invalid
  };
  // Single-byte encodings: every character is 1 byte long.
  // Entry 0 is 0, like in the other byte tables.
  static const char XML_asciiByteTable[256] =
  {
      /* 0x00 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x10 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x20 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x30 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x40 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x50 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x60 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x70 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x80 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x90 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xa0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xb0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xc0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xd0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xe0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xf0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1
  };
  // Shift-JIS: length in bytes of a character, indexed by its first byte.
  // Entry 0 is 0, like in the other byte tables.
  static const char XML_sjisByteTable[256] =
  {
      /* 0x00 */ 0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x10 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x20 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x30 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x40 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x50 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x60 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0x70 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, // end of ASCII range
      /* 0x80 */ 1,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 0x81 to 0x9F: 2 bytes
      /* 0x90 */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
      /* 0xa0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xb0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xc0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xd0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
      /* 0xe0 */ 2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2, // 0xe0 to 0xef: 2 bytes
      /* 0xf0 */ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1
  };
  422. static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8"
  423. #endif
  424. XMLNode XMLNode::emptyXMLNode;
  425. XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
  426. XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
  427. // Enumeration used to decipher what type a token is
  // Token kinds produced by the tokenizer (GetNextToken).
  enum XMLTokenTypeTag
  {
      eTokenText = 0,
      eTokenQuotedText,
      eTokenTagStart,       // "<"
      eTokenTagEnd,         // "</"
      eTokenCloseTag,       // ">"
      eTokenEquals,         // "="
      eTokenDeclaration,    // "<?"
      eTokenShortHandClose, // "/>"
      eTokenClear,
      eTokenError
  };
  typedef enum XMLTokenTypeTag XMLTokenType;
  441. // Main structure used for parsing XML
  442. typedef struct XML
  443. {
  444. XMLCSTR lpXML;
  445. XMLCSTR lpszText;
  446. int nIndex,nIndexMissigEndTag;
  447. enum XMLError error;
  448. XMLCSTR lpEndTag;
  449. int cbEndTag;
  450. XMLCSTR lpNewElement;
  451. int cbNewElement;
  452. int nFirst;
  453. } XML;
  454. typedef struct
  455. {
  456. ALLXMLClearTag *pClr;
  457. XMLCSTR pStr;
  458. } NextToken;
  459. // Enumeration used when parsing attributes
  // Which part of an attribute is currently being parsed.
  enum Attrib
  {
      eAttribName = 0,
      eAttribEquals,
      eAttribValue
  };
  466. // Enumeration used when parsing elements to dictate whether we are currently
  467. // inside a tag
  // Whether the element parser is currently inside a tag.
  enum Status
  {
      eInsideTag = 0,
      eOutsideTag
  };
  473. XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
  474. {
  475. if (!d) return eXMLErrorNone;
  476. FILE *f=xfopen(filename,_X("wb"));
  477. if (!f) return eXMLErrorCannotOpenWriteFile;
  478. #ifdef _XMLWIDECHAR
  479. unsigned char h[2]={ 0xFF, 0xFE };
  480. if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
  481. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  482. {
  483. if (!fwrite(_X("<?xml version=\"1.0\" encoding=\"utf-16\"?>\n"),sizeof(wchar_t)*40,1,f))
  484. return eXMLErrorCannotWriteFile;
  485. }
  486. #else
  487. if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration())))
  488. {
  489. if (characterEncoding==encoding_UTF8)
  490. {
  491. // header so that windows recognize the file as UTF-8:
  492. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  493. encoding="utf-8";
  494. } else if (characterEncoding==encoding_ShiftJIS) encoding="SHIFT-JIS";
  495. if (!encoding) encoding="ISO-8859-1";
  496. if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
  497. } else
  498. {
  499. if (characterEncoding==encoding_UTF8)
  500. {
  501. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  502. }
  503. }
  504. #endif
  505. int i;
  506. XMLSTR t=createXMLString(nFormat,&i);
  507. if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
  508. if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
  509. free(t);
  510. return eXMLErrorNone;
  511. }
  512. // Duplicate a given string.
  513. XMLSTR stringDup(XMLCSTR lpszData, int cbData)
  514. {
  515. if (lpszData==NULL) return NULL;
  516. XMLSTR lpszNew;
  517. if (cbData==0) cbData=(int)xstrlen(lpszData);
  518. lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
  519. if (lpszNew)
  520. {
  521. memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
  522. lpszNew[cbData] = (XMLCHAR)NULL;
  523. }
  524. return lpszNew;
  525. }
  526. XMLSTR toXMLStringUnSafe(XMLSTR dest,XMLCSTR source)
  527. {
  528. XMLSTR dd=dest;
  529. XMLCHAR ch;
  530. XMLCharacterEntity *entity;
  531. while ((ch=*source))
  532. {
  533. entity=XMLEntities;
  534. do
  535. {
  536. if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
  537. entity++;
  538. } while(entity->s);
  539. #ifdef _XMLWIDECHAR
  540. *(dest++)=*(source++);
  541. #else
  542. switch(XML_ByteTable[(unsigned char)ch])
  543. {
  544. case 4: *(dest++)=*(source++);
  545. case 3: *(dest++)=*(source++);
  546. case 2: *(dest++)=*(source++);
  547. case 1: *(dest++)=*(source++);
  548. }
  549. #endif
  550. out_of_loop1:
  551. ;
  552. }
  553. *dest=0;
  554. return dd;
  555. }
  556. // private (used while rendering):
  557. int lengthXMLString(XMLCSTR source)
  558. {
  559. int r=0;
  560. XMLCharacterEntity *entity;
  561. XMLCHAR ch;
  562. while ((ch=*source))
  563. {
  564. entity=XMLEntities;
  565. do
  566. {
  567. if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
  568. entity++;
  569. } while(entity->s);
  570. #ifdef _XMLWIDECHAR
  571. r++; source++;
  572. #else
  573. ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
  574. #endif
  575. out_of_loop1:
  576. ;
  577. }
  578. return r;
  579. }
  580. ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); }
  581. void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; }
  582. XMLSTR ToXMLStringTool::toXML(XMLCSTR source)
  583. {
  584. int l=lengthXMLString(source)+1;
  585. if (l>buflen) { buflen=l; buf=(XMLSTR)realloc(buf,l*sizeof(XMLCHAR)); }
  586. return toXMLStringUnSafe(buf,source);
  587. }
  588. // private:
  589. XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
  590. {
  591. // This function is the opposite of the function "toXMLString". It decodes the escape
  592. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  593. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  594. // the XML library will always gives you back "decoded" strings.
  595. //
  596. // in: string (s) and length (lo) of string
  597. // out: new allocated string converted from xml
  598. if (!s) return NULL;
  599. int ll=0,j;
  600. XMLSTR d;
  601. XMLCSTR ss=s;
  602. XMLCharacterEntity *entity;
  603. while ((lo>0)&&(*s))
  604. {
  605. if (*s==_X('&'))
  606. {
  607. if ((lo>2)&&(s[1]==_X('#')))
  608. {
  609. s+=2; lo-=2;
  610. if ((*s==_X('X'))||(*s==_X('x'))) { s++; lo--; }
  611. while ((*s)&&(*s!=_X(';'))&&((lo--)>0)) s++;
  612. if (*s!=_X(';'))
  613. {
  614. pXML->error=eXMLErrorUnknownCharacterEntity;
  615. return NULL;
  616. }
  617. s++; lo--;
  618. } else
  619. {
  620. entity=XMLEntities;
  621. do
  622. {
  623. if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
  624. entity++;
  625. } while(entity->s);
  626. if (!entity->s)
  627. {
  628. pXML->error=eXMLErrorUnknownCharacterEntity;
  629. return NULL;
  630. }
  631. }
  632. } else
  633. {
  634. #ifdef _XMLWIDECHAR
  635. s++; lo--;
  636. #else
  637. j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
  638. #endif
  639. }
  640. ll++;
  641. }
  642. d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
  643. s=d;
  644. while (ll-->0)
  645. {
  646. if (*ss==_X('&'))
  647. {
  648. if (ss[1]==_X('#'))
  649. {
  650. ss+=2; j=0;
  651. if ((*ss==_X('X'))||(*ss==_X('x')))
  652. {
  653. ss++;
  654. while (*ss!=_X(';'))
  655. {
  656. if ((*ss>=_X('0'))&&(*ss<=_X('9'))) j=(j<<4)+*ss-_X('0');
  657. else if ((*ss>=_X('A'))&&(*ss<=_X('F'))) j=(j<<4)+*ss-_X('A')+10;
  658. else if ((*ss>=_X('a'))&&(*ss<=_X('f'))) j=(j<<4)+*ss-_X('a')+10;
  659. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  660. ss++;
  661. }
  662. } else
  663. {
  664. while (*ss!=_X(';'))
  665. {
  666. if ((*ss>=_X('0'))&&(*ss<=_X('9'))) j=(j*10)+*ss-_X('0');
  667. else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;}
  668. ss++;
  669. }
  670. }
  671. (*d++)=(XMLCHAR)j; ss++;
  672. } else
  673. {
  674. entity=XMLEntities;
  675. do
  676. {
  677. if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
  678. entity++;
  679. } while(entity->s);
  680. }
  681. } else
  682. {
  683. #ifdef _XMLWIDECHAR
  684. *(d++)=*(ss++);
  685. #else
  686. switch(XML_ByteTable[(unsigned char)*ss])
  687. {
  688. case 4: *(d++)=*(ss++); ll--;
  689. case 3: *(d++)=*(ss++); ll--;
  690. case 2: *(d++)=*(ss++); ll--;
  691. case 1: *(d++)=*(ss++);
  692. }
  693. #endif
  694. }
  695. }
  696. *d=0;
  697. return (XMLSTR)s;
  698. }
  // True when ch is one of the four white-space characters the parser skips
  // (newline, space, tab, carriage return). The argument is parenthesized so
  // that expression arguments expand correctly; it is still evaluated several
  // times, so pass an expression without side effects. The outer parentheses
  // are required: the macro is used directly as a "while" condition.
  #define XML_isSPACECHAR(ch) (((ch)==_X('\n'))||((ch)==_X(' '))||((ch)== _X('\t'))||((ch)==_X('\r')))
  700. // private:
  701. char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
  702. // !!!! WARNING strange convention&:
  703. // return 0 if equals
  704. // return 1 if different
  705. {
  706. if (!cclose) return 1;
  707. int l=(int)xstrlen(cclose);
  708. if (xstrnicmp(cclose, copen, l)!=0) return 1;
  709. const XMLCHAR c=copen[l];
  710. if (XML_isSPACECHAR(c)||
  711. (c==_X('/' ))||
  712. (c==_X('<' ))||
  713. (c==_X('>' ))||
  714. (c==_X('=' ))) return 0;
  715. return 1;
  716. }
  717. // Obtain the next character from the string.
  718. static inline XMLCHAR getNextChar(XML *pXML)
  719. {
  720. XMLCHAR ch = pXML->lpXML[pXML->nIndex];
  721. #ifdef _XMLWIDECHAR
  722. if (ch!=0) pXML->nIndex++;
  723. #else
  724. pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
  725. #endif
  726. return ch;
  727. }
  728. // Find the next token in a string.
  729. // pcbToken contains the number of characters that have been read.
  730. static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
  731. {
  732. NextToken result;
  733. XMLCHAR ch;
  734. XMLCHAR chTemp;
  735. int indexStart,nFoundMatch,nIsText=FALSE;
  736. result.pClr=NULL; // prevent warning
  737. // Find next non-white space character
  738. do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
  739. if (ch)
  740. {
  741. // Cache the current string pointer
  742. result.pStr = &pXML->lpXML[indexStart];
  743. // First check whether the token is in the clear tag list (meaning it
  744. // does not need formatting).
  745. ALLXMLClearTag *ctag=XMLClearTags;
  746. do
  747. {
  748. if (xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
  749. {
  750. result.pClr=ctag;
  751. pXML->nIndex+=ctag->openTagLen-1;
  752. *pType=eTokenClear;
  753. return result;
  754. }
  755. ctag++;
  756. } while(ctag->lpszOpen);
  757. // If we didn't find a clear tag then check for standard tokens
  758. switch(ch)
  759. {
  760. // Check for quotes
  761. case _X('\''):
  762. case _X('\"'):
  763. // Type of token
  764. *pType = eTokenQuotedText;
  765. chTemp = ch;
  766. // Set the size
  767. nFoundMatch = FALSE;
  768. // Search through the string to find a matching quote
  769. while((ch = getNextChar(pXML)))
  770. {
  771. if (ch==chTemp) { nFoundMatch = TRUE; break; }
  772. if (ch==_X('<')) break;
  773. }
  774. // If we failed to find a matching quote
  775. if (nFoundMatch == FALSE)
  776. {
  777. pXML->nIndex=indexStart+1;
  778. nIsText=TRUE;
  779. break;
  780. }
  781. // 4.02.2002
  782. // if (FindNonWhiteSpace(pXML)) pXML->nIndex--;
  783. break;
  784. // Equals (used with attribute values)
  785. case _X('='):
  786. *pType = eTokenEquals;
  787. break;
  788. // Close tag
  789. case _X('>'):
  790. *pType = eTokenCloseTag;
  791. break;
  792. // Check for tag start and tag end
  793. case _X('<'):
  794. // Peek at the next character to see if we have an end tag '</',
  795. // or an xml declaration '<?'
  796. chTemp = pXML->lpXML[pXML->nIndex];
  797. // If we have a tag end...
  798. if (chTemp == _X('/'))
  799. {
  800. // Set the type and ensure we point at the next character
  801. getNextChar(pXML);
  802. *pType = eTokenTagEnd;
  803. }
  804. // If we have an XML declaration tag
  805. else if (chTemp == _X('?'))
  806. {
  807. // Set the type and ensure we point at the next character
  808. getNextChar(pXML);
  809. *pType = eTokenDeclaration;
  810. }
  811. // Otherwise we must have a start tag
  812. else
  813. {
  814. *pType = eTokenTagStart;
  815. }
  816. break;
  817. // Check to see if we have a short hand type end tag ('/>').
  818. case _X('/'):
  819. // Peek at the next character to see if we have a short end tag '/>'
  820. chTemp = pXML->lpXML[pXML->nIndex];
  821. // If we have a short hand end tag...
  822. if (chTemp == _X('>'))
  823. {
  824. // Set the type and ensure we point at the next character
  825. getNextChar(pXML);
  826. *pType = eTokenShortHandClose;
  827. break;
  828. }
  829. // If we haven't found a short hand closing tag then drop into the
  830. // text process
  831. // Other characters
  832. default:
  833. nIsText = TRUE;
  834. }
  835. // If this is a TEXT node
  836. if (nIsText)
  837. {
  838. // Indicate we are dealing with text
  839. *pType = eTokenText;
  840. while((ch = getNextChar(pXML)))
  841. {
  842. if XML_isSPACECHAR(ch)
  843. {
  844. indexStart++; break;
  845. } else if (ch==_X('/'))
  846. {
  847. // If we find a slash then this maybe text or a short hand end tag
  848. // Peek at the next character to see it we have short hand end tag
  849. ch=pXML->lpXML[pXML->nIndex];
  850. // If we found a short hand end tag then we need to exit the loop
  851. if (ch==_X('>')) { pXML->nIndex--; break; }
  852. } else if ((ch==_X('<'))||(ch==_X('>'))||(ch==_X('=')))
  853. {
  854. pXML->nIndex--; break;
  855. }
  856. }
  857. }
  858. *pcbToken = pXML->nIndex-indexStart;
  859. } else
  860. {
  861. // If we failed to obtain a valid character
  862. *pcbToken = 0;
  863. *pType = eTokenError;
  864. result.pStr=NULL;
  865. }
  866. return result;
  867. }
  868. XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName)
  869. {
  870. if (!d) { free(lpszName); return NULL; }
  871. if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
  872. d->lpszName=lpszName;
  873. return lpszName;
  874. }
  875. // private:
  876. XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
  877. XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration)
  878. {
  879. d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
  880. d->ref_count=1;
  881. d->lpszName=NULL;
  882. d->nChild= 0;
  883. d->nText = 0;
  884. d->nClear = 0;
  885. d->nAttribute = 0;
  886. d->isDeclaration = isDeclaration;
  887. d->pParent = pParent;
  888. d->pChild= NULL;
  889. d->pText= NULL;
  890. d->pClear= NULL;
  891. d->pAttribute= NULL;
  892. d->pOrder= NULL;
  893. updateName_WOSD(lpszName);
  894. }
  895. XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
  896. XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
  // Growth step handed to the *_priv "add" helpers while parsing.
  #define MEMORYINCREASE 50

  // free() that does nothing for a NULL pointer.
  static inline void myFree(void *p)
  {
      if (p == NULL) return;
      free(p);
  }
  // Growth helper for the node arrays.
  //   p          - current array, or NULL when nothing has been allocated yet.
  //   newsize    - number of elements the array is about to hold.
  //   memInc     - growth step in elements; 0 means "resize on every call".
  //   sizeofElem - size of one element in bytes.
  // A NULL array is allocated with room for memInc elements (one element when
  // memInc is 0). An existing array is only resized when memInc is 0 or when
  // newsize is a multiple of memInc, and then to newsize+memInc elements; in
  // every other case p is returned unchanged.
  // Allocation failure is not checked here: the result of malloc/realloc is
  // returned as is.
  static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  {
      if (!p)
      {
          const int firstCount = memInc ? memInc : 1;
          return malloc(firstCount * sizeofElem);
      }

      // The memInc==0 test comes first so the modulo is never taken by zero.
      const bool mustResize = (memInc == 0) || ((newsize % memInc) == 0);
      if (mustResize)
      {
          p = realloc(p, (newsize + memInc) * sizeofElem);
      }
      return p;
  }
  909. // private:
  910. XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype)
  911. {
  912. if (index<0) return -1;
  913. int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
  914. }
  915. // private:
  916. // update "order" information when deleting a content of a XMLNode
  917. int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  918. {
  919. int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
  920. memmove(o+i, o+i+1, (n-i)*sizeof(int));
  921. for (;i<n;i++)
  922. if ((o[i]&3)==(int)t) o[i]-=4;
  923. // We should normally do:
  924. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  925. // but we skip reallocation because it's too time consuming.
  926. // Anyway, at the end, it will be free'd completely at once.
  927. return i;
  928. }
  929. void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype)
  930. {
  931. // in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
  932. // out: *_pos is the index inside p
  933. p=myRealloc(p,(nc+1),memoryIncrease,size);
  934. int n=d->nChild+d->nText+d->nClear;
  935. d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
  936. int pos=*_pos,*o=d->pOrder;
  937. if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  938. int i=pos;
  939. memmove(o+i+1, o+i, (n-i)*sizeof(int));
  940. while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
  941. if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  942. o[i]=o[pos];
  943. for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
  944. *_pos=pos=o[pos]>>2;
  945. memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
  946. return p;
  947. }
  948. // Add a child node to the given element.
  949. XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos)
  950. {
  951. if (!lpszName) return emptyXMLNode;
  952. d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
  953. d->pChild[pos].d=NULL;
  954. d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
  955. d->nChild++;
  956. return d->pChild[pos];
  957. }
  958. // Add an attribute to an element.
  959. XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev)
  960. {
  961. if (!lpszName) return &emptyXMLAttribute;
  962. if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; }
  963. int nc=d->nAttribute;
  964. d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
  965. XMLAttribute *pAttr=d->pAttribute+nc;
  966. pAttr->lpszName = lpszName;
  967. pAttr->lpszValue = lpszValuev;
  968. d->nAttribute++;
  969. return pAttr;
  970. }
  971. // Add text to the element.
  972. XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos)
  973. {
  974. if (!lpszValue) return NULL;
  975. if (!d) { myFree(lpszValue); return NULL; }
  976. d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
  977. d->pText[pos]=lpszValue;
  978. d->nText++;
  979. return lpszValue;
  980. }
  981. // Add clear (unformatted) text to the element.
  982. XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
  983. {
  984. if (!lpszValue) return &emptyXMLClear;
  985. if (!d) { myFree(lpszValue); return &emptyXMLClear; }
  986. d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
  987. XMLClear *pNewClear=d->pClear+pos;
  988. pNewClear->lpszValue = lpszValue;
  989. if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen;
  990. if (!lpszClose) lpszClose=XMLClearTags->lpszClose;
  991. pNewClear->lpszOpenTag = lpszOpen;
  992. pNewClear->lpszCloseTag = lpszClose;
  993. d->nClear++;
  994. return pNewClear;
  995. }
  996. // private:
  997. // Parse a clear (unformatted) type node.
  998. char XMLNode::parseClearTag(void *px, void *_pClear)
  999. {
  1000. XML *pXML=(XML *)px;
  1001. ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear);
  1002. int cbTemp=0;
  1003. XMLCSTR lpszTemp=NULL;
  1004. XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
  1005. static XMLCSTR docTypeEnd=_X("]>");
  1006. // Find the closing tag
  1007. // Seems the <!DOCTYPE need a better treatment so lets handle it
  1008. if (pClear.lpszOpen==XMLClearTags[1].lpszOpen)
  1009. {
  1010. XMLCSTR pCh=lpXML;
  1011. while (*pCh)
  1012. {
  1013. if (*pCh==_X('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; }
  1014. else if (*pCh==_X('>')) { lpszTemp=pCh; break; }
  1015. #ifdef _XMLWIDECHAR
  1016. pCh++;
  1017. #else
  1018. pCh+=XML_ByteTable[(unsigned char)(*pCh)];
  1019. #endif
  1020. }
  1021. } else lpszTemp=xstrstr(lpXML, pClear.lpszClose);
  1022. if (lpszTemp)
  1023. {
  1024. // Cache the size and increment the index
  1025. cbTemp = (int)(lpszTemp - lpXML);
  1026. pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose);
  1027. // Add the clear node to the current element
  1028. addClear_priv(MEMORYINCREASE,stringDup(lpXML,cbTemp), pClear.lpszOpen, pClear.lpszClose,-1);
  1029. return 0;
  1030. }
  1031. // If we failed to find the end tag
  1032. pXML->error = eXMLErrorUnmatchedEndClearTag;
  1033. return 1;
  1034. }
  1035. void XMLNode::exactMemory(XMLNodeData *d)
  1036. {
  1037. if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int));
  1038. if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
  1039. if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
  1040. if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
  1041. if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
  1042. }
  1043. char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr)
  1044. {
  1045. XML *pXML=(XML *)pa;
  1046. XMLCSTR lpszText=pXML->lpszText;
  1047. if (!lpszText) return 0;
  1048. if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++;
  1049. int cbText = (int)(tokenPStr - lpszText);
  1050. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1051. if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; }
  1052. if (!cbText) { pXML->lpszText=NULL; return 0; }
  1053. XMLSTR lpt=fromXMLString(lpszText,cbText,pXML);
  1054. if (!lpt) return 1;
  1055. addText_priv(MEMORYINCREASE,lpt,-1);
  1056. pXML->lpszText=NULL;
  1057. return 0;
  1058. }
  1059. // private:
  1060. // Recursively parse an XML element.
  1061. int XMLNode::ParseXMLElement(void *pa)
  1062. {
  1063. XML *pXML=(XML *)pa;
  1064. int cbToken;
  1065. enum XMLTokenTypeTag xtype;
  1066. NextToken token;
  1067. XMLCSTR lpszTemp=NULL;
  1068. int cbTemp=0;
  1069. char nDeclaration;
  1070. XMLNode pNew;
  1071. enum Status status; // inside or outside a tag
  1072. enum Attrib attrib = eAttribName;
  1073. assert(pXML);
  1074. // If this is the first call to the function
  1075. if (pXML->nFirst)
  1076. {
  1077. // Assume we are outside of a tag definition
  1078. pXML->nFirst = FALSE;
  1079. status = eOutsideTag;
  1080. } else
  1081. {
  1082. // If this is not the first call then we should only be called when inside a tag.
  1083. status = eInsideTag;
  1084. }
  1085. // Iterate through the tokens in the document
  1086. for(;;)
  1087. {
  1088. // Obtain the next token
  1089. token = GetNextToken(pXML, &cbToken, &xtype);
  1090. if (xtype != eTokenError)
  1091. {
  1092. // Check the current status
  1093. switch(status)
  1094. {
  1095. // If we are outside of a tag definition
  1096. case eOutsideTag:
  1097. // Check what type of token we obtained
  1098. switch(xtype)
  1099. {
  1100. // If we have found text or quoted text
  1101. case eTokenText:
  1102. case eTokenCloseTag: /* '>' */
  1103. case eTokenShortHandClose: /* '/>' */
  1104. case eTokenQuotedText:
  1105. case eTokenEquals:
  1106. break;
  1107. // If we found a start tag '<' and declarations '<?'
  1108. case eTokenTagStart:
  1109. case eTokenDeclaration:
  1110. // Cache whether this new element is a declaration or not
  1111. nDeclaration = (xtype == eTokenDeclaration);
  1112. // If we have node text then add this to the element
  1113. if (maybeAddTxT(pXML,token.pStr)) return FALSE;
  1114. // Find the name of the tag
  1115. token = GetNextToken(pXML, &cbToken, &xtype);
  1116. // Return an error if we couldn't obtain the next token or
  1117. // it wasnt text
  1118. if (xtype != eTokenText)
  1119. {
  1120. pXML->error = eXMLErrorMissingTagName;
  1121. return FALSE;
  1122. }
  1123. // If we found a new element which is the same as this
  1124. // element then we need to pass this back to the caller..
  1125. #ifdef APPROXIMATE_PARSING
  1126. if (d->lpszName &&
  1127. myTagCompare(d->lpszName, token.pStr) == 0)
  1128. {
  1129. // Indicate to the caller that it needs to create a
  1130. // new element.
  1131. pXML->lpNewElement = token.pStr;
  1132. pXML->cbNewElement = cbToken;
  1133. return TRUE;
  1134. } else
  1135. #endif
  1136. {
  1137. // If the name of the new element differs from the name of
  1138. // the current element we need to add the new element to
  1139. // the current one and recurse
  1140. pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);
  1141. while (!pNew.isEmpty())
  1142. {
  1143. // Callself to process the new node. If we return
  1144. // FALSE this means we dont have any more
  1145. // processing to do...
  1146. if (!pNew.ParseXMLElement(pXML)) return FALSE;
  1147. else
  1148. {
  1149. // If the call to recurse this function
  1150. // evented in a end tag specified in XML then
  1151. // we need to unwind the calls to this
  1152. // function until we find the appropriate node
  1153. // (the element name and end tag name must
  1154. // match)
  1155. if (pXML->cbEndTag)
  1156. {
  1157. // If we are back at the root node then we
  1158. // have an unmatched end tag
  1159. if (!d->lpszName)
  1160. {
  1161. pXML->error=eXMLErrorUnmatchedEndTag;
  1162. return FALSE;
  1163. }
  1164. // If the end tag matches the name of this
  1165. // element then we only need to unwind
  1166. // once more...
  1167. if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
  1168. {
  1169. pXML->cbEndTag = 0;
  1170. }
  1171. return TRUE;
  1172. } else
  1173. if (pXML->cbNewElement)
  1174. {
  1175. // If the call indicated a new element is to
  1176. // be created on THIS element.
  1177. // If the name of this element matches the
  1178. // name of the element we need to create
  1179. // then we need to return to the caller
  1180. // and let it process the element.
  1181. if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
  1182. {
  1183. return TRUE;
  1184. }
  1185. // Add the new element and recurse
  1186. pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);
  1187. pXML->cbNewElement = 0;
  1188. }
  1189. else
  1190. {
  1191. // If we didn't have a new element to create
  1192. pNew = emptyXMLNode;
  1193. }
  1194. }

Large files are truncated, but you can click here to view the full file