PageRenderTime 34ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/src/xmlParser.cpp

https://github.com/tristanstcyr/MacFungus-2.0
C++ | 2558 lines | 1981 code | 217 blank | 360 comment | 365 complexity | b1a30150b57a0fcbf5d0495672c92d73 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. /**
  2. ****************************************************************************
  3. * <P> XML.c - implementation file for basic XML parser written in ANSI C++
  4. * for portability. It works by using recursion and a node tree for breaking
  5. * down the elements of an XML document. </P>
  6. *
  7. * @version V2.14
  8. * @author Frank Vanden Berghen
  9. *
  10. * NOTE:
  11. *
  12. * If you add "#define STRICT_PARSING", on the first line of this file
  13. * the parser will see the following XML-stream:
  14. * <a><b>some text</b><b>other text </a>
  15. * as an error. Otherwise, this string will be equivalent to:
  16. * <a><b>some text</b><b>other text</b></a>
  17. *
  18. * NOTE:
  19. *
  20. * If you add "#define APPROXIMATE_PARSING" on the first line of this file
  21. * the parser will see the following XML-stream:
  22. * <data name="n1">
  23. * <data name="n2">
  24. * <data name="n3" />
  25. * as equivalent to the following XML-stream:
  26. * <data name="n1" />
  27. * <data name="n2" />
  28. * <data name="n3" />
  29. * This can be useful for badly-formed XML-streams but prevent the use
  30. * of the following XML-stream (problem is: tags at contiguous levels
  31. * have the same names):
  32. * <data name="n1">
  33. * <data name="n2">
  34. * <data name="n3" />
  35. * </data>
  36. * </data>
  37. *
  38. * NOTE:
  39. *
  40. * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
  41. * the "openFileHelper" function will always display error messages inside the
  42. * console instead of inside a message-box-window. Message-box-windows are
  43. * available on windows only.
  44. *
  45. * BSD license:
  46. * Copyright (c) 2002, Frank Vanden Berghen
  47. * All rights reserved.
  48. * Redistribution and use in source and binary forms, with or without
  49. * modification, are permitted provided that the following conditions are met:
  50. *
  51. * * Redistributions of source code must retain the above copyright
  52. * notice, this list of conditions and the following disclaimer.
  53. * * Redistributions in binary form must reproduce the above copyright
  54. * notice, this list of conditions and the following disclaimer in the
  55. * documentation and/or other materials provided with the distribution.
  56. * * Neither the name of the Frank Vanden Berghen nor the
  57. * names of its contributors may be used to endorse or promote products
  58. * derived from this software without specific prior written permission.
  59. *
  60. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
  61. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  62. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  63. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
  64. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  65. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  66. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  67. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  68. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  69. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  70. *
  71. ****************************************************************************
  72. */
  73. #ifdef WIN32
  74. //#ifdef _DEBUG
  75. //#define _CRTDBG_MAP_ALLOC
  76. //#include <crtdbg.h>
  77. //#endif
  78. #define WIN32_LEAN_AND_MEAN
  79. #ifndef _CRT_SECURE_NO_DEPRECATE
  80. #define _CRT_SECURE_NO_DEPRECATE
  81. #endif
  82. #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
  83. // to have "MessageBoxA" to display error messages for openFileHelper
  84. #endif
  85. #include <memory.h>
  86. #include <assert.h>
  87. #include <stdio.h>
  88. #include <string.h>
  89. #include <stdlib.h>
  90. #include "xmlParser.h"
  91. inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; }
// You can modify the initialization of the variable "XMLClearTags" below
// to change the clearTags that are currently recognized by the library.
// Each entry lists: the opening text, the length of that opening text in
// characters, and the matching closing text.
// The list MUST end with the all-NULL/0 entry: the tokenizer walks the table
// until it meets an entry whose opening text is NULL.
ALLXMLClearTag XMLClearTags[] =
{
{ _T("<![CDATA["),9, _T("]]>") },
{ _T("<PRE>") ,5, _T("</PRE>") },
{ _T("<Script>") ,8, _T("</Script>")},
{ _T("<!--") ,4, _T("-->") },
{ _T("<!DOCTYPE"),9, _T(">") },
{ NULL ,0, NULL }
};
// You can modify the initialization of the variable "XMLEntities" below
// to change the character entities that are currently recognized by the library.
// Additionally, the syntaxes "&#xA0;" and "&#160;" are recognized.
// One table entry: s = entity text (e.g. "&amp;"), l = length of s in
// characters, c = the single character the entity stands for.
typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
// The table is scanned linearly and is terminated by the entry whose "s" is NULL.
static XMLCharacterEntity XMLEntities[] =
{
{ _T("&amp;" ), 5, _T('&' )},
{ _T("&lt;" ), 4, _T('<' )},
{ _T("&gt;" ), 4, _T('>' )},
{ _T("&quot;"), 6, _T('\"')},
{ _T("&apos;"), 6, _T('\'')},
{ NULL , 0, '\0' }
};
// When rendering the XMLNode to a string (using the "createXMLString" function),
// you can ask for a beautiful formatting. This formatting is using the
// following indentation character (a single TAB by default):
#define INDENTCHAR _T('\t')
  120. // The following function parses the XML errors into a user friendly string.
  121. // You can edit this to change the output language of the library to something else.
  122. XMLCSTR XMLNode::getError(XMLError xerror)
  123. {
  124. switch (xerror)
  125. {
  126. case eXMLErrorNone: return _T("No error");
  127. case eXMLErrorMissingEndTag: return _T("Warning: Unmatched end tag");
  128. case eXMLErrorEmpty: return _T("Error: No XML data");
  129. case eXMLErrorFirstNotStartTag: return _T("Error: First token not start tag");
  130. case eXMLErrorMissingTagName: return _T("Error: Missing start tag name");
  131. case eXMLErrorMissingEndTagName: return _T("Error: Missing end tag name");
  132. case eXMLErrorNoMatchingQuote: return _T("Error: Unmatched quote");
  133. case eXMLErrorUnmatchedEndTag: return _T("Error: Unmatched end tag");
  134. case eXMLErrorUnmatchedEndClearTag: return _T("Error: Unmatched clear tag end");
  135. case eXMLErrorUnexpectedToken: return _T("Error: Unexpected token found");
  136. case eXMLErrorInvalidTag: return _T("Error: Invalid tag found");
  137. case eXMLErrorNoElements: return _T("Error: No elements found");
  138. case eXMLErrorFileNotFound: return _T("Error: File not found");
  139. case eXMLErrorFirstTagNotFound: return _T("Error: First Tag not found");
  140. case eXMLErrorUnknownEscapeSequence: return _T("Error: Unknown character entity");
  141. case eXMLErrorCharConversionError: return _T("Error: unable to convert between UNICODE and MultiByte chars");
  142. case eXMLErrorCannotOpenWriteFile: return _T("Error: unable to open file for writing");
  143. case eXMLErrorCannotWriteFile: return _T("Error: cannot write into file");
  144. case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _T("Warning: Base64-string length is not a multiple of 4");
  145. case eXMLErrorBase64DecodeTruncatedData: return _T("Warning: Base64-string is truncated");
  146. case eXMLErrorBase64DecodeIllegalCharacter: return _T("Error: Base64-string contains an illegal character");
  147. case eXMLErrorBase64DecodeBufferTooSmall: return _T("Error: Base64 decode output buffer is too small");
  148. };
  149. return _T("Unknown");
  150. }
  151. #ifndef _XMLUNICODE
// If "strictUTF8Parsing=0" then we assume that all characters have the same length of 1 byte.
// If "strictUTF8Parsing=1" then the characters have different lengths (from 1 byte to 4 bytes).
// This table is used as lookup-table to know the length of a character (in byte) based on the
// content of the first byte of the character.
// (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
// Entry 0 is 0 so that code advancing by the table value never moves past the
// terminating NUL. Bytes that cannot start a UTF-8 sequence are given length 1.
static const char XML_utf8ByteTable[256] =
{
// 0 1 2 3 4 5 6 7 8 9 a b c d e f
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0
1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte
4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
  177. #endif
  178. // Here is an abstraction layer to access some common string manipulation functions.
  179. // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
  180. // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
  181. // If you plan to "port" the library to a new system/compiler, all you have to do is
  182. // to edit the following lines.
  183. #ifdef WIN32
  184. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET,
  185. char myIsTextUnicode(const void *b,int l) { return IsTextUnicode((CONST LPVOID)b,l,NULL); };
  186. #ifdef _XMLUNICODE
  187. wchar_t *myMultiByteToWideChar(const char *s,int l)
  188. {
  189. int i=(int)MultiByteToWideChar(CP_ACP, // code page
  190. MB_PRECOMPOSED, // character-type options
  191. s, // string to map
  192. l, // number of bytes in string
  193. NULL, // wide-character buffer
  194. 0); // size of buffer
  195. if (i<0) return NULL;
  196. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
  197. MultiByteToWideChar(CP_ACP, // code page
  198. MB_PRECOMPOSED, // character-type options
  199. s, // string to map
  200. l, // number of bytes in string
  201. d, // wide-character buffer
  202. i); // size of buffer
  203. d[i]=0;
  204. return d;
  205. }
  206. #else
  207. char *myWideCharToMultiByte(const wchar_t *s,int l)
  208. {
  209. int i=(int)WideCharToMultiByte(CP_ACP, // code page
  210. 0, // performance and mapping flags
  211. s, // wide-character string
  212. l, // number of chars in string
  213. NULL, // buffer for new string
  214. 0, // size of buffer
  215. NULL, // default for unmappable chars
  216. NULL // set when default char used
  217. );
  218. if (i<0) return NULL;
  219. char *d=(char*)malloc(i+1);
  220. WideCharToMultiByte(CP_ACP, // code page
  221. 0, // performance and mapping flags
  222. s, // wide-character string
  223. l, // number of chars in string
  224. d, // buffer for new string
  225. i, // size of buffer
  226. NULL, // default for unmappable chars
  227. NULL // set when default char used
  228. );
  229. d[i]=0;
  230. return d;
  231. }
  232. #endif
  233. #ifdef __BORLANDC__
// Borland C++ only: forwards to the compiler's strnicmp so that callers can use
// the _strnicmp spelling (compares at most l characters of c1 and c2).
int _strnicmp(char *c1, char *c2, int l){ return strnicmp(c1,c2,l);}
  235. #endif
  236. #else
  237. // for gcc and CC
  238. char myIsTextUnicode(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
  239. {
  240. const wchar_t *s=(const wchar_t*)b;
  241. // buffer too small:
  242. if (len<(int)sizeof(wchar_t)) return FALSE;
  243. // odd length test
  244. if (len&1) return FALSE;
  245. /* only checks the first 256 characters */
  246. len=mmin(256,len/sizeof(wchar_t));
  247. // Check for the special byte order:
  248. if (*s == 0xFFFE) return FALSE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  249. if (*s == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
  250. // checks for ASCII characters in the UNICODE stream
  251. int i,stats=0;
  252. for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
  253. if (stats>len/2) return TRUE;
  254. // Check for UNICODE NULL chars
  255. for (i=0; i<len; i++) if (!s[i]) return TRUE;
  256. return FALSE;
  257. }
  258. char *myWideCharToMultiByte(const wchar_t *s, int l)
  259. {
  260. const wchar_t *ss=s;
  261. int i=(int)wcsrtombs(NULL,&ss,0,NULL);
  262. if (i<0) return NULL;
  263. char *d=(char *)malloc(i+1);
  264. wcsrtombs(d,&s,i,NULL);
  265. d[i]=0;
  266. return d;
  267. }
  268. #ifdef _XMLUNICODE
  269. wchar_t *myMultiByteToWideChar(const char *s, int l)
  270. {
  271. const char *ss=s;
  272. int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
  273. if (i<0) return NULL;
  274. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
  275. mbsrtowcs(d,&s,l,NULL);
  276. d[i]=0;
  277. return d;
  278. }
  279. int _tcslen(XMLCSTR c) { return wcslen(c); }
  280. #ifdef sun
  281. // for CC
  282. #include <widec.h>
  283. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
  284. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
  285. #else
  286. // for gcc
  287. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
  288. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
  289. #endif
  290. XMLSTR _tcsstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  291. XMLSTR _tcscpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  292. FILE *_tfopen(XMLCSTR filename,XMLCSTR mode)
  293. {
  294. char *filenameAscii=myWideCharToMultiByte(filename,0);
  295. FILE *f;
  296. if (mode[0]==_T('r')) f=fopen(filenameAscii,"rb");
  297. else f=fopen(filenameAscii,"wb");
  298. free(filenameAscii);
  299. return f;
  300. }
  301. #else
  302. FILE *_tfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  303. int _tcslen(XMLCSTR c) { return strlen(c); }
  304. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
  305. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
  306. XMLSTR _tcsstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  307. XMLSTR _tcscpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  308. #endif
  309. int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);}
  310. #endif
  311. /////////////////////////////////////////////////////////////////////////
  312. // Here start the core implementation of the XMLParser library //
  313. /////////////////////////////////////////////////////////////////////////
  314. // You should normally not change anything below this point.
  315. // For your own information, I suggest that you read the openFileHelper below:
  316. XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
  317. {
  318. // guess the value of the global parameter "strictUTF8Parsing"
  319. // (the guess is based on the first 200 bytes of the file).
  320. FILE *f=_tfopen(filename,_T("rb"));
  321. if (f)
  322. {
  323. char bb[205];
  324. int l=(int)fread(bb,1,200,f);
  325. setGlobalOptions(1,guessUTF8ParsingParameterValue(bb,l,1));
  326. fclose(f);
  327. }
  328. // parse the file
  329. XMLResults pResults;
  330. XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
  331. // display error message (if any)
  332. if (pResults.error != eXMLErrorNone)
  333. {
  334. // create message
  335. char message[2000],*s1="",*s3=""; XMLCSTR s2=_T("");
  336. if (pResults.error==eXMLErrorFirstTagNotFound) { s1="First Tag should be '"; s2=tag; s3="'.\n"; }
  337. sprintf(message,
  338. #ifdef _XMLUNICODE
  339. "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
  340. #else
  341. "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
  342. #endif
  343. ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
  344. // display message
  345. #ifdef WIN32
  346. #ifndef _XMLPARSER_NO_MESSAGEBOX_
  347. MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
  348. #else
  349. printf("%s",message);
  350. #endif
  351. #else
  352. printf("%s",message);
  353. #endif
  354. exit(255);
  355. }
  356. return xnode;
  357. }
// File-local option flag, enabled (1) by default.
// NOTE(review): not referenced in this part of the file; presumably it controls
// whether the character width of input files is auto-detected - confirm against its users.
static char guessUnicodeChars=1;
  359. #ifndef _XMLUNICODE
// Character-length table for the "every character is one byte" mode:
// entry 0 is 0 (so the terminating NUL is never stepped over), all others are 1.
static const char XML_asciiByteTable[256] =
{
0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
};
// Table currently in use to obtain the byte length of a character from its
// first byte. Comparing this pointer with XML_utf8ByteTable is how the code
// tests whether strict UTF-8 parsing is active.
static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "strictUTF8Parsing=1"
  370. #endif
  371. XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat)
  372. {
  373. int i;
  374. XMLSTR t=createXMLString(nFormat,&i);
  375. FILE *f=_tfopen(filename,_T("wb"));
  376. if (!f) return eXMLErrorCannotOpenWriteFile;
  377. #ifdef _XMLUNICODE
  378. unsigned char h[2]={ 0xFF, 0xFE };
  379. if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
  380. if (!isDeclaration())
  381. {
  382. if (!fwrite(_T("<?xml version=\"1.0\" encoding=\"utf-16\"?>\n"),sizeof(wchar_t)*40,1,f))
  383. return eXMLErrorCannotWriteFile;
  384. }
  385. #else
  386. if (!isDeclaration())
  387. {
  388. if ((!encoding)||(XML_ByteTable==XML_utf8ByteTable))
  389. {
  390. // header so that windows recognize the file as UTF-8:
  391. unsigned char h[3]={0xEF,0xBB,0xBF};
  392. if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  393. if (!fwrite("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",39,1,f)) return eXMLErrorCannotWriteFile;
  394. }
  395. else
  396. if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
  397. } else
  398. {
  399. if (XML_ByteTable==XML_utf8ByteTable) // test if strictUTF8Parsing==1"
  400. {
  401. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  402. }
  403. }
  404. #endif
  405. if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
  406. if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
  407. free(t);
  408. return eXMLErrorNone;
  409. }
  410. // Duplicate a given string.
  411. XMLSTR stringDup(XMLCSTR lpszData, int cbData)
  412. {
  413. if (lpszData==NULL) return NULL;
  414. XMLSTR lpszNew;
  415. if (cbData==0) cbData=(int)_tcslen(lpszData);
  416. lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
  417. if (lpszNew)
  418. {
  419. memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
  420. lpszNew[cbData] = (XMLCHAR)NULL;
  421. }
  422. return lpszNew;
  423. }
// Shared "empty" objects of the XMLNode class. emptyXMLNode and
// emptyXMLAttribute are what the add* functions hand back when they are
// given a NULL name.
XMLNode XMLNode::emptyXMLNode;
XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
// Enumeration used to decipher what type a token is
// (this is the value the tokenizer reports through its pType argument).
typedef enum XMLTokenTypeTag
{
eTokenText = 0,
eTokenQuotedText,
eTokenTagStart, /* "<" */
eTokenTagEnd, /* "</" */
eTokenCloseTag, /* ">" */
eTokenEquals, /* "=" */
eTokenDeclaration, /* "<?" */
eTokenShortHandClose, /* "/>" */
eTokenClear, /* opening text of one of the XMLClearTags entries */
eTokenError /* no more characters could be read */
} XMLTokenType;
// Main structure used for parsing XML
// lpXML  : the text being parsed.
// nIndex : current read position inside lpXML (advanced by getNextChar).
// error  : error code recorded while parsing (e.g. set by fromXMLString).
// NOTE(review): the remaining members are not used in this part of the file;
// judging by their names they track a pending end tag / new element and a
// "first token" state - confirm against the parsing code.
typedef struct XML
{
XMLCSTR lpXML;
int nIndex,nIndexMissigEndTag;
enum XMLError error;
XMLCSTR lpEndTag;
int cbEndTag;
XMLCSTR lpNewElement;
int cbNewElement;
int nFirst;
} XML;
// Result of GetNextToken:
// pClr : the matching XMLClearTags entry when the token is a clear tag, NULL otherwise.
// pStr : pointer to the first character of the token inside the parsed text
//        (NULL when no token could be read).
typedef struct
{
ALLXMLClearTag *pClr;
XMLCSTR pStr;
} NextToken;
// Enumeration used when parsing attributes: the three values name the parts
// of an attribute (its name, the equals sign, its value).
typedef enum Attrib
{
eAttribName = 0,
eAttribEquals,
eAttribValue
} Attrib;
// Enumeration used when parsing elements to dictate whether we are currently
// inside a tag (eInsideTag) or outside of any tag (eOutsideTag).
typedef enum Status
{
eInsideTag = 0,
eOutsideTag
} Status;
  472. // private (used while rendering):
  473. XMLSTR toXMLString(XMLSTR dest,XMLCSTR source)
  474. {
  475. XMLSTR dd=dest;
  476. XMLCHAR ch;
  477. XMLCharacterEntity *entity;
  478. while ((ch=*source))
  479. {
  480. entity=XMLEntities;
  481. do
  482. {
  483. if (ch==entity->c) {_tcscpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
  484. entity++;
  485. } while(entity->s);
  486. #ifdef _XMLUNICODE
  487. *(dest++)=*(source++);
  488. #else
  489. switch(XML_ByteTable[(unsigned char)ch])
  490. {
  491. case 4: *(dest++)=*(source++);
  492. case 3: *(dest++)=*(source++);
  493. case 2: *(dest++)=*(source++);
  494. case 1: *(dest++)=*(source++);
  495. }
  496. #endif
  497. out_of_loop1:
  498. ;
  499. }
  500. *dest=0;
  501. return dd;
  502. }
  503. // private (used while rendering):
  504. int lengthXMLString(XMLCSTR source)
  505. {
  506. int r=0;
  507. XMLCharacterEntity *entity;
  508. XMLCHAR ch;
  509. while ((ch=*source))
  510. {
  511. entity=XMLEntities;
  512. do
  513. {
  514. if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
  515. entity++;
  516. } while(entity->s);
  517. #ifdef _XMLUNICODE
  518. r++; source++;
  519. #else
  520. ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
  521. #endif
  522. out_of_loop1:
  523. ;
  524. }
  525. return r;
  526. }
  527. XMLSTR toXMLString(XMLCSTR source)
  528. {
  529. XMLSTR dest=(XMLSTR)malloc((lengthXMLString(source)+1)*sizeof(XMLCHAR));
  530. return toXMLString(dest,source);
  531. }
  532. XMLSTR toXMLStringFast(XMLSTR *dest,int *destSz, XMLCSTR source)
  533. {
  534. int l=lengthXMLString(source)+1;
  535. if (l>*destSz) { *destSz=l; *dest=(XMLSTR)realloc(*dest,l*sizeof(XMLCHAR)); }
  536. return toXMLString(*dest,source);
  537. }
  538. // private:
  539. XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
  540. {
  541. // This function is the opposite of the function "toXMLString". It decodes the escape
  542. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  543. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  544. // the XML library will always gives you back "decoded" strings.
  545. //
  546. // in: string (s) and length (lo) of string
  547. // out: new allocated string converted from xml
  548. if (!s) return NULL;
  549. int ll=0,j;
  550. XMLSTR d;
  551. XMLCSTR ss=s;
  552. XMLCharacterEntity *entity;
  553. while ((lo>0)&&(*s))
  554. {
  555. if (*s==_T('&'))
  556. {
  557. if ((lo>2)&&(s[1]==_T('#')))
  558. {
  559. s+=2; lo-=2;
  560. if ((*s==_T('X'))||(*s==_T('x'))) { s++; lo--; }
  561. while ((*s)&&(*s!=_T(';'))&&((lo--)>0)) s++;
  562. if (*s!=_T(';'))
  563. {
  564. pXML->error=eXMLErrorUnknownEscapeSequence;
  565. return NULL;
  566. }
  567. s++; lo--;
  568. } else
  569. {
  570. entity=XMLEntities;
  571. do
  572. {
  573. if ((lo>=entity->l)&&(_tcsnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
  574. entity++;
  575. } while(entity->s);
  576. if (!entity->s)
  577. {
  578. pXML->error=eXMLErrorUnknownEscapeSequence;
  579. return NULL;
  580. }
  581. }
  582. } else
  583. {
  584. #ifdef _XMLUNICODE
  585. s++; lo--;
  586. #else
  587. j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
  588. #endif
  589. }
  590. ll++;
  591. }
  592. d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
  593. s=d;
  594. while (ll-->0)
  595. {
  596. if (*ss==_T('&'))
  597. {
  598. if (ss[1]==_T('#'))
  599. {
  600. ss+=2; j=0;
  601. if ((*ss==_T('X'))||(*ss==_T('x')))
  602. {
  603. ss++;
  604. while (*ss!=_T(';'))
  605. {
  606. if ((*ss>=_T('0'))&&(*ss<=_T('9'))) j=(j<<4)+*ss-_T('0');
  607. else if ((*ss>=_T('A'))&&(*ss<=_T('F'))) j=(j<<4)+*ss-_T('A')+10;
  608. else if ((*ss>=_T('a'))&&(*ss<=_T('f'))) j=(j<<4)+*ss-_T('a')+10;
  609. else { free(d); pXML->error=eXMLErrorUnknownEscapeSequence;return NULL;}
  610. ss++;
  611. }
  612. } else
  613. {
  614. while (*ss!=_T(';'))
  615. {
  616. if ((*ss>=_T('0'))&&(*ss<=_T('9'))) j=(j*10)+*ss-_T('0');
  617. else { free(d); pXML->error=eXMLErrorUnknownEscapeSequence;return NULL;}
  618. ss++;
  619. }
  620. }
  621. (*d++)=(XMLCHAR)j; ss++;
  622. } else
  623. {
  624. entity=XMLEntities;
  625. do
  626. {
  627. if (_tcsnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
  628. entity++;
  629. } while(entity->s);
  630. }
  631. } else
  632. {
  633. #ifdef _XMLUNICODE
  634. *(d++)=*(ss++);
  635. #else
  636. switch(XML_ByteTable[(unsigned char)*ss])
  637. {
  638. case 4: *(d++)=*(ss++); ll--;
  639. case 3: *(d++)=*(ss++); ll--;
  640. case 2: *(d++)=*(ss++); ll--;
  641. case 1: *(d++)=*(ss++);
  642. }
  643. #endif
  644. }
  645. }
  646. *d=0;
  647. return (XMLSTR)s;
  648. }
  649. #define XML_isSPACECHAR(ch) ((ch==_T('\n'))||(ch==_T(' '))||(ch== _T('\t'))||(ch==_T('\r')))
  650. // private:
  651. char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
  652. // !!!! WARNING strange convention&:
  653. // return 0 if equals
  654. // return 1 if different
  655. {
  656. if (!cclose) return 1;
  657. int l=(int)_tcslen(cclose);
  658. if (_tcsnicmp(cclose, copen, l)!=0) return 1;
  659. const XMLCHAR c=copen[l];
  660. if (XML_isSPACECHAR(c)||
  661. (c==_T('/' ))||
  662. (c==_T('<' ))||
  663. (c==_T('>' ))||
  664. (c==_T('=' ))) return 0;
  665. return 1;
  666. }
  667. // private:
  668. // update "order" information when deleting a content of a XMLNode
  669. void XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  670. {
  671. int j=(int)((index<<2)+t),i=0,n=nElement(d)+1, *o=d->pOrder;
  672. while ((o[i]!=j)&&(i<n)) i++;
  673. n--;
  674. memmove(o+i, o+i+1, (n-i)*sizeof(int));
  675. for (;i<n;i++)
  676. if ((o[i]&3)==(int)t) o[i]-=4;
  677. // We should normally do:
  678. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  679. // but we skip reallocation because it's too time consuming.
  680. // Anyway, at the end, it will be free'd completely at once.
  681. }
  682. // Obtain the next character from the string.
  683. static inline XMLCHAR getNextChar(XML *pXML)
  684. {
  685. XMLCHAR ch = pXML->lpXML[pXML->nIndex];
  686. #ifdef _XMLUNICODE
  687. if (ch!=0) pXML->nIndex++;
  688. #else
  689. pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
  690. #endif
  691. return ch;
  692. }
// Find the next token in a string.
// pcbToken contains the number of characters that have been read.
//
// pXML     : parser state; pXML->nIndex is advanced past the token.
// pcbToken : out - length of the token (not written when a clear tag is found).
// pType    : out - kind of the token.
// Returns a NextToken whose pStr points at the first character of the token;
// for a clear tag, pClr additionally points at the matching XMLClearTags entry.
// When the end of the text is reached, *pType is eTokenError, *pcbToken is 0
// and pStr is NULL.
static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
{
NextToken result;
XMLCHAR ch;
XMLCHAR chTemp;
int indexStart,nFoundMatch,nIsText=FALSE;
result.pClr=NULL; // prevent warning
// Find next non-white space character
// (indexStart ends up as the position of that character)
do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
if (ch)
{
// Cache the current string pointer
result.pStr = &pXML->lpXML[indexStart];
// First check whether the token is in the clear tag list (meaning it
// does not need formatting).
ALLXMLClearTag *ctag=XMLClearTags;
do
{
if (_tcsnicmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
{
result.pClr=ctag;
// the first character of the opening text was already consumed above
pXML->nIndex+=ctag->openTagLen-1;
*pType=eTokenClear;
return result;
}
ctag++;
} while(ctag->lpszOpen);
// If we didn't find a clear tag then check for standard tokens
switch(ch)
{
// Check for quotes
case _T('\''):
case _T('\"'):
// Type of token
*pType = eTokenQuotedText;
chTemp = ch;
// Set the size
nFoundMatch = FALSE;
// Search through the string to find a matching quote
// (the search gives up at the first '<')
while((ch = getNextChar(pXML)))
{
if (ch==chTemp) { nFoundMatch = TRUE; break; }
if (ch==_T('<')) break;
}
// If we failed to find a matching quote
if (nFoundMatch == FALSE)
{
// rewind to just after the quote and handle the rest as plain text
pXML->nIndex=indexStart+1;
nIsText=TRUE;
break;
}
// 4.02.2002
// if (FindNonWhiteSpace(pXML)) pXML->nIndex--;
break;
// Equals (used with attribute values)
case _T('='):
*pType = eTokenEquals;
break;
// Close tag
case _T('>'):
*pType = eTokenCloseTag;
break;
// Check for tag start and tag end
case _T('<'):
// Peek at the next character to see if we have an end tag '</',
// or an xml declaration '<?'
chTemp = pXML->lpXML[pXML->nIndex];
// If we have a tag end...
if (chTemp == _T('/'))
{
// Set the type and ensure we point at the next character
getNextChar(pXML);
*pType = eTokenTagEnd;
}
// If we have an XML declaration tag
else if (chTemp == _T('?'))
{
// Set the type and ensure we point at the next character
getNextChar(pXML);
*pType = eTokenDeclaration;
}
// Otherwise we must have a start tag
else
{
*pType = eTokenTagStart;
}
break;
// Check to see if we have a short hand type end tag ('/>').
case _T('/'):
// Peek at the next character to see if we have a short end tag '/>'
chTemp = pXML->lpXML[pXML->nIndex];
// If we have a short hand end tag...
if (chTemp == _T('>'))
{
// Set the type and ensure we point at the next character
getNextChar(pXML);
*pType = eTokenShortHandClose;
break;
}
// If we haven't found a short hand closing tag then drop into the
// text process
// Other characters
default:
nIsText = TRUE;
}
// If this is a TEXT node
if (nIsText)
{
// Indicate we are dealing with text
*pType = eTokenText;
while((ch = getNextChar(pXML)))
{
if XML_isSPACECHAR(ch)
{
// the white space was consumed: shift indexStart so that the length
// computed below does not count it
indexStart++; break;
} else if (ch==_T('/'))
{
// If we find a slash then this maybe text or a short hand end tag
// Peek at the next character to see it we have short hand end tag
ch=pXML->lpXML[pXML->nIndex];
// If we found a short hand end tag then we need to exit the loop
if (ch==_T('>')) { pXML->nIndex--; break; }
} else if ((ch==_T('<'))||(ch==_T('>'))||(ch==_T('=')))
{
// give the delimiter back: it starts the next token
pXML->nIndex--; break;
}
}
}
*pcbToken = pXML->nIndex-indexStart;
} else
{
// If we failed to obtain a valid character
*pcbToken = 0;
*pType = eTokenError;
result.pStr=NULL;
}
return result;
}
  833. XMLCSTR XMLNode::updateName_WOSD(XMLCSTR lpszName)
  834. {
  835. if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
  836. d->lpszName=lpszName;
  837. return lpszName;
  838. }
  839. // private:
  840. XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
  841. XMLNode::XMLNode(XMLNodeData *pParent, XMLCSTR lpszName, int isDeclaration)
  842. {
  843. d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
  844. d->ref_count=1;
  845. d->lpszName=NULL;
  846. d->nChild= 0;
  847. d->nText = 0;
  848. d->nClear = 0;
  849. d->nAttribute = 0;
  850. d->isDeclaration = isDeclaration;
  851. d->pParent = pParent;
  852. d->pChild= NULL;
  853. d->pText= NULL;
  854. d->pClear= NULL;
  855. d->pAttribute= NULL;
  856. d->pOrder= NULL;
  857. updateName_WOSD(lpszName);
  858. }
  859. XMLNode XMLNode::createXMLTopNode_WOSD(XMLCSTR lpszName, int isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
  860. XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, int isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
  #define MEMORYINCREASE 50
  static int memoryIncrease=0;

  // Chunked (re)allocation helper for the node arrays.
  //   p          - current block, or NULL when nothing is allocated yet
  //   newsize    - number of elements the array is about to hold
  //   memInc     - growth step in elements; 0 means "resize on every call"
  //   sizeofElem - size of one element in bytes
  // With p == NULL a block of memInc elements (one element when memInc is 0)
  // is allocated. Otherwise the block is resized to newsize+memInc elements
  // when memInc is 0 or newsize is a multiple of memInc, and returned
  // untouched in every other case.
  // NOTE: the result of malloc/realloc is returned unchecked, so a failed
  // allocation reaches the caller as NULL.
  static void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  {
      if (p == NULL)
      {
          const int initialCount = memInc ? memInc : 1;
          return malloc(initialCount*sizeofElem);
      }

      const bool mustResize = (memInc == 0) || ((newsize % memInc) == 0);
      if (mustResize)
      {
          p = realloc(p, (newsize+memInc)*sizeofElem);
      }
      return p;
  }
  873. void XMLNode::addToOrder(int index, int type)
  874. {
  875. int n=nElement();
  876. d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
  877. d->pOrder[n]=(index<<2)+type;
  878. }
  879. // Add a child node to the given element.
  880. XMLNode XMLNode::addChild_WOSD(XMLCSTR lpszName, int isDeclaration)
  881. {
  882. if (!lpszName) return emptyXMLNode;
  883. int nc=d->nChild;
  884. d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode));
  885. d->pChild[nc].d=NULL;
  886. d->pChild[nc]=XMLNode(d,lpszName,isDeclaration);
  887. addToOrder(nc,eNodeChild);
  888. d->nChild++;
  889. return d->pChild[nc];
  890. }
  891. // Add an attribute to an element.
  892. XMLAttribute *XMLNode::addAttribute_WOSD(XMLCSTR lpszName, XMLCSTR lpszValuev)
  893. {
  894. if (!lpszName) return &emptyXMLAttribute;
  895. int na=d->nAttribute;
  896. d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(na+1),memoryIncrease,sizeof(XMLAttribute));
  897. XMLAttribute *pAttr=d->pAttribute+na;
  898. pAttr->lpszName = lpszName;
  899. pAttr->lpszValue = lpszValuev;
  900. addToOrder(na,eNodeAttribute);
  901. d->nAttribute++;
  902. return pAttr;
  903. }
  904. // Add text to the element.
  905. XMLCSTR XMLNode::addText_WOSD(XMLCSTR lpszValue)
  906. {
  907. if (!lpszValue) return NULL;
  908. int nt=d->nText;
  909. d->pText=(XMLCSTR*)myRealloc(d->pText,(nt+1),memoryIncrease,sizeof(XMLSTR));
  910. d->pText[nt]=lpszValue;
  911. addToOrder(nt,eNodeText);
  912. d->nText++;
  913. return lpszValue;
  914. }
  915. // Add clear (unformatted) text to the element.
  916. XMLClear *XMLNode::addClear_WOSD(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose)
  917. {
  918. if (!lpszValue) return &emptyXMLClear;
  919. int nc=d->nClear;
  920. d->pClear=(XMLClear *)myRealloc(d->pClear,(nc+1),memoryIncrease,sizeof(XMLClear));
  921. XMLClear *pNewClear=d->pClear+nc;
  922. pNewClear->lpszValue = lpszValue;
  923. pNewClear->lpszOpenTag = lpszOpen;
  924. pNewClear->lpszCloseTag = lpszClose;
  925. addToOrder(nc,eNodeClear);
  926. d->nClear++;
  927. return pNewClear;
  928. }
  929. // Trim the end of the text to remove white space characters.
  930. static void FindEndOfText(XMLCSTR lpszToken, int *pcbText)
  931. {
  932. XMLCHAR ch;
  933. int cbText;
  934. assert(lpszToken);
  935. assert(pcbText);
  936. cbText = (*pcbText)-1;
  937. while(TRUE)
  938. {
  939. assert(cbText >= 0);
  940. ch = lpszToken[cbText];
  941. if XML_isSPACECHAR(ch) cbText--;
  942. else { *pcbText = cbText+1; return; }
  943. }
  944. }
  945. // private:
  946. // Parse a clear (unformatted) type node.
  947. int XMLNode::ParseClearTag(void *px, void *pa)
  948. {
  949. XML *pXML=(XML *)px;
  950. ALLXMLClearTag *pClear=(ALLXMLClearTag *)pa;
  951. int cbTemp = 0;
  952. XMLCSTR lpszTemp;
  953. XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
  954. // Find the closing tag
  955. lpszTemp = _tcsstr(lpXML, pClear->lpszClose);
  956. // Iterate through the tokens until we find the closing tag.
  957. if (lpszTemp)
  958. {
  959. // Cache the size and increment the index
  960. cbTemp = (int)(lpszTemp - lpXML);
  961. pXML->nIndex += cbTemp+(int)_tcslen(pClear->lpszClose);
  962. // Add the clear node to the current element
  963. addClear_WOSD(stringDup(lpXML,cbTemp), pClear->lpszOpen, pClear->lpszClose);
  964. return TRUE;
  965. }
  966. // If we failed to find the end tag
  967. pXML->error = eXMLErrorUnmatchedEndClearTag;
  968. return FALSE;
  969. }
  970. void XMLNode::exactMemory(XMLNodeData *d)
  971. {
  972. if (memoryIncrease<=1) return;
  973. if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nAttribute+d->nText+d->nClear)*sizeof(int));
  974. if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
  975. if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
  976. if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
  977. if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
  978. }
  979. // private:
  980. // Recursively parse an XML element.
  981. int XMLNode::ParseXMLElement(void *pa)
  982. {
  983. XML *pXML=(XML *)pa;
  984. int cbToken;
  985. enum XMLTokenTypeTag type;
  986. NextToken token;
  987. XMLCSTR lpszTemp=NULL;
  988. int cbTemp;
  989. int nDeclaration;
  990. XMLCSTR lpszText=NULL;
  991. XMLNode pNew;
  992. enum Status status; // inside or outside a tag
  993. enum Attrib attrib = eAttribName;
  994. assert(pXML);
  995. // If this is the first call to the function
  996. if (pXML->nFirst)
  997. {
  998. // Assume we are outside of a tag definition
  999. pXML->nFirst = FALSE;
  1000. status = eOutsideTag;
  1001. } else
  1002. {
  1003. // If this is not the first call then we should only be called when inside a tag.
  1004. status = eInsideTag;
  1005. }
  1006. // Iterate through the tokens in the document
  1007. while(TRUE)
  1008. {
  1009. // Obtain the next token
  1010. token = GetNextToken(pXML, &cbToken, &type);
  1011. if (type != eTokenError)
  1012. {
  1013. // Check the current status
  1014. switch(status)
  1015. {
  1016. // If we are outside of a tag definition
  1017. case eOutsideTag:
  1018. // Check what type of token we obtained
  1019. switch(type)
  1020. {
  1021. // If we have found text or quoted text
  1022. case eTokenText:
  1023. case eTokenCloseTag: /* '>' */
  1024. case eTokenShortHandClose: /* '/>' */
  1025. case eTokenQuotedText:
  1026. case eTokenEquals:
  1027. if (!lpszText)
  1028. {
  1029. lpszText = token.pStr;
  1030. }
  1031. break;
  1032. // If we found a start tag '<' and declarations '<?'
  1033. case eTokenTagStart:
  1034. case eTokenDeclaration:
  1035. // Cache whether this new element is a declaration or not
  1036. nDeclaration = type == eTokenDeclaration;
  1037. // If we have node text then add this to the element
  1038. if (lpszText)
  1039. {
  1040. cbTemp = (int)(token.pStr - lpszText);
  1041. FindEndOfText(lpszText, &cbTemp);
  1042. lpszText=fromXMLString(lpszText,cbTemp,pXML);
  1043. if (!lpszText) return FALSE;
  1044. addText_WOSD(lpszText);
  1045. lpszText=NULL;
  1046. }
  1047. // Find the name of the tag
  1048. token = GetNextToken(pXML, &cbToken, &type);
  1049. // Return an error if we couldn't obtain the next token or
  1050. // it wasnt text
  1051. if (type != eTokenText)
  1052. {
  1053. pXML->error = eXMLErrorMissingTagName;
  1054. return FALSE;
  1055. }
  1056. // If we found a new element which is the same as this
  1057. // element then we need to pass this back to the caller..
  1058. #ifdef APPROXIMATE_PARSING
  1059. if (d->lpszName &&
  1060. myTagCompare(d->lpszName, token.pStr) == 0)
  1061. {
  1062. // Indicate to the caller that it needs to create a
  1063. // new element.
  1064. pXML->lpNewElement = token.pStr;
  1065. pXML->cbNewElement = cbToken;
  1066. return TRUE;
  1067. } else
  1068. #endif
  1069. {
  1070. // If the name of the new element differs from the name of
  1071. // the current element we need to add the new element to
  1072. // the current one and recurse
  1073. pNew = addChild_WOSD(stringDup(token.pStr,cbToken), nDeclaration);
  1074. while (!pNew.isEmpty())
  1075. {
  1076. // Callself to process the new node. If we return
  1077. // FALSE this means we dont have any more
  1078. // processing to do...
  1079. if (!pNew.ParseXMLElement(pXML)) return FALSE;
  1080. else
  1081. {
  1082. // If the call to recurse this function
  1083. // evented in a end tag specified in XML then
  1084. // we need to unwind the calls to this
  1085. // function until we find the appropriate node
  1086. // (the element name and end tag name must
  1087. // match)
  1088. if (pXML->cbEndTag)
  1089. {
  1090. // If we are back at the root node then we
  1091. // have an unmatched end tag
  1092. if (!d->lpszName)
  1093. {
  1094. pXML->error=eXMLErrorUnmatchedEndTag;
  1095. return FALSE;
  1096. }
  1097. // If the end tag matches the name of this
  1098. // element then we only need to unwind
  1099. // once more...
  1100. if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
  1101. {
  1102. pXML->cbEndTag = 0;
  1103. }
  1104. return TRUE;
  1105. } else
  1106. if (pXML->cbNewElement)
  1107. {
  1108. // If the call indicated a new element is to
  1109. // be created on THIS element.
  1110. // If the name of this element matches the
  1111. // name of the element we need to create
  1112. // then we need to return to the caller
  1113. // and let it process the element.
  1114. if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
  1115. {
  1116. return TRUE;
  1117. }
  1118. // Add the new element and recurse
  1119. pNew = addChild_WOSD(stringDup(pXML->lpNewElement,pXML->cbNewElement));
  1120. pXML->cbNewElement = 0;
  1121. }
  1122. else
  1123. {
  1124. // If we didn't have a new element to create
  1125. pNew = emptyXMLNode;
  1126. }
  1127. }
  1128. }
  1129. }
  1130. break;
  1131. // If we found an end tag
  1132. case eTokenTagEnd:
  1133. // If we have node text then add this to the element
  1134. if (lpszText)
  1135. {
  1136. cbTemp = (int)(token.pStr - lpszText);
  1137. FindEndOfText(lpszText, &cbTemp);
  1138. lpszText=fromXMLString(lpszText,cbTemp,pXML);
  1139. if (!lpszText) return FALSE;
  1140. addText_WOSD(lpszText);
  1141. lpszText = NULL;
  1142. }
  1143. // Find the name of the end tag
  1144. token = GetNextToken(pXML, &cbTemp, &type);
  1145. // The end tag should be text
  1146. if (type != eTokenText)
  1147. {
  1148. pXML->error = eXMLErrorMissingEndTagName;
  1149. return FALSE;
  1150. }
  1151. lpszTemp = token.pStr;
  1152. // After the end tag we should find a closing tag
  1153. token = GetNextToken(pXML, &cbToken, &type);
  1154. if (type != eTokenCloseTag)
  1155. {
  1156. pXML->error = eXMLErrorMissingEndTagName;
  1157. return FALSE;
  1158. }
  1159. // We need to return to the previous caller. If the name
  1160. // of the tag cannot be found we need to keep returning to
  1161. // caller until we find a match
  1162. if (myTagCompare(d->lpszName, lpszTemp) != 0)
  1163. #ifdef STRICT_PARSING
  1164. {
  1165. pXML->error=eXMLErrorUnmatchedEndTag;
  1166. pXML->nIndexMissigEndTag=pXML->nIndex;
  1167. return FALSE;
  1168. }
  1169. #else
  1170. {
  1171. pXML->error=eXMLErrorMissingEndTag;
  1172. pXML->nIndexMissigEndTag=pXML->nIndex;
  1173. pXML->lpEndTag = lpszTemp;
  1174. pXML->cbEndTag = cbTemp;
  1175. }
  1176. #endif
  1177. // Return to the caller
  1178. exactMemory(d);
  1179. return TRUE;
  1180. // If we found a clear (unformatted) token
  1181. case eTokenClear:
  1182. // If we have node text then add this to the element
  1183. if (lpszText)
  1184. {
  1185. cbTemp = (int)(token.pStr - lpszText);
  1186. FindEndOfText(lpszText, &cbTemp);
  1187. addText_WOSD(stringDup(lpszText,cbTemp));
  1188. lpszText = NULL;
  1189. }
  1190. if (!ParseClearTag(pXML, token.pClr))
  1191. {
  1192. return FALSE;
  1193. }
  1194. break;
  1195. default:
  1196. break;
  1197. }
  1198. break;
  1199. // If we are inside a tag definition we need to search for attributes
  1200. case eInsideTag:
  1201. // Check what part of the attribute (name, equals, value) we
  1202. // are looking for.
  1203. switch(attrib)
  1204. {
  1205. // If we are looking for a new attribute
  1206. case eAttribName:
  1207. // Check what the current token type is
  1208. switch(type)
  1209. {
  1210. // If the current type is text...
  1211. // Eg. 'attribute'
  1212. case eTokenText:
  1213. // Cache the token then indicate that we are next to
  1214. // look for the equals
  1215. lpszTemp = token.pStr;
  1216. cbTemp = cbToken;
  1217. attrib = eAttribEquals;
  1218. break;
  1219. // If we found a closing tag...
  1220. // Eg. '>'
  1221. case eTokenCloseTag:
  1222. // We are now outside the tag
  1223. status = eOutsideTag;
  1224. break;
  1225. // If we found a short hand '/>' closing tag then we can
  1226. // return to the caller
  1227. case eTokenShortHandClose:
  1228. exactMemory(d);
  1229. return TRUE;
  1230. // Errors...
  1231. case eTokenQuotedText: /* '"SomeText"' */
  1232. case eTokenTagStart: /* '<' */
  1233. case eTokenTagEnd: /* '</' */
  1234. case eTokenEquals: /* '=' */
  1235. case eTokenDeclaration: /* '<?' */
  1236. case eTokenClear:
  1237. pXML->error = eXMLErrorUnexpectedToken;
  1238. return FALSE;
  1239. default: break;
  1240. }
  1241. break;
  1242. // If we are looking for an equals
  1243. case eAttribEquals:
  1244. // Check what the current token type is
  1245. switch(type)
  1246. {
  1247. // If the current type is text...
  1248. // Eg. 'Attribute AnotherAttribute'
  1249. case eTokenText:
  1250. // Add the unvalued attribute to the list
  1251. addAttribute_WOSD(stringDup(lpszTemp,cbTemp), NULL);
  1252. // Cache the token then indicate. We are next to
  1253. // look for the equals attribute
  1254. lpszTemp = token.pStr;
  1255. cbTemp = cbToken;
  1256. break;
  1257. // If we found a closing tag 'Attribute >' or a short hand
  1258. // closing tag 'Attribute />'
  1259. case eTokenShortHandClose:
  1260. case eTokenCloseTag:
  1261. // If we are a declaration element '<?' then we need
  1262. // to remove extra closing '?' if it exists
  1263. if (d->isDeclaration &&
  1264. (lpszTemp[cbTemp-1]) == _T('?'))
  1265. {
  1266. cbTemp--;
  1267. }
  1268. if (cbTemp)
  1269. {
  1270. // Add the unvalued attribute to the list
  1271. addAttribute_WOSD(stringDup(lpszTemp,cbTemp), NULL);
  1272. }
  1273. // If this is the end of the tag then return to the caller
  1274. if (type == eTokenShortHandClose)
  1275. {
  1276. exactMemory(d);
  1277. return TRUE;
  1278. }
  1279. // We are now outside the tag
  1280. status = eOutsideTag;
  1281. break;
  1282. // If we found the equals token...
  1283. // Eg. 'Attribute ='
  1284. case eTokenEquals:
  1285. // Indicate that we next need to search for the value
  1286. // for the attribute
  1287. attrib = eAttribValue;
  1288. break;
  1289. // Errors...
  1290. case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/
  1291. case eTokenTagStart: /* 'Attribute <' */
  1292. case eTokenTagEnd: /* 'Attribute </' */
  1293. case eTokenDeclaration: /* 'Attribute <?' */
  1294. case eTokenClear:
  1295. pXML->error = eXMLErrorUnexpectedToken;
  1296. return FALSE;
  1297. default: break;
  1298. }
  1299. break;
  1300. // If we are looking for an attribute value
  1301. case eAttribValue:
  1302. // Check what the current token type is
  1303. switch(type)
  1304. {
  1305. // If the current type is text or quoted text...
  1306. // Eg. 'Attribute = "Value"' or 'Attribute = Value' or
  1307. // 'Attribute = 'Value''.
  1308. case eTokenText:
  1309. case eTokenQuotedText:
  1310. // If we are a declaration element '<?' then we need
  1311. // to remove extra closing '?' if it exists
  1312. if (d->isDeclaration &&
  1313. (token.pStr[cbToken-1]) == _T('?'))
  1314. {
  1315. cbToken--;
  1316. }
  1317. if (cbTemp)
  1318. {
  1319. // Add the valued attribute to the list
  1320. if (type==eTokenQuotedText) { token.pStr++; cbToken-=2; }
  1321. XMLCSTR attrVal=token.pStr;
  1322. if (attrVal)
  1323. {
  1324. attrVal=fromXMLString(attrVal,cbToken,pXML);
  1325. if (!attrVal) return FALSE;
  1326. }
  1327. addAttribute_WOSD(stringDup(lpszTemp,cbTemp),attrVal);
  1328. }
  1329. // Indicate we are searching for a new attribute
  1330. attrib = eAttribName;
  1331. break;
  1332. // Errors...
  1333. case eTokenTagStart: /* 'Attr = <' */
  1334. case eTokenTagEnd: /* 'Attr = </' */
  1335. case eTokenCloseTag: /* 'Attr = >' */
  1336. case eTokenShortHandClose: /* "Attr = />" */
  1337. case eTokenEquals: /* 'Attr = =' */
  1338. case eTokenDeclaration: /* 'Attr = <?' */
  1339. case eTokenClear:
  1340. pXML->error = eXMLErrorUnexpectedToken;
  1341. return FALSE;
  1342. break;
  1343. default: break;
  1344. }
  1345. }
  1346. }
  1347. }
  1348. // If we failed to obtain the next token
  1349. else
  1350. {
  1351. if ((!d->isDeclaration)&&(d->pParent))
  1352. {
  1353. #ifdef STRICT_PARSING
  1354. pXML->error=eXMLErrorUnmatchedEndTag;
  1355. #else
  1356. pXML->error=eXMLErrorMissingEndTag;
  1357. #endif
  1358. pXML->nIndexMissigEndTag=pXML->nIndex;
  1359. }
  1360. return FALSE;
  1361. }
  1362. }
  1363. }
  1364. // Count the number of lines and columns in an XML string.
  1365. static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults)
  1366. {
  1367. XMLCHAR ch;
  1368. assert(lpXML);
  1369. assert(pResults);
  1370. struct XML xml={ lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
  1371. pResults->nLine = 1;
  1372. pResults->nColumn = 1;
  1373. while (xml.nIndex<nUpto)
  1374. {
  1375. ch = getNextChar(&xml);
  1376. if (ch != _T('\n')) pResults->nColumn++;
  1377. else
  1378. {
  1379. pResults->nLine++;
  1380. pResults->nColumn=1;
  1381. }
  1382. }
  1383. }
  1384. // Parse XML and return the root element.
  1385. XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults)
  1386. {
  1387. if (!lpszXML)
  1388. {
  1389. if (pResults)
  1390. {
  1391. pResults->error=eXMLErrorNoElements;
  1392. pResults->nLine=0;
  1393. pResults->nColumn=0;
  1394. }
  1395. return emptyXMLNode;
  1396. }
  1397. XMLNode xnode(NULL,NULL,FALSE);
  1398. struct XML xml={ lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE };
  1399. // Create header element
  1400. memoryIncrease=MEMORYINCREASE; xnode.ParseXMLElement(&xml); memoryIncrease=0;
  1401. enum XMLError error = xml.error;
  1402. if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node
  1403. // If no error occurred
  1404. if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag))
  1405. {
  1406. if (tag&&_tcslen(tag)&&_tcsicmp(xnode.getName(),tag))
  1407. {
  1408. XMLNode nodeTmp;
  1409. int i=0;
  1410. while (i<xnode.nChildNode())
  1411. {
  1412. nodeTmp=xnode.getChildNode(i);
  1413. if (_tcsicmp(nodeTmp.getName(),tag)==0) break;
  1414. if (nodeTmp.isDeclaration()) { xnode=nodeTmp; i=0; } else i++;
  1415. }
  1416. if (i>=xnode.nChildNode())
  1417. {
  1418. if (pResults)
  1419. {
  1420. pResults->error=eXMLErrorFirstTagNotFound;
  1421. pResults->nLine=0;
  1422. pResults->nColumn=0;
  1423. }
  1424. return emptyXMLNode;
  1425. }
  1426. xnode=nodeTmp;
  1427. }
  1428. } else
  1429. {
  1430. // Cleanup: this will destroy all the nodes
  1431. xnode = emptyXMLNode;
  1432. }
  1433. // If we have been given somewhere to place results
  1434. if (pResults)
  1435. {
  1436. pResults->error = error;
  1437. // If we have an error
  1438. if (error!=eXMLErrorNone)
  1439. {
  1440. if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag;
  1441. // Find which line and column it starts on.
  1442. CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults);
  1443. }
  1444. }
  1445. return xnode;
  1446. }
  1447. XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults)
  1448. {
  1449. if (pResults) { pResults->nLine=0; pResults->nColumn=0; }
  1450. FILE *f=_tfopen(filename,_T("rb"));
  1451. if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; }
  1452. fseek(f,0,SEEK_END);
  1453. int l=ftell(f),headerSz=0;
  1454. if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; return emptyXMLNode; }
  1455. fseek(f,0,SEEK_SET);
  1456. unsigned char *buf=(unsigned char*)malloc(l+1);
  1457. fread(buf,l,1,f);
  1458. fclose(f);
  1459. buf[l]=0;
  1460. #ifdef _XMLUNICODE
  1461. if (guessUnicodeChars)
  1462. {
  1463. if (!myIsTextUnicode(buf,l))
  1464. {
  1465. if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3;
  1466. XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),l-headerSz);
  1467. free(buf); buf=(unsigned char*)b2; headerSz=0;
  1468. } else
  1469. {
  1470. if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
  1471. if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
  1472. }
  1473. }
  1474. #else
  1475. if (guessUnicodeChars)
  1476. {
  1477. if (myIsTextUnicode(buf,l))
  1478. {
  1479. l/=sizeof(wchar_t);
  1480. if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2;
  1481. if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2;
  1482. char *b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz),l-headerSz);
  1483. free(buf); buf=(unsigned char*)b2; headerSz=0;
  1484. } else
  1485. {
  1486. if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3;
  1487. }
  1488. }
  1489. #endif
  1490. if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; }
  1491. XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults);
  1492. free(buf);
  1493. return x;
  1494. }
  1495. XMLNodeContents XMLNode::enumContents(int i)
  1496. {
  1497. XMLNodeContents c;
  1498. if (!d) { c.type=eNodeNULL; return c; }
  1499. c.type=(XMLElementType)(d->pOrder[i]&3);
  1500. i=(d->pOrder[i])>>2;
  1501. switch (c.type)
  1502. {
  1503. case eNodeChild: c.child = d->pChild[i]; break;
  1504. case eNodeAttribute: c.attrib= d->pAttribute[i]; break;
  1505. case eNodeText: c.text = d->pText[i]; break;
  1506. case eNodeClear: c.clear = d->pClear[i]; break;
  1507. default: break;
  1508. }
  1509. return c;
  1510. }
  1511. // private:
  1512. void *XMLNode::enumContent(XMLNodeData *pEntry, int i, XMLElementType *nodeType)
  1513. {
  1514. XMLElementType j=(XMLElementType)(pEntry->pOrder[i]&3);
  1515. *nodeType=j;
  1516. i=(pEntry->pOrder[i])>>2;
  1517. switch (j)
  1518. {
  1519. case eNodeChild: return pEntry->pChild[i].d;
  1520. case eNodeAttribute: return pEntry->pAttribute+i;
  1521. case eNodeText: return (void*)(pEntry->pText[i]);
  1522. case eNodeClear: return pEntry->pClear+i;
  1523. default: break;
  1524. }
  1525. return NULL;
  1526. }
  1527. // private:
  1528. int XMLNode::nElement(XMLNodeData *pEntry)
  1529. {
  1530. return pEntry->nChild+pEntry->nText+pEntry->nClear+pEntry->nAttribute;
  1531. }
  1532. static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; }
  1533. // private:
  1534. // Creates an user friendly XML string from a given element with
  1535. // appropriate white space and carriage returns.
  1536. //
  1537. // This recurses through all subnodes then adds contents of the nodes to the
  1538. // string.
  1539. int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat)
  1540. {
  1541. int nResult = 0;
  1542. int cb;
  1543. int cbElement;
  1544. int nIndex;
  1545. int nChildFormat=-1;
  1546. int bHasChildren=FALSE;
  1547. int i;
  1548. XMLAttribute * pAttr;
  1549. assert(pEntry);
  1550. #define LENSTR(lpsz) (lpsz ? _tcslen(lpsz) : 0)
  1551. // If the element has no name then assume this is the head node.
  1552. cbElement = (int)LENSTR(pEntry->lpszName);
  1553. if (cbElement)
  1554. {
  1555. // "<elementname "
  1556. cb = nFormat == -1 ? 0 : nFormat;
  1557. if (lpszMarker)
  1558. {
  1559. if (cb) charmemset(lpszMarker, INDENTCHAR, sizeof(XMLCHAR)*cb);
  1560. nResult = cb;
  1561. lpszMarker[nResult++]=_T('<');
  1562. if (pEntry->isDeclaration) lpszMarker[nResult++]=_T('?');
  1563. _tcscpy(&lpszMarker[nResult], pEntry->lpszName);
  1564. nResult+=cbElement;
  1565. lpszMarker[nResult++]=_T(' ');
  1566. } else
  1567. {
  1568. nResult+=cbElement+2+cb;
  1569. if (pEntry->isDeclaration) nResult++;
  1570. }
  1571. // Enumerate attributes and add them to the string
  1572. nIndex = pEntry->nAttribute; pAttr=pEntry->pAttribute;
  1573. for (i=0; i<nIndex; i++)
  1574. {
  1575. // "Attrib
  1576. cb = (int)LENSTR(pAttr->lpszName);
  1577. if (cb)
  1578. {
  1579. if (lpszMarker) _tcscpy(&lpszMarker[nResult], pAttr->lpszName);
  1580. nResult += cb;
  1581. // "Attrib=Value "
  1582. if (pAttr->lpszValue)
  1583. {
  1584. cb=(int)lengthXMLString(pAttr->lpszValue);
  1585. if (lpszMarker)
  1586. {
  1587. lpszMarker[nResult]=_T('=');
  1588. lpszMarker[nResult+1]=_T('"');
  1589. if (cb) toXMLString(&lpszMarker[nResult+2],pAttr->lpszValue);
  1590. lpszMarker[nResult+cb+2]=_T('"');
  1591. }
  1592. nResult+=cb+3;
  1593. }
  1594. if (lpszMarker) lpszMarker[nResult] = _T(' ');
  1595. nResult++;
  1596. }
  1597. pAttr++;
  1598. }
  1599. bHasChildren=(pEntry->nAttribute!=nElement(pEntry));
  1600. if (pEntry->isDeclaration)
  1601. {
  1602. if (lpszMarker)
  1603. {
  1604. lpszMarker[nResult-1]=_T('?');
  1605. lpszMarker[nResult]=_T('>');
  1606. }
  1607. nResult++;
  1608. if (nFormat!=-1)
  1609. {
  1610. if (lpszMarker) lpszMarker[nResult]=_T('\n');
  1611. nResult++;
  1612. }
  1613. } else
  1614. // If there are child nodes we need to terminate the start tag
  1615. if (bHasChildren)
  1616. {
  1617. if (lpszMarker) lpszMarker[nResult-1]=_T('>');
  1618. if (nFormat!=-1)
  1619. {
  1620. if (lpszMarker) lpszMarker[nResult]=_T('\n');
  1621. nResult++;
  1622. }
  1623. } else nResult--;
  1624. }
  1625. // Calculate the child format for when we recurse. This is used to
  1626. // determine the number of spaces used for prefixes.
  1627. if (nFormat!=-1)
  1628. {
  1629. if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1;
  1630. else nChildFormat=nFormat;
  1631. }
  1632. // Enumerate through remaining children
  1633. nIndex = nElement(pEntry);
  1634. XMLElementType nodeType;
  1635. void *pChild;
  1636. for (i=0; i<nIndex; i++)
  1637. {
  1638. pChild=enumContent(pEntry, i, &nodeType);
  1639. switch(nodeType)
  1640. {
  1641. // Text nodes
  1642. case eNodeText:
  1643. // "Text"
  1644. cb = (int)lengthXMLString((XMLSTR)pChild);
  1645. if (cb)
  1646. {
  1647. if (nFormat!=-1)
  1648. {
  1649. if (lpszMarker)
  1650. {
  1651. charmemset(&lpszMarker[nResult],INDENTCHAR,sizeof(XMLCHAR)*(nFormat + 1));
  1652. toXMLString(&lpszMarker[nResult+nFormat+1],(XMLSTR)pChild);
  1653. lpszMarker[nResult+nFormat+1+cb]=_T('\n');
  1654. }
  1655. nResult+=cb+nFormat+2;
  1656. } else
  1657. {
  1658. if (lpszMarker) toXMLString(&lpszMarker[nResult], (XMLSTR)pChild);
  1659. nResult += cb;
  1660. }
  1661. }
  1662. break;
  1663. // Clear type nodes
  1664. case eNodeClear:
  1665. // "OpenTag"
  1666. cb = (int)LENSTR(((XMLClear*)pChild)->lpszOpenTag);
  1667. if (cb)
  1668. {
  1669. if (nFormat!=-1)
  1670. {
  1671. if (lpszMarker)
  1672. {
  1673. charmemset(&lpszMarker[nResult], INDENTCHAR, sizeof(XMLCHAR)*(nFormat + 1));
  1674. _tcscpy(&lpszMarker[nResult+nFormat+1], ((XMLClear*)pChild)->lpszOpenTag);
  1675. }
  1676. nResult+=cb+nFormat+1;
  1677. }
  1678. else
  1679. {
  1680. if (lpszMarker)_tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszOpenTag);
  1681. nResult += cb;
  1682. }
  1683. }
  1684. // "OpenTag Value"
  1685. cb = (int)LENSTR(((XMLClear*)pChild)->lpszValue);
  1686. if (cb)
  1687. {
  1688. if (lpszMarker) _tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszValue);
  1689. nResult += cb;
  1690. }
  1691. // "OpenTag Value CloseTag"
  1692. cb = (int)LENSTR(((XMLClear*)pChild)->lpszCloseTag);
  1693. if (cb)
  1694. {
  1695. if (lpszMarker) _tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszCloseTag);
  1696. nResult += cb;
  1697. }
  1698. if (nFormat!=-1)
  1699. {
  1700. if (lpszMarker) lpszMarker[nResult] = _T('\n');
  1701. nResult++;
  1702. }
  1703. break;
  1704. // Element nodes
  1705. case eNodeChild:
  1706. // Recursively add child nodes
  1707. nResult += CreateXMLStringR((XMLNodeData*)pChild,
  1708. lpszMarker ? lpszMarker + nResult : 0, nChildFormat);
  1709. break;
  1710. default: break;
  1711. }
  1712. }
  1713. if ((cbElement)&&(!pEntry->isDeclaration))
  1714. {
  1715. // If we have child entries we need to use long XML notation for
  1716. // closing the element - "<elementname>blah blah blah</elementname>"
  1717. if (bHasChildren)
  1718. {
  1719. // "</elementname>\0"
  1720. if (lpszMarker)
  1721. {
  1722. if (nFormat != -1)
  1723. {
  1724. if (nFormat)
  1725. {
  1726. charmemset(&lpszMarker[nResult], INDENTCHAR,sizeof(XMLCHAR)*nFormat);
  1727. nResult+=nFormat;
  1728. }
  1729. }
  1730. _tcscpy(&lpszMarker[nResult], _T("</"));
  1731. nResult += 2;
  1732. _tcscpy(&lpszMarker[nResult], pEntry->lpszName);
  1733. nResult += cbElement;
  1734. if (nFormat == -1)
  1735. {
  1736. _tcscpy(&lpszMarker[nResult], _T(">"));
  1737. nResult++;
  1738. } else
  1739. {
  1740. _tcscpy(&lpszMarker[nResult], _T(">\n"));
  1741. nResult+=2;
  1742. }
  1743. } else
  1744. {
  1745. if (nFormat != -1) nResult+=cbElement+4+nFormat;
  1746. else nResult+=cbElement+3;
  1747. }
  1748. } else
  1749. {
  1750. // If there are no children we can use shorthand XML notation -
  1751. // "<elementname/>"
  1752. // "/>\0"
  1753. if (lpszMarker)
  1754. {
  1755. if (nFormat == -1)
  1756. {
  1757. _tcscpy(&lpszMarker[nResult], _T("/>"));
  1758. nResult += 2;
  1759. }
  1760. else
  1761. {
  1762. _tcscpy(&lpszMarker[nResult], _T("/>\n"));
  1763. nResult += 3;
  1764. }
  1765. }
  1766. else
  1767. {
  1768. nResult += nFormat == -1 ? 2 : 3;
  1769. }
  1770. }
  1771. }
  1772. return nResult;
  1773. }
  1774. #undef LENSTR
  1775. // Create an XML string
  1776. // @param int nFormat - 0 if no formatting is required
  1777. // otherwise nonzero for formatted text
  1778. // with carriage returns and indentation.
  1779. // @param int *pnSize - [out] pointer to the size of the
  1780. // returned string not including the
  1781. // NULL terminator.
  1782. // @return XMLSTR - Allocated XML string, you must free
  1783. // this with free().
  1784. XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize)
  1785. {
  1786. if (!d) { if (pnSize) *pnSize=0; return NULL; }
  1787. XMLSTR lpszResult = NULL;
  1788. int cbStr;
  1789. // Recursively Calculate the size of the XML string
  1790. nFormat = nFormat ? 0 : -1;
  1791. cbStr = CreateXMLStringR(d, 0, nFormat);
  1792. assert(cbStr);
  1793. // Alllocate memory for the XML string + the NULL terminator and
  1794. // create the recursively XML string.
  1795. lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR));
  1796. CreateXMLStringR(d, lpszResult, nFormat);
  1797. if (pnSize) *pnSize = cbStr;
  1798. return lpszResult;
  1799. }
  1800. XMLNode::~XMLNode() { deleteNodeContent(); }
  1801. void XMLNode::detachFromParent(XMLNodeData *d)
  1802. {
  1803. XMLNode *pa=d->pParent->pChild;
  1804. int i=0;
  1805. while (((void*)(pa[i].d))!=((void*)d)) i++;
  1806. d->pParent->nChild--;
  1807. if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode));
  1808. else { free(pa); d->pParent->pChild=NULL; }
  1809. removeOrderElement(d->pParent,eNodeChild,i);
  1810. }
  1811. void XMLNode::deleteNodeContent(char force)
  1812. {
  1813. if (!d) return;
  1814. (d->ref_count) --;
  1815. if ((d->ref_count==0)||force)
  1816. {
  1817. int i;
  1818. if (d->pParent) detachFromParent(d);
  1819. for(i=0; i<d->nChild; i++) { d->pChild[i].d->pParent=NULL; d->pChild[i].deleteNodeContent(force); }
  1820. free(d->pChild);
  1821. for(i=0; i<d->nText; i++) free((void*)d->pText[i]);
  1822. free(d->pText);
  1823. for(i=0; i<d->nClear; i++) free((void*)d->pClear[i].lpszValue);
  1824. free(d->pClear);
  1825. for(i=0; i<d->nAttribute; i++)
  1826. {
  1827. free((void*)d->pAttribute[i].lpszName);
  1828. if (d->pAttribute[i].lpszValue) free((void*)d->pAttribute[i].lpszValue);
  1829. }
  1830. free(d->pAttribute);
  1831. free(d->pOrder);
  1832. free((void*)d->lpszName);
  1833. free(d);
  1834. d=NULL;
  1835. }
  1836. }
  1837. XMLNode XMLNode::addChild(XMLNode childNode)
  1838. {
  1839. XMLNodeData *dc=childNode.d;
  1840. if ((!dc)||(!d)) return childNode;
  1841. if (dc->pParent) detachFromParent(dc); else dc->ref_count++;
  1842. dc->pParent=d; dc->isDeclaration=0;
  1843. int nc=d->nChild;
  1844. d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode));
  1845. d->pChild[nc].d=dc;
  1846. addToOrder(nc,eNodeChild);
  1847. d->nChild++;
  1848. return childNode;
  1849. }
  1850. void XMLNode::deleteAttribute(int i)
  1851. {
  1852. if ((!d)||(i>=d->nAttribute)) return;
  1853. d->nAttribute--;
  1854. XMLAttribute *p=d->pAttribute+i;
  1855. free((void*)p->lpszName);
  1856. if (p->lpszValue) free((void*)p->lpszValue);
  1857. if (d->nAttribute) memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; }
  1858. removeOrderElement(d,eNodeAttribute,i);
  1859. }
  1860. void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); }
  1861. void XMLNode::deleteAttribute(XMLCSTR lpszName)
  1862. {
  1863. int j=0;
  1864. getAttribute(lpszName,&j);
  1865. if (j) deleteAttribute(j-1);
  1866. }
  1867. XMLAttribute *XMLNode::updateAttribute_WOSD(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i)
  1868. {
  1869. if (!d) return NULL;
  1870. if (i>=d->nAttribute)
  1871. {
  1872. if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
  1873. return NULL;
  1874. }
  1875. XMLAttribute *p=d->pAttribute+i;
  1876. if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue);
  1877. p->lpszValue=lpszNewValue;
  1878. if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; };
  1879. return p;
  1880. }
  1881. XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
  1882. {
  1883. if (oldAttribute) return updateAttribute_WOSD(newAttribute->lpszValue,newAttribute->lpszName,oldAttribute->lpszName);
  1884. return NULL;
  1885. }
  1886. XMLAttribute *XMLNode::updateAttribute_WOSD(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName)
  1887. {
  1888. int j=0;
  1889. getAttribute(lpszOldName,&j);
  1890. if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1);
  1891. else
  1892. {
  1893. if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue);
  1894. else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue);
  1895. }
  1896. }
  1897. void XMLNode::deleteText(int i)
  1898. {
  1899. if ((!d)||(i>=d->nText)) return;
  1900. d->nText--;
  1901. XMLCSTR *p=d->pText+i;
  1902. free((void*)*p);
  1903. if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; }
  1904. removeOrderElement(d,eNodeText,i);
  1905. }
  1906. void XMLNode::deleteText(XMLCSTR lpszValue)
  1907. {
  1908. if (!d) return;
  1909. int i,l=d->nText;
  1910. XMLCSTR *p=d->pText;
  1911. for (i=0; i<l; i++) if (lpszValue==p[i]) { deleteText(i); return; }
  1912. }
  1913. XMLCSTR XMLNode::updateText_WOSD(XMLCSTR lpszNewValue, int i)
  1914. {
  1915. if (!d) return NULL;
  1916. if (i>=d->nText) return addText_WOSD(lpszNewValue);
  1917. XMLCSTR *p=d->pText+i;
  1918. if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; }
  1919. return lpszNewValue;
  1920. }
  1921. XMLCSTR XMLNode::updateText_WOSD(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
  1922. {
  1923. if (!d) return NULL;
  1924. int i,l=d->nText;
  1925. XMLCSTR *p=d->pText;
  1926. for (i=0; i<l; i++) if (lpszOldValue==p[i]) return updateText_WOSD(lpszNewValue,i);
  1927. return addText_WOSD(lpszNewValue);
  1928. }
  1929. void XMLNode::deleteClear(int i)
  1930. {
  1931. if ((!d)||(i>=d->nClear)) return;
  1932. d->nClear--;
  1933. XMLClear *p=d->pClear+i;
  1934. free((void*)p->lpszValue);
  1935. if (d->nClear) memmove(p,p+1,(d->nText-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; }
  1936. removeOrderElement(d,eNodeClear,i);
  1937. }
  1938. void XMLNode::deleteClear(XMLCSTR lpszValue)
  1939. {
  1940. if (!d) return;
  1941. int i,l=d->nClear;
  1942. XMLClear *p=d->pClear;
  1943. for (i=0; i<l; i++) if (lpszValue==p[i].lpszValue) { deleteText(i); return; }
  1944. }
  1945. void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); }
  1946. XMLClear *XMLNode::updateClear_WOSD(XMLCSTR lpszNewContent, int i)
  1947. {
  1948. if (!d) return NULL;
  1949. if (i>=d->nClear)
  1950. {
  1951. return addClear_WOSD(XMLClearTags[0].lpszOpen,lpszNewContent,XMLClearTags[0].lpszClose);
  1952. }
  1953. XMLClear *p=d->pClear+i;
  1954. if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; }
  1955. return p;
  1956. }
  1957. XMLClear *XMLNode::updateClear_WOSD(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
  1958. {
  1959. if (!d) return NULL;
  1960. int i,l=d->nClear;
  1961. XMLClear *p=d->pClear;
  1962. for (i=0; i<l; i++) if (lpszOldValue==p[i].lpszValue) return updateClear_WOSD(lpszNewValue,i);
  1963. return addClear_WOSD(lpszNewValue,XMLClearTags[0].lpszOpen,XMLClearTags[0].lpszClose);
  1964. }
  1965. XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP)
  1966. {
  1967. if (oldP) return updateClear_WOSD(newP->lpszValue,oldP->lpszValue);
  1968. return NULL;
  1969. }
  1970. XMLNode& XMLNode::operator=( const XMLNode& A )
  1971. {
  1972. // shallow copy
  1973. if (this != &A)
  1974. {
  1975. deleteNodeContent();
  1976. d=A.d;
  1977. if (d) (d->ref_count) ++ ;
  1978. }
  1979. return *this;
  1980. }
  1981. XMLNode::XMLNode(const XMLNode &A)
  1982. {
  1983. // shallow copy
  1984. d=A.d;
  1985. if (d) (d->ref_count)++ ;
  1986. }
  1987. int XMLNode::nChildNode(XMLCSTR name)
  1988. {
  1989. if (!d) return 0;
  1990. int i,j=0,n=d->nChild;
  1991. XMLNode *pc=d->pChild;
  1992. for (i=0; i<n; i++)
  1993. {
  1994. if (_tcsicmp(pc->d->lpszName, name)==0) j++;
  1995. pc++;
  1996. }
  1997. return j;
  1998. }
  1999. XMLNode XMLNode::getChildNode(XMLCSTR name, int *j)
  2000. {
  2001. if (!d) return emptyXMLNode;
  2002. int i=0,n=d->nChild;
  2003. if (j) i=*j;
  2004. XMLNode *pc=d->pChild+i;
  2005. for (; i<n; i++)
  2006. {
  2007. if (_tcsicmp(pc->d->lpszName, name)==0)
  2008. {
  2009. if (j) *j=i+1;
  2010. return *pc;
  2011. }
  2012. pc++;
  2013. }
  2014. return emptyXMLNode;
  2015. }
  2016. XMLNode XMLNode::getChildNode(XMLCSTR name, int j)
  2017. {
  2018. if (!d) return emptyXMLNode;
  2019. int i=0;
  2020. while (j-->0) getChildNode(name,&i);
  2021. return getChildNode(name,&i);
  2022. }
  2023. XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k)
  2024. {
  2025. int i=0,j;
  2026. if (k) i=*k;
  2027. XMLNode x;
  2028. XMLCSTR t;
  2029. do
  2030. {
  2031. x=getChildNode(name,&i);
  2032. if (!x.isEmpty())
  2033. {
  2034. if (attributeValue)
  2035. {
  2036. j=0;
  2037. do
  2038. {
  2039. t=x.getAttribute(attributeName,&j);
  2040. if (t&&(_tcsicmp(attributeValue,t)==0)) { if (k) *k=i+1; return x; }
  2041. } while (t);
  2042. } else
  2043. {
  2044. if (x.isAttributeSet(attributeName)) { if (k) *k=i+1; return x; }
  2045. }
  2046. }
  2047. } while (!x.isEmpty());
  2048. return emptyXMLNode;
  2049. }
  2050. // Find an attribute on an node.
  2051. XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j)
  2052. {
  2053. if (!d) return NULL;
  2054. int i=0,n=d->nAttribute;
  2055. if (j) i=*j;
  2056. XMLAttribute *pAttr=d->pAttribute+i;
  2057. for (; i<n; i++)
  2058. {
  2059. if (_tcsicmp(pAttr->lpszName, lpszAttrib)==0)
  2060. {
  2061. if (j) *j=i+1;
  2062. return pAttr->lpszValue;
  2063. }
  2064. pAttr++;
  2065. }
  2066. return NULL;
  2067. }
  2068. char XMLNode::isAttributeSet(XMLCSTR lpszAttrib)
  2069. {
  2070. if (!d) return FALSE;
  2071. int i,n=d->nAttribute;
  2072. XMLAttribute *pAttr=d->pAttribute;
  2073. for (i=0; i<n; i++)
  2074. {
  2075. if (_tcsicmp(pAttr->lpszName, lpszAttrib)==0)
  2076. {
  2077. return TRUE;
  2078. }
  2079. pAttr++;
  2080. }
  2081. return FALSE;
  2082. }
  2083. XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j)
  2084. {
  2085. if (!d) return NULL;
  2086. int i=0;
  2087. while (j-->0) getAttribute(name,&i);
  2088. return getAttribute(name,&i);
  2089. }
  2090. XMLCSTR XMLNode::getName(){ if (!d) return NULL; return d->lpszName; }
  2091. int XMLNode::nText() { if (!d) return 0; return d->nText; }
  2092. int XMLNode::nChildNode() { if (!d) return 0; return d->nChild; }
  2093. int XMLNode::nAttribute() { if (!d) return 0; return d->nAttribute; }
  2094. int XMLNode::nClear() { if (!d) return 0; return d->nClear; }
  2095. XMLClear XMLNode::getClear (int i) { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; }
  2096. XMLAttribute XMLNode::getAttribute (int i) { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; }
  2097. XMLCSTR XMLNode::getAttributeName (int i) { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; }
  2098. XMLCSTR XMLNode::getAttributeValue(int i) { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; }
  2099. XMLCSTR XMLNode::getText (int i) { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; }
  2100. XMLNode XMLNode::getChildNode (int i) { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; }
  2101. XMLNode XMLNode::getParentNode ( ) { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); }
  2102. char XMLNode::isDeclaration ( ) { if (!d) return 0; return d->isDeclaration; }
  2103. char XMLNode::isEmpty ( ) { return (d==NULL); }
  2104. int XMLNode::nElement ( ) { if (!d) return 0; return d->nChild+d->nText+d->nClear+d->nAttribute; }
  2105. XMLNode XMLNode::addChild(XMLCSTR lpszName, int isDeclaration)
  2106. { return addChild_WOSD(stringDup(lpszName),isDeclaration); }
  2107. XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue)
  2108. { return addAttribute_WOSD(stringDup(lpszName),stringDup(lpszValue)); }
  2109. XMLCSTR XMLNode::addText(XMLCSTR lpszValue)
  2110. { return addText_WOSD(stringDup(lpszValue)); }
  2111. XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose)
  2112. { return addClear_WOSD(stringDup(lpszValue),lpszOpen,lpszClose); }
  2113. XMLCSTR XMLNode::updateName(XMLCSTR lpszName)
  2114. { return updateName_WOSD(stringDup(lpszName)); }
  2115. XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute)
  2116. { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); }
  2117. XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i)
  2118. { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); }
  2119. XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName)
  2120. { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); }
  2121. XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i)
  2122. { return updateText_WOSD(stringDup(lpszNewValue),i); }
  2123. XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
  2124. { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); }
  2125. XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i)
  2126. { return updateClear_WOSD(stringDup(lpszNewContent),i); }
  2127. XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue)
  2128. { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); }
  2129. XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP)
  2130. { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); }
  2131. void XMLNode::setGlobalOptions(char _guessUnicodeChars, char strictUTF8Parsing)
  2132. {
  2133. guessUnicodeChars=_guessUnicodeChars;
  2134. #ifndef _XMLUNICODE
  2135. if (strictUTF8Parsing) XML_ByteTable=XML_utf8ByteTable; else XML_ByteTable=XML_asciiByteTable;
  2136. #endif
  2137. }
// Guess whether the buffer should be parsed as UTF-8.
// @param buf                      start of the data to inspect.
// @param l                        number of bytes available in buf.
// @param useXMLEncodingAttribute  when non-zero, an "encoding" attribute found
//                                 in the first 200 bytes is also taken into
//                                 account.
// @return 1 if the data looks like UTF-8, 0 otherwise. Always 0 in
//         _XMLUNICODE builds.
char XMLNode::guessUTF8ParsingParameterValue(void *buf,int l, char useXMLEncodingAttribute)
{
#ifdef _XMLUNICODE
    return 0;
#else
    // Buffers shorter than 25 bytes are never reported as UTF-8.
    if (l<25) return 0;
    if (myIsTextUnicode(buf,l)) return 0;
    unsigned char *b=(unsigned char*)buf;
    // UTF-8 byte order mark (EF BB BF) at the very start.
    if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return 1;
    // Match utf-8 model ?
    // NOTE(review): XML_utf8ByteTable presumably maps a lead byte to the
    // length of its sequence - confirm against the table definition.
    // The cases fall through on purpose: a 4-byte sequence checks three
    // continuation bytes, a 3-byte sequence two, and so on. A sequence cut
    // off by the end of the buffer is not rejected (the i<l guard).
    int i=0;
    while (i<l)
        switch (XML_utf8ByteTable[b[i]])
        {
        case 4: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) return 0; // 10bbbbbb ?
        case 3: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) return 0; // 10bbbbbb ?
        case 2: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) return 0; // 10bbbbbb ?
        case 1: i++; break;
        case 0: i=l; // table value 0 stops the scan
        }
    if (!useXMLEncodingAttribute) return 1;
    // If an encoding is specified and it is different from utf-8, then the
    // data is non-utf8; otherwise it is utf-8.
    char bb[201];
    l=mmin(l,200);
    memcpy(bb,buf,l); // copy buf into bb to be able to do "bb[l]=0"
    bb[l]=0;
    b=(unsigned char*)strstr(bb,"encoding");
    if (!b) return 1;
    // Expect: encoding <spaces> = <spaces> quote <spaces> value.
    // Anything that does not follow this shape is treated as "no encoding given".
    b+=8; while XML_isSPACECHAR(*b) b++; if (*b!='=') return 1;
    b++; while XML_isSPACECHAR(*b) b++; if ((*b!='\'')&&(*b!='"')) return 1;
    b++; while XML_isSPACECHAR(*b) b++; if ((_strnicmp((char*)b,"utf-8",5)==0)||
        (_strnicmp((char*)b,"utf8",4)==0)) return 1;
    return 0;
#endif
}
  2174. #undef XML_isSPACECHAR
  2175. //////////////////////////////////////////////////////////
  2176. // Here starts the base64 conversion functions. //
  2177. //////////////////////////////////////////////////////////
static const char base64Fillchar = _T('='); // used to mark partial words at the end
// This lookup table defines the base64 encoding: the character at index v
// encodes the 6-bit value v.
XMLCSTR base64EncodeTable=_T("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");
// The decode table gives, for every byte value, the index of that character
// in the base64 table (0-63), or one of the following markers:
// 96: '=' - 97: space char - 98: illegal char - 99: end of string
const unsigned char base64DecodeTable[] = {
    99,98,98,98,98,98,98,98,98,97, 97,98,98,97,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //00 -29
    98,98,97,98,98,98,98,98,98,98, 98,98,98,62,98,98,98,63,52,53, 54,55,56,57,58,59,60,61,98,98, //30 -59
    98,96,98,98,98, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24, //60 -89
    25,98,98,98,98,98,98,26,27,28, 29,30,31,32,33,34,35,36,37,38, 39,40,41,42,43,44,45,46,47,48, //90 -119
    49,50,51,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //120 -149
    98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //150 -179
    98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //180 -209
    98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //210 -239
    98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98 //240 -255
};
  2194. XMLParserBase64Tool::~XMLParserBase64Tool(){ if (buf) free(buf); }
  2195. int XMLParserBase64Tool::encodeLength(unsigned char *inbuf, int inlen, char formatted)
  2196. {
  2197. unsigned int i=((inlen-1)/3*4+4+1),eLen=inlen/3;
  2198. if (formatted) i+=eLen/18;
  2199. return i;
  2200. }
  2201. XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, char formatted)
  2202. {
  2203. int i=encodeLength(inbuf,inlen,formatted),k=17,eLen=inlen/3,j;
  2204. alloc(i*sizeof(XMLCHAR));
  2205. XMLSTR curr=(XMLSTR)buf;
  2206. for(i=0;i<eLen;i++)
  2207. {
  2208. // Copy next three bytes into lower 24 bits of int, paying attention to sign.
  2209. j=(inbuf[0]<<16)|(inbuf[1]<<8)|inbuf[2]; inbuf+=3;
  2210. // Encode the int into four chars
  2211. *(curr++)=base64EncodeTable[ j>>18 ];
  2212. *(curr++)=base64EncodeTable[(j>>12)&0x3f];
  2213. *(curr++)=base64EncodeTable[(j>> 6)&0x3f];
  2214. *(curr++)=base64EncodeTable[(j )&0x3f];
  2215. if (formatted) { if (!k) { *(curr++)=_T('\n'); k=18; } k--; }
  2216. }
  2217. eLen=inlen-eLen*3; // 0 - 2.
  2218. if (eLen==1)
  2219. {
  2220. *(curr++)=base64EncodeTable[ inbuf[0]>>2 ];
  2221. *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F];
  2222. *(curr++)=base64Fillchar;
  2223. *(curr++)=base64Fillchar;
  2224. } else if (eLen==2)
  2225. {
  2226. j=(inbuf[0]<<8)|inbuf[1];
  2227. *(curr++)=base64EncodeTable[ j>>10 ];
  2228. *(curr++)=base64EncodeTable[(j>> 4)&0x3f];
  2229. *(curr++)=base64EncodeTable[(j<< 2)&0x3f];
  2230. *(curr++)=base64Fillchar;
  2231. }
  2232. *(curr++)=0;
  2233. return (XMLSTR)buf;
  2234. }
  2235. unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe)
  2236. {
  2237. if (xe) *xe=eXMLErrorNone;
  2238. int size=0;
  2239. unsigned char c;
  2240. //skip any extra characters (e.g. newlines or spaces)
  2241. while (*data)
  2242. {
  2243. #ifdef _XMLUNICODE
  2244. if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
  2245. #endif
  2246. c=base64DecodeTable[(unsigned char)(*data)];
  2247. if (c<97) size++;
  2248. else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
  2249. data++;
  2250. }
  2251. if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4;
  2252. if (size==0) return 0;
  2253. do { data--; size--; } while(*data==base64Fillchar); size++;
  2254. return (unsigned int)((size*3)/4);
  2255. }
// Decode the base64 string `data` into the caller-supplied buffer.
// @param data  zero-terminated base64 text; space characters are skipped.
// @param buf   destination buffer.
// @param len   capacity of buf in bytes.
// @param xe    optional [out] error code (eXMLErrorNone on success).
// @return 2 on success (end of string reached at a group boundary, or a
//         fill character reached with the buffer exactly full),
//         1 when the data is truncated (eXMLErrorBase64DecodeTruncatedData),
//         0 on an illegal character or when buf is too small
//         (eXMLErrorBase64DecodeIllegalCharacter /
//         eXMLErrorBase64DecodeBufferTooSmall).
unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe)
{
    if (xe) *xe=eXMLErrorNone;
    int i=0,p=0;        // i: read position in data, p: write position in buf
    unsigned char d,c;
    for(;;)
    {
// BASE64DECODE_READ_NEXT_CHAR(c) loads into c the decode-table value of the
// next non-space character and makes the function return 0 (illegal
// character) when that value is 98. Table values: 0-63 data, 96 '=',
// 97 space, 98 illegal, 99 end of string.
#ifdef _XMLUNICODE
#define BASE64DECODE_READ_NEXT_CHAR(c) \
        do { \
            if (data[i]>255){ c=98; break; } \
            c=base64DecodeTable[(unsigned char)data[i++]]; \
        }while (c==97); \
        if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
#else
#define BASE64DECODE_READ_NEXT_CHAR(c) \
        do { c=base64DecodeTable[(unsigned char)data[i++]]; }while (c==97); \
        if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; }
#endif
        // 1st symbol of a group: end of string here is a clean finish.
        BASE64DECODE_READ_NEXT_CHAR(c)
        if (c==99) { return 2; }
        if (c==96)
        {
            if (p==(int)len) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeTruncatedData;
            return 1;
        }
        // 2nd symbol: together with the 1st it yields the first output byte.
        BASE64DECODE_READ_NEXT_CHAR(d)
        if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
        if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; }
        buf[p++]=(c<<2)|((d>>4)&0x3);
        // 3rd symbol: a fill character is accepted only if the buffer is full.
        BASE64DECODE_READ_NEXT_CHAR(c)
        if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
        if (p==(int)len)
        {
            if (c==96) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
            return 0;
        }
        if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
        buf[p++]=((d<<4)&0xf0)|((c>>2)&0xf);
        // 4th symbol: same rules as the 3rd, yields the third output byte.
        BASE64DECODE_READ_NEXT_CHAR(d)
        if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
        if (p==(int)len)
        {
            if (d==96) return 2;
            if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall;
            return 0;
        }
        if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; }
        buf[p++]=((c<<6)&0xc0)|d;
    }
}
  2309. #undef BASE64DECODE_READ_NEXT_CHAR
  2310. void XMLParserBase64Tool::alloc(int newsize)
  2311. {
  2312. if (!buf) { buf=malloc(newsize); buflen=newsize; }
  2313. else { if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } }
  2314. }
  2315. unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe)
  2316. {
  2317. if (xe) *xe=eXMLErrorNone;
  2318. unsigned int len=decodeSize(data,xe);
  2319. if (outlen) *outlen=len;
  2320. if (!len) return NULL;
  2321. alloc(len+1);
  2322. if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; }
  2323. return (unsigned char*)buf;
  2324. }