PageRenderTime 59ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/src/base/xmlParser.cpp

http://isibus.googlecode.com/
C++ | 1829 lines | 1319 code | 174 blank | 336 comment | 227 complexity | be504e0b291d15e4bdee7294aba4a5e2 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /**
  2. ****************************************************************************
  3. * <P> XML.c - implementation file for basic XML parser written in ANSI C++
  4. * for portability. It works by using recursion and a node tree for breaking
  5. * down the elements of an XML document. </P>
  6. *
  7. * @version V2.18
  8. * @author Frank Vanden Berghen
  9. *
  10. * NOTE:
  11. *
  12. * If you add "#define STRICT_PARSING", on the first line of this file
  13. * the parser will see the following XML-stream:
  14. * <a><b>some text</b><b>other text </a>
  15. * as an error. Otherwise, this string will be equivalent to:
  16. * <a><b>some text</b><b>other text</b></a>
  17. *
  18. * NOTE:
  19. *
  20. * If you add "#define APPROXIMATE_PARSING" on the first line of this file
  21. * the parser will see the following XML-stream:
  22. * <data name="n1">
  23. * <data name="n2">
  24. * <data name="n3" />
  25. * as equivalent to the following XML-stream:
  26. * <data name="n1" />
  27. * <data name="n2" />
  28. * <data name="n3" />
  29. * This can be useful for badly-formed XML-streams but prevent the use
  30. * of the following XML-stream (problem is: tags at contiguous levels
  31. * have the same names):
  32. * <data name="n1">
  33. * <data name="n2">
  34. * <data name="n3" />
  35. * </data>
  36. * </data>
  37. *
  38. * NOTE:
  39. *
  40. * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file
  41. * the "openFileHelper" function will always display error messages inside the
  42. * console instead of inside a message-box-window. Message-box-windows are
  43. * available on windows only.
  44. *
  45. * BSD license:
  46. * Copyright (c) 2002, Frank Vanden Berghen
  47. * All rights reserved.
  48. * Redistribution and use in source and binary forms, with or without
  49. * modification, are permitted provided that the following conditions are met:
  50. *
  51. * * Redistributions of source code must retain the above copyright
  52. * notice, this list of conditions and the following disclaimer.
  53. * * Redistributions in binary form must reproduce the above copyright
  54. * notice, this list of conditions and the following disclaimer in the
  55. * documentation and/or other materials provided with the distribution.
  56. * * Neither the name of the Frank Vanden Berghen nor the
  57. * names of its contributors may be used to endorse or promote products
  58. * derived from this software without specific prior written permission.
  59. *
  60. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
  61. * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  62. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  63. * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
  64. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  65. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  66. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  67. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  68. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  69. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  70. *
  71. ****************************************************************************
  72. */
  73. #ifndef _CRT_SECURE_NO_DEPRECATE
  74. #define _CRT_SECURE_NO_DEPRECATE
  75. #endif
  76. #include "xmlParser.h"
  77. #ifdef WIN32
  78. //#ifdef _DEBUG
  79. //#define _CRTDBG_MAP_ALLOC
  80. //#include <crtdbg.h>
  81. //#endif
  82. #define WIN32_LEAN_AND_MEAN
  83. #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files
  84. // to have "MessageBoxA" to display error messages for openFileHelper
  85. #endif
  86. #include <memory.h>
  87. #include <assert.h>
  88. #include <stdio.h>
  89. #include <string.h>
  90. #include <stdlib.h>
  // Returns the smaller of the two integers.
  inline int mmin( const int t1, const int t2 )
  {
      if (t2 <= t1) return t2;
      return t1;
  }
  // Table of the "clear" tags currently recognized by the library; edit the
  // initializer below to change that set.
  // The number in the second column is the length (in characters) of the
  // opening string in the first column. The third column is presumably the
  // matching closing string -- the field names come from ALLXMLClearTag, which
  // is declared outside this file; TODO confirm.
  // GetNextToken stops scanning at the first entry whose opening string is
  // NULL, so the all-NULL row must remain the last one.
  ALLXMLClearTag XMLClearTags[] =
  {
      { _T("<![CDATA["),9, _T("]]>") },
      { _T("<PRE>") ,5, _T("</PRE>") },
      { _T("<Script>") ,8, _T("</Script>")},
      { _T("<!--") ,4, _T("-->") },
      { _T("<!DOCTYPE"),9, _T(">") },
      { NULL ,0, NULL }
  };
  // Table of the character entities currently recognized by the library; edit
  // the initializer below to change that set.
  // Fields: s = entity text, l = length of s in characters, c = the single
  // character the entity stands for.
  // Additionally, the numeric syntaxes "&#xA0;" and "&#160;" are recognized;
  // they are decoded separately and are not listed here.
  // Scans of this table stop at the entry whose s is NULL, so that row must
  // remain the last one.
  typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity;
  static XMLCharacterEntity XMLEntities[] =
  {
      { _T("&amp;" ), 5, _T('&' )},
      { _T("&lt;" ), 4, _T('<' )},
      { _T("&gt;" ), 4, _T('>' )},
      { _T("&quot;"), 6, _T('\"')},
      { _T("&apos;"), 6, _T('\'')},
      { NULL , 0, '\0' }
  };
  119. // When rendering the XMLNode to a string (using the "createXMLString" function),
  120. // you can ask for a beautiful formatting. This formatting is using the
  121. // following indentation character:
  122. #define INDENTCHAR _T('\t')
  123. // The following function parses the XML errors into a user friendly string.
  124. // You can edit this to change the output language of the library to something else.
  125. XMLCSTR XMLNode::getError(XMLError xerror)
  126. {
  127. switch (xerror)
  128. {
  129. case eXMLErrorNone: return _T("No error");
  130. case eXMLErrorMissingEndTag: return _T("Warning: Unmatched end tag");
  131. case eXMLErrorEmpty: return _T("Error: No XML data");
  132. case eXMLErrorFirstNotStartTag: return _T("Error: First token not start tag");
  133. case eXMLErrorMissingTagName: return _T("Error: Missing start tag name");
  134. case eXMLErrorMissingEndTagName: return _T("Error: Missing end tag name");
  135. case eXMLErrorNoMatchingQuote: return _T("Error: Unmatched quote");
  136. case eXMLErrorUnmatchedEndTag: return _T("Error: Unmatched end tag");
  137. case eXMLErrorUnmatchedEndClearTag: return _T("Error: Unmatched clear tag end");
  138. case eXMLErrorUnexpectedToken: return _T("Error: Unexpected token found");
  139. case eXMLErrorInvalidTag: return _T("Error: Invalid tag found");
  140. case eXMLErrorNoElements: return _T("Error: No elements found");
  141. case eXMLErrorFileNotFound: return _T("Error: File not found");
  142. case eXMLErrorFirstTagNotFound: return _T("Error: First Tag not found");
  143. case eXMLErrorUnknownEscapeSequence: return _T("Error: Unknown character entity");
  144. case eXMLErrorCharConversionError: return _T("Error: unable to convert between UNICODE and MultiByte chars");
  145. case eXMLErrorCannotOpenWriteFile: return _T("Error: unable to open file for writing");
  146. case eXMLErrorCannotWriteFile: return _T("Error: cannot write into file");
  147. case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _T("Warning: Base64-string length is not a multiple of 4");
  148. case eXMLErrorBase64DecodeTruncatedData: return _T("Warning: Base64-string is truncated");
  149. case eXMLErrorBase64DecodeIllegalCharacter: return _T("Error: Base64-string contains an illegal character");
  150. case eXMLErrorBase64DecodeBufferTooSmall: return _T("Error: Base64 decode output buffer is too small");
  151. };
  152. return _T("Unknown");
  153. }
  154. #ifndef _XMLUNICODE
  // Character-length lookup table, indexed by the first byte of a character.
  // With "strictUTF8Parsing=0" every character is assumed to be 1 byte long;
  // with "strictUTF8Parsing=1" characters are 1 to 4 bytes long and this table
  // gives that length.
  // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ).
  static const char XML_utf8ByteTable[256] =
  {
      /* 0x00 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // end of the ASCII range
      /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x80 to 0xc1 are invalid lead bytes
      /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xa0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xb0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xc0 */ 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xc2 to 0xdf start a 2-byte character
      /* 0xd0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
      /* 0xe0 */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xe0 to 0xef start a 3-byte character
      /* 0xf0 */ 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1  // 0xf0 to 0xf4 start a 4-byte character, 0xf5 and higher are invalid
  };
  180. #endif
  181. // Here is an abstraction layer to access some common string manipulation functions.
  182. // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0,
  183. // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++.
  184. // If you plan to "port" the library to a new system/compiler, all you have to do is
  185. // to edit the following lines.
  186. #ifdef _XMLWINDOWS
  187. // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET,
  188. char myIsTextUnicode(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); };
  189. #ifdef _XMLUNICODE
  190. wchar_t *myMultiByteToWideChar(const char *s,int l)
  191. {
  192. int i=(int)MultiByteToWideChar(CP_ACP, // code page
  193. MB_PRECOMPOSED, // character-type options
  194. s, // string to map
  195. l, // number of bytes in string
  196. NULL, // wide-character buffer
  197. 0); // size of buffer
  198. if (i<0) return NULL;
  199. wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR));
  200. MultiByteToWideChar(CP_ACP, // code page
  201. MB_PRECOMPOSED, // character-type options
  202. s, // string to map
  203. l, // number of bytes in string
  204. d, // wide-character buffer
  205. i); // size of buffer
  206. d[i]=0;
  207. return d;
  208. }
  209. #else
  210. char *myWideCharToMultiByte(const wchar_t *s,int l)
  211. {
  212. int i=(int)WideCharToMultiByte(CP_ACP, // code page
  213. 0, // performance and mapping flags
  214. s, // wide-character string
  215. l, // number of chars in string
  216. NULL, // buffer for new string
  217. 0, // size of buffer
  218. NULL, // default for unmappable chars
  219. NULL // set when default char used
  220. );
  221. if (i<0) return NULL;
  222. char *d=(char*)malloc(i+1);
  223. WideCharToMultiByte(CP_ACP, // code page
  224. 0, // performance and mapping flags
  225. s, // wide-character string
  226. l, // number of chars in string
  227. d, // buffer for new string
  228. i, // size of buffer
  229. NULL, // default for unmappable chars
  230. NULL // set when default char used
  231. );
  232. d[i]=0;
  233. return d;
  234. }
  235. #endif
  236. #ifdef __BORLANDC__
  237. int _strnicmp(char *c1, char *c2, int l){ return strnicmp(c1,c2,l);}
  238. #endif
  239. #else
  240. // for gcc and CC
  241. #ifdef XML_NO_WIDE_CHAR
  242. char myIsTextUnicode(const void *b, int len) { return FALSE; }
  243. char *myWideCharToMultiByte(const wchar_t *s, int l) { return NULL; }
  244. #else
  245. char myIsTextUnicode(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode
  246. {
  247. const wchar_t *s=(const wchar_t*)b;
  248. // buffer too small:
  249. if (len<(int)sizeof(wchar_t)) return FALSE;
  250. // odd length test
  251. if (len&1) return FALSE;
  252. /* only checks the first 256 characters */
  253. len=mmin(256,len/sizeof(wchar_t));
  254. // Check for the special byte order:
  255. if (*s == 0xFFFE) return FALSE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE;
  256. if (*s == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE
  257. // checks for ASCII characters in the UNICODE stream
  258. int i,stats=0;
  259. for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++;
  260. if (stats>len/2) return TRUE;
  261. // Check for UNICODE NULL chars
  262. for (i=0; i<len; i++) if (!s[i]) return TRUE;
  263. return FALSE;
  264. }
  // Converts the NUL-terminated wide string s to a newly malloc'ed multibyte
  // string using the current locale (wcsrtombs).
  // The l parameter is not used by this implementation.
  // Returns NULL when s cannot be converted or when the allocation fails.
  // The caller owns the result and must free() it.
  char *myWideCharToMultiByte(const wchar_t *s, int l)
  {
      (void)l;
      // Measuring pass: with a NULL destination wcsrtombs only reports the
      // number of bytes needed ((size_t)-1, i.e. a negative int, on error).
      const wchar_t *ss=s;
      int i=(int)wcsrtombs(NULL,&ss,0,NULL);
      if (i<0) return NULL;
      char *d=(char *)malloc(i+1);
      if (!d) return NULL; // out of memory: do not write through a NULL buffer
      wcsrtombs(d,&s,i,NULL);
      d[i]=0;
      return d;
  }
  275. #endif
  276. #ifdef _XMLUNICODE
  // Converts the NUL-terminated multibyte string s to a newly malloc'ed wide
  // string using the current locale (mbsrtowcs).
  // The whole string is always converted; the l parameter is not used, because
  // the buffer is sized from the measuring pass below.
  // Returns NULL when s cannot be converted or when the allocation fails.
  // The caller owns the result and must free() it.
  wchar_t *myMultiByteToWideChar(const char *s, int l)
  {
      (void)l;
      // Measuring pass: with a NULL destination mbsrtowcs only reports the
      // number of wide characters needed ((size_t)-1 on an invalid sequence).
      const char *ss=s;
      int i=(int)mbsrtowcs(NULL,&ss,0,NULL);
      if (i<0) return NULL;
      wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t));
      if (!d) return NULL; // out of memory: do not write through a NULL buffer
      // The third argument is the destination capacity in wide characters, so
      // it must be the measured length, not the byte count of the source.
      mbsrtowcs(d,&s,i,NULL);
      d[i]=0;
      return d;
  }
  287. int _tcslen(XMLCSTR c) { return wcslen(c); }
  288. #include <widec.h>
  289. #ifdef sun
  290. // for CC
  291. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);}
  292. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); }
  293. #else
  294. // for gcc
  295. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);}
  296. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); }
  297. #endif
  298. XMLSTR _tcsstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); }
  299. XMLSTR _tcscpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); }
  300. FILE *_tfopen(XMLCSTR filename,XMLCSTR mode)
  301. {
  302. char *filenameAscii=myWideCharToMultiByte(filename,0);
  303. FILE *f;
  304. if (mode[0]==_T('r')) f=fopen(filenameAscii,"rb");
  305. else f=fopen(filenameAscii,"wb");
  306. free(filenameAscii);
  307. return f;
  308. }
  309. #else
  310. FILE *_tfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); }
  311. int _tcslen(XMLCSTR c) { return strlen(c); }
  312. int _tcsnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);}
  313. int _tcsicmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); }
  314. XMLSTR _tcsstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); }
  315. XMLSTR _tcscpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); }
  316. #endif
  // Case-insensitive comparison of at most l characters of two narrow strings
  // (strncasecmp under the Windows-style name used by this file).
  int _strnicmp(const char *c1,const char *c2, int l)
  {
      const int order=strncasecmp(c1,c2,l);
      return order;
  }
  318. #endif
  319. /////////////////////////////////////////////////////////////////////////
  320. // Here start the core implementation of the XMLParser library //
  321. /////////////////////////////////////////////////////////////////////////
  322. // You should normally not change anything below this point.
  323. // For your own information, I suggest that you read the openFileHelper below:
  324. XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag)
  325. {
  326. // guess the value of the global parameter "strictUTF8Parsing"
  327. // (the guess is based on the first 200 bytes of the file).
  328. FILE *f=_tfopen(filename,_T("rb"));
  329. if (f)
  330. {
  331. char bb[205];
  332. int l=(int)fread(bb,1,200,f);
  333. setGlobalOptions(1,guessUTF8ParsingParameterValue(bb,l,1));
  334. fclose(f);
  335. }
  336. // parse the file
  337. XMLResults pResults;
  338. XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults);
  339. // display error message (if any)
  340. if (pResults.error != eXMLErrorNone)
  341. {
  342. // create message
  343. char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_T("");
  344. if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; }
  345. sprintf(message,
  346. #ifdef _XMLUNICODE
  347. "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s"
  348. #else
  349. "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s"
  350. #endif
  351. ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3);
  352. // display message
  353. #ifdef WIN32
  354. #ifndef _XMLPARSER_NO_MESSAGEBOX_
  355. MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST);
  356. #else
  357. printf("%s",message);
  358. #endif
  359. #else
  360. printf("%s",message);
  361. #endif
  362. exit(255);
  363. }
  364. return xnode;
  365. }
  // NOTE(review): presumably the switch for automatic unicode detection when
  // loading text; it is not read anywhere in the visible part of this file,
  // so confirm against its users. Enabled (1) by default.
  static char guessUnicodeChars=1;
  367. #ifndef _XMLUNICODE
  // Character-length lookup table in which every first byte maps to a length
  // of 1, except byte 0 which maps to 0 (same layout as XML_utf8ByteTable).
  static const char XML_asciiByteTable[256] =
  {
      /* 0x00 */ 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xa0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xb0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xc0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xd0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xe0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
      /* 0xf0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
  };
  // Character-length table currently in effect: the parsing and rendering code
  // indexes it with the first byte of a character to get that character's
  // length in bytes. It starts out pointing at XML_utf8ByteTable; presumably
  // it is switched to XML_asciiByteTable when strictUTF8Parsing is turned off
  // (that switch is outside this chunk -- confirm).
  static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "strictUTF8Parsing=1"
  378. #endif
  379. XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const
  380. {
  381. int i;
  382. XMLSTR t=createXMLString(nFormat,&i);
  383. FILE *f=_tfopen(filename,_T("wb"));
  384. if (!f) return eXMLErrorCannotOpenWriteFile;
  385. #ifdef _XMLUNICODE
  386. unsigned char h[2]={ 0xFF, 0xFE };
  387. if (!fwrite(h,2,1,f)) return eXMLErrorCannotWriteFile;
  388. if (!isDeclaration())
  389. {
  390. if (!fwrite(_T("<?xml version=\"1.0\" encoding=\"utf-16\"?>\n"),sizeof(wchar_t)*40,1,f))
  391. return eXMLErrorCannotWriteFile;
  392. }
  393. #else
  394. if (!isDeclaration())
  395. {
  396. if ((!encoding)||(XML_ByteTable==XML_utf8ByteTable))
  397. {
  398. // header so that windows recognize the file as UTF-8:
  399. unsigned char h[3]={0xEF,0xBB,0xBF};
  400. if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  401. if (!fwrite("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n",39,1,f)) return eXMLErrorCannotWriteFile;
  402. }
  403. else
  404. if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) return eXMLErrorCannotWriteFile;
  405. } else
  406. {
  407. if (XML_ByteTable==XML_utf8ByteTable) // test if strictUTF8Parsing==1"
  408. {
  409. unsigned char h[3]={0xEF,0xBB,0xBF}; if (!fwrite(h,3,1,f)) return eXMLErrorCannotWriteFile;
  410. }
  411. }
  412. #endif
  413. if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) return eXMLErrorCannotWriteFile;
  414. if (fclose(f)!=0) return eXMLErrorCannotWriteFile;
  415. free(t);
  416. return eXMLErrorNone;
  417. }
  418. // Duplicate a given string.
  419. XMLSTR stringDup(XMLCSTR lpszData, int cbData)
  420. {
  421. if (lpszData==NULL) return NULL;
  422. XMLSTR lpszNew;
  423. if (cbData==0) cbData=(int)_tcslen(lpszData);
  424. lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR));
  425. if (lpszNew)
  426. {
  427. memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR));
  428. lpszNew[cbData] = (XMLCHAR)NULL;
  429. }
  430. return lpszNew;
  431. }
  // Definitions of XMLNode's static members; the clear and attribute values
  // have every field set to NULL.
  // NOTE(review): these look like the shared "empty" results handed out when a
  // lookup finds nothing -- their users are outside this chunk; confirm.
  XMLNode XMLNode::emptyXMLNode;
  XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL};
  XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL};
  // Enumeration used to decipher what type a token is.
  // GetNextToken reports one of these values through its pType argument.
  typedef enum XMLTokenTypeTag
  {
      eTokenText = 0,       /* plain text */
      eTokenQuotedText,     /* text enclosed in matching ' or " quotes */
      eTokenTagStart,       /* "<" */
      eTokenTagEnd,         /* "</" */
      eTokenCloseTag,       /* ">" */
      eTokenEquals,         /* "=" */
      eTokenDeclaration,    /* "<?" */
      eTokenShortHandClose, /* "/>" */
      eTokenClear,          /* opening string of an entry of XMLClearTags */
      eTokenError           /* no further character could be read */
  } XMLTokenType;
  // Main structure used for parsing XML
  typedef struct XML
  {
      XMLCSTR lpXML;                // text being parsed; read at position nIndex
      int nIndex,nIndexMissigEndTag;// nIndex: current read position inside lpXML;
                                    // nIndexMissigEndTag: presumably the position recorded
                                    // when an end tag is found missing -- TODO confirm
      enum XMLError error;          // last error recorded while parsing
      XMLCSTR lpEndTag;             // NOTE(review): this and the four fields below are
      int cbEndTag;                 // not used in the visible part of the file; by their
      XMLCSTR lpNewElement;         // names they carry a pending end tag / new element
      int cbNewElement;             // (pointer + length) and a "first token" flag --
      int nFirst;                   // confirm against the element parser.
  } XML;
  // Result of GetNextToken.
  typedef struct
  {
      ALLXMLClearTag *pClr; // matching entry of XMLClearTags for a clear-tag token, NULL otherwise
      XMLCSTR pStr;         // start of the token inside the parsed text, NULL when no token was read
  } NextToken;
  // Enumeration used when parsing attributes.
  // NOTE(review): by their names the values track which part of a
  // name="value" attribute is expected next; the code using them is outside
  // this chunk -- confirm.
  typedef enum Attrib
  {
      eAttribName = 0,
      eAttribEquals,
      eAttribValue
  } Attrib;
  // Enumeration used when parsing elements to dictate whether we are currently
  // inside a tag or outside of one.
  typedef enum Status
  {
      eInsideTag = 0,
      eOutsideTag
  } Status;
  480. // private (used while rendering):
  481. XMLSTR toXMLString(XMLSTR dest,XMLCSTR source)
  482. {
  483. XMLSTR dd=dest;
  484. XMLCHAR ch;
  485. XMLCharacterEntity *entity;
  486. while ((ch=*source))
  487. {
  488. entity=XMLEntities;
  489. do
  490. {
  491. if (ch==entity->c) {_tcscpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; }
  492. entity++;
  493. } while(entity->s);
  494. #ifdef _XMLUNICODE
  495. *(dest++)=*(source++);
  496. #else
  497. switch(XML_ByteTable[(unsigned char)ch])
  498. {
  499. case 4: *(dest++)=*(source++);
  500. case 3: *(dest++)=*(source++);
  501. case 2: *(dest++)=*(source++);
  502. case 1: *(dest++)=*(source++);
  503. }
  504. #endif
  505. out_of_loop1:
  506. ;
  507. }
  508. *dest=0;
  509. return dd;
  510. }
  511. // private (used while rendering):
  512. int lengthXMLString(XMLCSTR source)
  513. {
  514. int r=0;
  515. XMLCharacterEntity *entity;
  516. XMLCHAR ch;
  517. while ((ch=*source))
  518. {
  519. entity=XMLEntities;
  520. do
  521. {
  522. if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; }
  523. entity++;
  524. } while(entity->s);
  525. #ifdef _XMLUNICODE
  526. r++; source++;
  527. #else
  528. ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch;
  529. #endif
  530. out_of_loop1:
  531. ;
  532. }
  533. return r;
  534. }
  535. XMLSTR toXMLString(XMLCSTR source)
  536. {
  537. XMLSTR dest=(XMLSTR)malloc((lengthXMLString(source)+1)*sizeof(XMLCHAR));
  538. return toXMLString(dest,source);
  539. }
  540. XMLSTR toXMLStringFast(XMLSTR *dest,int *destSz, XMLCSTR source)
  541. {
  542. int l=lengthXMLString(source)+1;
  543. if (l>*destSz) { *destSz=l; *dest=(XMLSTR)realloc(*dest,l*sizeof(XMLCHAR)); }
  544. return toXMLString(*dest,source);
  545. }
  546. // private:
  547. XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML)
  548. {
  549. // This function is the opposite of the function "toXMLString". It decodes the escape
  550. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  551. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  552. // the XML library will always gives you back "decoded" strings.
  553. //
  554. // in: string (s) and length (lo) of string
  555. // out: new allocated string converted from xml
  556. if (!s) return NULL;
  557. int ll=0,j;
  558. XMLSTR d;
  559. XMLCSTR ss=s;
  560. XMLCharacterEntity *entity;
  561. while ((lo>0)&&(*s))
  562. {
  563. if (*s==_T('&'))
  564. {
  565. if ((lo>2)&&(s[1]==_T('#')))
  566. {
  567. s+=2; lo-=2;
  568. if ((*s==_T('X'))||(*s==_T('x'))) { s++; lo--; }
  569. while ((*s)&&(*s!=_T(';'))&&((lo--)>0)) s++;
  570. if (*s!=_T(';'))
  571. {
  572. pXML->error=eXMLErrorUnknownEscapeSequence;
  573. return NULL;
  574. }
  575. s++; lo--;
  576. } else
  577. {
  578. entity=XMLEntities;
  579. do
  580. {
  581. if ((lo>=entity->l)&&(_tcsnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; }
  582. entity++;
  583. } while(entity->s);
  584. if (!entity->s)
  585. {
  586. pXML->error=eXMLErrorUnknownEscapeSequence;
  587. return NULL;
  588. }
  589. }
  590. } else
  591. {
  592. #ifdef _XMLUNICODE
  593. s++; lo--;
  594. #else
  595. j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1;
  596. #endif
  597. }
  598. ll++;
  599. }
  600. d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR));
  601. s=d;
  602. while (ll-->0)
  603. {
  604. if (*ss==_T('&'))
  605. {
  606. if (ss[1]==_T('#'))
  607. {
  608. ss+=2; j=0;
  609. if ((*ss==_T('X'))||(*ss==_T('x')))
  610. {
  611. ss++;
  612. while (*ss!=_T(';'))
  613. {
  614. if ((*ss>=_T('0'))&&(*ss<=_T('9'))) j=(j<<4)+*ss-_T('0');
  615. else if ((*ss>=_T('A'))&&(*ss<=_T('F'))) j=(j<<4)+*ss-_T('A')+10;
  616. else if ((*ss>=_T('a'))&&(*ss<=_T('f'))) j=(j<<4)+*ss-_T('a')+10;
  617. else { free(d); pXML->error=eXMLErrorUnknownEscapeSequence;return NULL;}
  618. ss++;
  619. }
  620. } else
  621. {
  622. while (*ss!=_T(';'))
  623. {
  624. if ((*ss>=_T('0'))&&(*ss<=_T('9'))) j=(j*10)+*ss-_T('0');
  625. else { free(d); pXML->error=eXMLErrorUnknownEscapeSequence;return NULL;}
  626. ss++;
  627. }
  628. }
  629. (*d++)=(XMLCHAR)j; ss++;
  630. } else
  631. {
  632. entity=XMLEntities;
  633. do
  634. {
  635. if (_tcsnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; }
  636. entity++;
  637. } while(entity->s);
  638. }
  639. } else
  640. {
  641. #ifdef _XMLUNICODE
  642. *(d++)=*(ss++);
  643. #else
  644. switch(XML_ByteTable[(unsigned char)*ss])
  645. {
  646. case 4: *(d++)=*(ss++); ll--;
  647. case 3: *(d++)=*(ss++); ll--;
  648. case 2: *(d++)=*(ss++); ll--;
  649. case 1: *(d++)=*(ss++);
  650. }
  651. #endif
  652. }
  653. }
  654. *d=0;
  655. return (XMLSTR)s;
  656. }
  // True when ch is one of the white-space characters \n, space, \t or \r.
  // The argument is parenthesized so that an expression can be passed safely;
  // note that it is evaluated up to four times, so it must not have side
  // effects. The expansion keeps its outer parentheses: callers write
  // "if XML_isSPACECHAR(ch)" without adding their own.
  #define XML_isSPACECHAR(ch) (((ch)==_T('\n'))||((ch)==_T(' '))||((ch)== _T('\t'))||((ch)==_T('\r')))
  658. // private:
  659. char myTagCompare(XMLCSTR cclose, XMLCSTR copen)
  660. // !!!! WARNING strange convention&:
  661. // return 0 if equals
  662. // return 1 if different
  663. {
  664. if (!cclose) return 1;
  665. int l=(int)_tcslen(cclose);
  666. if (_tcsnicmp(cclose, copen, l)!=0) return 1;
  667. const XMLCHAR c=copen[l];
  668. if (XML_isSPACECHAR(c)||
  669. (c==_T('/' ))||
  670. (c==_T('<' ))||
  671. (c==_T('>' ))||
  672. (c==_T('=' ))) return 0;
  673. return 1;
  674. }
  675. // Obtain the next character from the string.
  676. static inline XMLCHAR getNextChar(XML *pXML)
  677. {
  678. XMLCHAR ch = pXML->lpXML[pXML->nIndex];
  679. #ifdef _XMLUNICODE
  680. if (ch!=0) pXML->nIndex++;
  681. #else
  682. pXML->nIndex+=XML_ByteTable[(unsigned char)ch];
  683. #endif
  684. return ch;
  685. }
  // Find the next token in a string.
  //
  // in:  pXML     - parser state; reading starts at pXML->nIndex, which is
  //                 advanced past the token
  // out: pcbToken - number of characters that have been read for the token
  //                 (0 at end of input; NOT written for clear-tag tokens)
  //      pType    - type of the token (eTokenError at end of input)
  // Returns the token start in .pStr (NULL at end of input) and, for a
  // clear-tag token, the matching XMLClearTags entry in .pClr (NULL otherwise).
  static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType)
  {
      NextToken result;
      XMLCHAR ch;
      XMLCHAR chTemp;
      int indexStart,nFoundMatch,nIsText=FALSE;
      result.pClr=NULL; // prevent warning
      // Find next non-white space character
      // (indexStart ends up as the index of that character)
      do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch);
      if (ch)
      {
          // Cache the current string pointer
          result.pStr = &pXML->lpXML[indexStart];
          // First check whether the token is in the clear tag list (meaning it
          // does not need formatting).
          ALLXMLClearTag *ctag=XMLClearTags;
          do
          {
              if (_tcsnicmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)==0)
              {
                  result.pClr=ctag;
                  // the first character of the opening string has already been
                  // consumed above, hence the -1
                  pXML->nIndex+=ctag->openTagLen-1;
                  *pType=eTokenClear;
                  return result;
              }
              ctag++;
          } while(ctag->lpszOpen);
          // If we didn't find a clear tag then check for standard tokens
          switch(ch)
          {
          // Check for quotes
          case _T('\''):
          case _T('\"'):
              // Type of token
              *pType = eTokenQuotedText;
              chTemp = ch;
              // Set the size
              nFoundMatch = FALSE;
              // Search through the string to find a matching quote
              // (the search is abandoned at the first '<')
              while((ch = getNextChar(pXML)))
              {
                  if (ch==chTemp) { nFoundMatch = TRUE; break; }
                  if (ch==_T('<')) break;
              }
              // If we failed to find a matching quote
              // rewind to just after the opening quote and treat it as text
              if (nFoundMatch == FALSE)
              {
                  pXML->nIndex=indexStart+1;
                  nIsText=TRUE;
                  break;
              }
              // 4.02.2002
              // if (FindNonWhiteSpace(pXML)) pXML->nIndex--;
              break;
          // Equals (used with attribute values)
          case _T('='):
              *pType = eTokenEquals;
              break;
          // Close tag
          case _T('>'):
              *pType = eTokenCloseTag;
              break;
          // Check for tag start and tag end
          case _T('<'):
              // Peek at the next character to see if we have an end tag '</',
              // or an xml declaration '<?'
              chTemp = pXML->lpXML[pXML->nIndex];
              // If we have a tag end...
              if (chTemp == _T('/'))
              {
                  // Set the type and ensure we point at the next character
                  getNextChar(pXML);
                  *pType = eTokenTagEnd;
              }
              // If we have an XML declaration tag
              else if (chTemp == _T('?'))
              {
                  // Set the type and ensure we point at the next character
                  getNextChar(pXML);
                  *pType = eTokenDeclaration;
              }
              // Otherwise we must have a start tag
              else
              {
                  *pType = eTokenTagStart;
              }
              break;
          // Check to see if we have a short hand type end tag ('/>').
          case _T('/'):
              // Peek at the next character to see if we have a short end tag '/>'
              chTemp = pXML->lpXML[pXML->nIndex];
              // If we have a short hand end tag...
              if (chTemp == _T('>'))
              {
                  // Set the type and ensure we point at the next character
                  getNextChar(pXML);
                  *pType = eTokenShortHandClose;
                  break;
              }
              // If we haven't found a short hand closing tag then drop into the
              // text process (intentional fall through into "default")
          // Other characters
          default:
              nIsText = TRUE;
          }
          // If this is a TEXT node
          if (nIsText)
          {
              // Indicate we are dealing with text
              *pType = eTokenText;
              while((ch = getNextChar(pXML)))
              {
                  if XML_isSPACECHAR(ch)
                  {
                      // the white space has been consumed; moving indexStart
                      // forward keeps it out of the length computed below
                      indexStart++; break;
                  } else if (ch==_T('/'))
                  {
                      // If we find a slash then this maybe text or a short hand end tag
                      // Peek at the next character to see it we have short hand end tag
                      ch=pXML->lpXML[pXML->nIndex];
                      // If we found a short hand end tag then we need to exit the loop
                      // (the '/' is given back so that it starts the next token)
                      if (ch==_T('>')) { pXML->nIndex--; break; }
                  } else if ((ch==_T('<'))||(ch==_T('>'))||(ch==_T('=')))
                  {
                      // a delimiter ends the text; give it back for the next token
                      pXML->nIndex--; break;
                  }
              }
          }
          *pcbToken = pXML->nIndex-indexStart;
      } else
      {
          // If we failed to obtain a valid character
          *pcbToken = 0;
          *pType = eTokenError;
          result.pStr=NULL;
      }
      return result;
  }
  826. XMLCSTR XMLNode::updateName_WOSD(XMLCSTR lpszName)
  827. {
  828. if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName);
  829. d->lpszName=lpszName;
  830. return lpszName;
  831. }
  832. // private:
  833. XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; }
  834. XMLNode::XMLNode(XMLNodeData *pParent, XMLCSTR lpszName, char isDeclaration)
  835. {
  836. d=(XMLNodeData*)malloc(sizeof(XMLNodeData));
  837. d->ref_count=1;
  838. d->lpszName=NULL;
  839. d->nChild= 0;
  840. d->nText = 0;
  841. d->nClear = 0;
  842. d->nAttribute = 0;
  843. d->isDeclaration = isDeclaration;
  844. d->pParent = pParent;
  845. d->pChild= NULL;
  846. d->pText= NULL;
  847. d->pClear= NULL;
  848. d->pAttribute= NULL;
  849. d->pOrder= NULL;
  850. updateName_WOSD(lpszName);
  851. }
  852. XMLNode XMLNode::createXMLTopNode_WOSD(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); }
  853. XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); }
  854. #define MEMORYINCREASE 50
  855. static int memoryIncrease=0;
  856. static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  857. {
  858. if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); }
  859. if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem);
  860. // if (!p)
  861. // {
  862. // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220);
  863. // }
  864. return p;
  865. }
  866. // private:
  867. int XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xtype)
  868. {
  869. if (index<0) return -1;
  870. int i=0,j=(int)((index<<2)+xtype),*o=d->pOrder; while (o[i]!=j) i++; return i;
  871. }
  872. // private:
  873. // update "order" information when deleting a content of a XMLNode
  874. int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  875. {
  876. int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t);
  877. memmove(o+i, o+i+1, (n-i)*sizeof(int));
  878. for (;i<n;i++)
  879. if ((o[i]&3)==(int)t) o[i]-=4;
  880. // We should normally do:
  881. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  882. // but we skip reallocation because it's too time consuming.
  883. // Anyway, at the end, it will be free'd completely at once.
  884. return i;
  885. }
  886. void *XMLNode::addToOrder(int *_pos, int nc, void *p, int size, XMLElementType xtype)
  887. {
  888. // in: *_pos is the position inside d->pOrder ("-1" means "EndOf")
  889. // out: *_pos is the index inside p
  890. p=myRealloc(p,(nc+1),memoryIncrease,size);
  891. int n=d->nChild+d->nText+d->nClear;
  892. d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int));
  893. int pos=*_pos,*o=d->pOrder;
  894. if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  895. int i=pos;
  896. memmove(o+i+1, o+i, (n-i)*sizeof(int));
  897. while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++;
  898. if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; }
  899. o[i]=o[pos];
  900. for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4;
  901. *_pos=pos=o[pos]>>2;
  902. memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size);
  903. return p;
  904. }
  905. // Add a child node to the given element.
  906. XMLNode XMLNode::addChild_WOSD(XMLCSTR lpszName, char isDeclaration, int pos)
  907. {
  908. if (!lpszName) return emptyXMLNode;
  909. d->pChild=(XMLNode*)addToOrder(&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild);
  910. d->pChild[pos].d=NULL;
  911. d->pChild[pos]=XMLNode(d,lpszName,isDeclaration);
  912. d->nChild++;
  913. return d->pChild[pos];
  914. }
  915. // Add an attribute to an element.
  916. XMLAttribute *XMLNode::addAttribute_WOSD(XMLCSTR lpszName, XMLCSTR lpszValuev)
  917. {
  918. if (!lpszName) return &emptyXMLAttribute;
  919. int nc=d->nAttribute;
  920. d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute));
  921. XMLAttribute *pAttr=d->pAttribute+nc;
  922. pAttr->lpszName = lpszName;
  923. pAttr->lpszValue = lpszValuev;
  924. d->nAttribute++;
  925. return pAttr;
  926. }
  927. // Add text to the element.
  928. XMLCSTR XMLNode::addText_WOSD(XMLCSTR lpszValue, int pos)
  929. {
  930. if (!lpszValue) return NULL;
  931. d->pText=(XMLCSTR*)addToOrder(&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText);
  932. d->pText[pos]=lpszValue;
  933. d->nText++;
  934. return lpszValue;
  935. }
  936. // Add clear (unformatted) text to the element.
  937. XMLClear *XMLNode::addClear_WOSD(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos)
  938. {
  939. if (!lpszValue) return &emptyXMLClear;
  940. d->pClear=(XMLClear *)addToOrder(&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear);
  941. XMLClear *pNewClear=d->pClear+pos;
  942. pNewClear->lpszValue = lpszValue;
  943. pNewClear->lpszOpenTag = lpszOpen;
  944. pNewClear->lpszCloseTag = lpszClose;
  945. d->nClear++;
  946. return pNewClear;
  947. }
  948. // Trim the end of the text to remove white space characters.
  949. static void FindEndOfText(XMLCSTR lpszToken, int *pcbText)
  950. {
  951. XMLCHAR ch;
  952. int cbText;
  953. assert(lpszToken);
  954. assert(pcbText);
  955. cbText = (*pcbText)-1;
  956. for(;;)
  957. {
  958. assert(cbText >= 0);
  959. ch = lpszToken[cbText];
  960. if XML_isSPACECHAR(ch) cbText--;
  961. else { *pcbText = cbText+1; return; }
  962. }
  963. }
  964. // private:
  965. // Parse a clear (unformatted) type node.
  966. int XMLNode::ParseClearTag(void *px, void *pa)
  967. {
  968. XML *pXML=(XML *)px;
  969. ALLXMLClearTag *pClear=(ALLXMLClearTag *)pa;
  970. int cbTemp = 0;
  971. XMLCSTR lpszTemp;
  972. XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex];
  973. // Find the closing tag
  974. lpszTemp = _tcsstr(lpXML, pClear->lpszClose);
  975. // Iterate through the tokens until we find the closing tag.
  976. if (lpszTemp)
  977. {
  978. // Cache the size and increment the index
  979. cbTemp = (int)(lpszTemp - lpXML);
  980. pXML->nIndex += cbTemp+(int)_tcslen(pClear->lpszClose);
  981. // Add the clear node to the current element
  982. addClear_WOSD(stringDup(lpXML,cbTemp), pClear->lpszOpen, pClear->lpszClose);
  983. return TRUE;
  984. }
  985. // If we failed to find the end tag
  986. pXML->error = eXMLErrorUnmatchedEndClearTag;
  987. return FALSE;
  988. }
  989. void XMLNode::exactMemory(XMLNodeData *d)
  990. {
  991. if (memoryIncrease<=1) return;
  992. if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nAttribute+d->nText+d->nClear)*sizeof(int));
  993. if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode));
  994. if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute));
  995. if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR));
  996. if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear));
  997. }
  998. // private:
  999. // Recursively parse an XML element.
  1000. int XMLNode::ParseXMLElement(void *pa)
  1001. {
  1002. XML *pXML=(XML *)pa;
  1003. int cbToken;
  1004. enum XMLTokenTypeTag type;
  1005. NextToken token;
  1006. XMLCSTR lpszTemp=NULL;
  1007. int cbTemp=0;
  1008. char nDeclaration;
  1009. XMLCSTR lpszText=NULL;
  1010. XMLNode pNew;
  1011. enum Status status; // inside or outside a tag
  1012. enum Attrib attrib = eAttribName;
  1013. assert(pXML);
  1014. // If this is the first call to the function
  1015. if (pXML->nFirst)
  1016. {
  1017. // Assume we are outside of a tag definition
  1018. pXML->nFirst = FALSE;
  1019. status = eOutsideTag;
  1020. } else
  1021. {
  1022. // If this is not the first call then we should only be called when inside a tag.
  1023. status = eInsideTag;
  1024. }
  1025. // Iterate through the tokens in the document
  1026. for(;;)
  1027. {
  1028. // Obtain the next token
  1029. token = GetNextToken(pXML, &cbToken, &type);
  1030. if (type != eTokenError)
  1031. {
  1032. // Check the current status
  1033. switch(status)
  1034. {
  1035. // If we are outside of a tag definition
  1036. case eOutsideTag:
  1037. // Check what type of token we obtained
  1038. switch(type)
  1039. {
  1040. // If we have found text or quoted text
  1041. case eTokenText:
  1042. case eTokenCloseTag: /* '>' */
  1043. case eTokenShortHandClose: /* '/>' */
  1044. case eTokenQuotedText:
  1045. case eTokenEquals:
  1046. if (!lpszText)
  1047. {
  1048. lpszText = token.pStr;
  1049. }
  1050. break;
  1051. // If we found a start tag '<' and declarations '<?'
  1052. case eTokenTagStart:
  1053. case eTokenDeclaration:
  1054. // Cache whether this new element is a declaration or not
  1055. nDeclaration = (type == eTokenDeclaration);
  1056. // If we have node text then add this to the element
  1057. if (lpszText)
  1058. {
  1059. cbTemp = (int)(token.pStr - lpszText);
  1060. FindEndOfText(lpszText, &cbTemp);
  1061. lpszText=fromXMLString(lpszText,cbTemp,pXML);
  1062. if (!lpszText) return FALSE;
  1063. addText_WOSD(lpszText);
  1064. lpszText=NULL;
  1065. }
  1066. // Find the name of the tag
  1067. token = GetNextToken(pXML, &cbToken, &type);
  1068. // Return an error if we couldn't obtain the next token or
  1069. // it wasnt text
  1070. if (type != eTokenText)
  1071. {
  1072. pXML->error = eXMLErrorMissingTagName;
  1073. return FALSE;
  1074. }
  1075. // If we found a new element which is the same as this
  1076. // element then we need to pass this back to the caller..
  1077. #ifdef APPROXIMATE_PARSING
  1078. if (d->lpszName &&
  1079. myTagCompare(d->lpszName, token.pStr) == 0)
  1080. {
  1081. // Indicate to the caller that it needs to create a
  1082. // new element.
  1083. pXML->lpNewElement = token.pStr;
  1084. pXML->cbNewElement = cbToken;
  1085. return TRUE;
  1086. } else
  1087. #endif
  1088. {
  1089. // If the name of the new element differs from the name of
  1090. // the current element we need to add the new element to
  1091. // the current one and recurse
  1092. pNew = addChild_WOSD(stringDup(token.pStr,cbToken), nDeclaration);
  1093. while (!pNew.isEmpty())
  1094. {
  1095. // Callself to process the new node. If we return
  1096. // FALSE this means we dont have any more
  1097. // processing to do...
  1098. if (!pNew.ParseXMLElement(pXML)) return FALSE;
  1099. else
  1100. {
  1101. // If the recursive call to this function
  1102. // ended on an end tag specified in the XML then
  1103. // we need to unwind the calls to this
  1104. // function until we find the appropriate node
  1105. // (the element name and end tag name must
  1106. // match)
  1107. if (pXML->cbEndTag)
  1108. {
  1109. // If we are back at the root node then we
  1110. // have an unmatched end tag
  1111. if (!d->lpszName)
  1112. {
  1113. pXML->error=eXMLErrorUnmatchedEndTag;
  1114. return FALSE;
  1115. }
  1116. // If the end tag matches the name of this
  1117. // element then we only need to unwind
  1118. // once more...
  1119. if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
  1120. {
  1121. pXML->cbEndTag = 0;
  1122. }
  1123. return TRUE;
  1124. } else
  1125. if (pXML->cbNewElement)
  1126. {
  1127. // If the call indicated a new element is to
  1128. // be created on THIS element.
  1129. // If the name of this element matches the
  1130. // name of the element we need to create
  1131. // then we need to return to the caller
  1132. // and let it process the element.
  1133. if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
  1134. {
  1135. return TRUE;
  1136. }
  1137. // Add the new element and recurse
  1138. pNew = addChild_WOSD(stringDup(pXML->lpNewElement,pXML->cbNewElement));
  1139. pXML->cbNewElement = 0;
  1140. }
  1141. else
  1142. {
  1143. // If we didn't have a new element to create
  1144. pNew = emptyXMLNode;
  1145. }
  1146. }
  1147. }
  1148. }
  1149. break;
  1150. // If we found an end tag
  1151. case eTokenTagEnd:
  1152. // If we have node text then add this to the element
  1153. if (lpszText)
  1154. {
  1155. cbTemp = (int)(token.pStr - lpszText);
  1156. FindEndOfText(lpszText, &cbTemp);
  1157. lpszText=fromXMLString(lpszText,cbTemp,pXML);
  1158. if (!lpszText) return FALSE;
  1159. addText_WOSD(lpszText);
  1160. lpszText = NULL;
  1161. }
  1162. // Find the name of the end tag
  1163. token = GetNextToken(pXML, &cbTemp, &type);
  1164. // The end tag should be text
  1165. if (type != eTokenText)
  1166. {
  1167. pXML->error = eXMLErrorMissingEndTagName;
  1168. return FALSE;
  1169. }
  1170. lpszTemp = token.pStr;
  1171. // After the end tag we should find a closing tag
  1172. token = GetNextToken(pXML, &cbToken, &type);
  1173. if (type != eTokenCloseTag)
  1174. {
  1175. pXML->error = eXMLErrorMissingEndTagName;
  1176. return FALSE;
  1177. }
  1178. // We need to return to the previous caller. If the name
  1179. // of the tag cannot be found we need to keep returning to
  1180. // caller until we find a match
  1181. if (myTagCompare(d->lpszName, lpszTemp) != 0)
  1182. #ifdef STRICT_PARSING
  1183. {
  1184. pXML->error=eXMLErrorUnmatchedEndTag;
  1185. pXML->nIndexMissigEndTag=pXML->nIndex;
  1186. return FALSE;
  1187. }
  1188. #else
  1189. {
  1190. pXML->error=eXMLErrorMissingEndTag;
  1191. pXML->nIndexMissigEndTag=pXML->nIndex;
  1192. pXML->lpEndTag = lpszTemp;
  1193. pXML->cbEndTag = cbTemp;
  1194. }
  1195. #endif
  1196. // Return to the caller
  1197. exactMemory(d);
  1198. return TRUE;
  1199. // If we found a clear (unformatted) token
  1200. case eTokenClear:
  1201. // If we have node text then add this to the element
  1202. if (lpszText)
  1203. {
  1204. cbTemp = (int)(token.pStr - lpszText);
  1205. FindEndOfText(lpszText, &cbTemp);
  1206. addText_WOSD(stringDup(lpszText,cbTemp));
  1207. lpszText = NULL;
  1208. }
  1209. if (!ParseClearTag(pXML, token.pClr))
  1210. {
  1211. return FALSE;
  1212. }
  1213. break;
  1214. default:
  1215. break;
  1216. }
  1217. break;
  1218. // If we are inside a tag definition we need to search for attributes
  1219. case eInsideTag:
  1220. // Check what part of the attribute (name, equals, value) we
  1221. // are looking for.
  1222. switch(attrib)
  1223. {
  1224. // If we are looking for a new attribute
  1225. case eAttribName:
  1226. // Check what the current token type is
  1227. switch(type)
  1228. {
  1229. // If the current type is text...
  1230. // Eg. 'attribute'
  1231. case eTokenText:
  1232. // Cache the token then indicate that we are next to
  1233. // look for the equals
  1234. lpszTemp = token.pStr;
  1235. cbTemp = cbToken;
  1236. attrib = eAttribEquals;
  1237. break;
  1238. // If we found a closing tag...
  1239. // Eg. '>'
  1240. case eTokenCloseTag:
  1241. // We are now outside the tag
  1242. status = eOutsideTag;
  1243. break;
  1244. // If we found a short hand '/>' closing tag then we can
  1245. // return to the caller
  1246. case eTokenShortHandClose:
  1247. exactMemory(d);
  1248. return TRUE;
  1249. // Errors...
  1250. case eTokenQuotedText

Large files files are truncated, but you can click here to view the full file