PageRenderTime 64ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/src/xmlParser.cpp

https://github.com/Plantain/XCSoar
C++ | 2088 lines | 1906 code | 87 blank | 95 comment | 161 complexity | 6cb0ec278345a9dba1f2b9f48139e680 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /*
  2. ****************************************************************************
  3. * <P> XML.c - implementation file for basic XML parser written in ANSI C++
  4. * for portability. It works by using recursion and a node tree for breaking
  5. * down the elements of an XML document. </P>
  6. *
  7. * @version V1.08
  8. *
  9. * @author Frank Vanden Berghen
  10. * based on original implementation by Martyn C Brown
  11. *
  12. * NOTE:
  13. *
  14. * If you add "#define APPROXIMATE_PARSING", on the first line of this file
  15. * the parser will see the following XML-stream:
  16. * <data name="n1">
  17. * <data name="n2">
  18. * <data name="n3" />
  19. * as equivalent to the following XML-stream:
  20. * <data name="n1" />
  21. * <data name="n2" />
  22. * <data name="n3" />
  23. * This can be useful for badly-formed XML-streams but prevent the use
  24. * of the following XML-stream:
  25. * <data name="n1">
  26. * <data name="n2">
  27. * <data name="n3" />
  28. * </data>
  29. * </data>
  30. *
  31. *
  32. * This library is free software; you can redistribute it and/or
  33. * modify it under the terms of the GNU Lesser General Public
  34. * License version 2.1 as published by the Free Software Foundation
  35. *
  36. * This library is distributed in the hope that it will be useful,
  37. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  38. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  39. * Lesser General Public License for more details.
  40. *
  41. * You should have received a copy of the GNU Lesser General Public
  42. * License along with this library; if not, write to the Free Software
  43. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  44. *
  45. ****************************************************************************
  46. */
  47. #if defined(WIN32)
  48. #include <windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte
  49. // to handle unicode files
  50. #endif
  51. #include "xmlParser.h"
  52. #include "Compatibility/string.h"
  53. #include <assert.h>
  54. #include <memory.h>
  55. #include <stdio.h>
  56. #include <stdlib.h>
  57. bool XMLNode::GlobalError = false;
  58. XMLNode XMLNode::emptyXMLNode;
  59. XMLClear XMLNode::emptyXMLClear = { NULL, NULL, NULL };
  60. XMLAttribute XMLNode::emptyXMLAttribute = { NULL, NULL };
  61. inline int mmin(const int t1, const int t2) { return t1 < t2 ? t1 : t2; }
  62. // Enumeration used to decipher what type a token is
  63. typedef enum TokenTypeTag
  64. {
  65. eTokenText = 0,
  66. eTokenQuotedText,
  67. eTokenTagStart, /* "<" */
  68. eTokenTagEnd, /* "</" */
  69. eTokenCloseTag, /* ">" */
  70. eTokenEquals, /* "=" */
  71. eTokenDeclaration, /* "<?" */
  72. eTokenShortHandClose, /* "/>" */
  73. eTokenClear,
  74. eTokenError
  75. } TokenTypeTag;
  76. #define INDENTCHAR _T('\t')
  77. typedef struct ClearTag
  78. {
  79. LPCTSTR lpszOpen;
  80. LPCTSTR lpszClose;
  81. } ClearTag;
  82. // Main structure used for parsing XML
  83. typedef struct XML
  84. {
  85. LPCTSTR lpXML;
  86. int nIndex;
  87. enum XMLError error;
  88. LPCTSTR lpEndTag;
  89. int cbEndTag;
  90. LPCTSTR lpNewElement;
  91. int cbNewElement;
  92. int nFirst;
  93. ClearTag *pClrTags;
  94. } XML;
  95. typedef struct
  96. {
  97. ClearTag *pClr;
  98. LPCTSTR pStr;
  99. } NextToken;
  100. // Enumeration used when parsing attributes
  101. typedef enum Attrib
  102. {
  103. eAttribName = 0,
  104. eAttribEquals,
  105. eAttribValue
  106. } Attrib;
  107. // Enumeration used when parsing elements to dictate whether we are currently
  108. // inside a tag
  109. typedef enum Status
  110. {
  111. eInsideTag = 0,
  112. eOutsideTag
  113. } Status;
  114. static LPTSTR
  115. toXMLString(LPTSTR dest, LPCTSTR source)
  116. {
  117. LPTSTR dd = dest;
  118. while (*source) {
  119. switch (*source) {
  120. case '<':
  121. _tcscpy(dest, _T("&lt;"));
  122. dest += 4;
  123. break;
  124. case '>':
  125. _tcscpy(dest, _T("&gt;"));
  126. dest += 4;
  127. break;
  128. case '&':
  129. _tcscpy(dest, _T("&amp;"));
  130. dest += 5;
  131. break;
  132. case '\'':
  133. _tcscpy(dest, _T("&apos;"));
  134. dest += 6;
  135. break;
  136. case '"':
  137. _tcscpy(dest, _T("&quot;"));
  138. dest += 6;
  139. break;
  140. default:
  141. *dest = *source;
  142. dest++;
  143. break;
  144. }
  145. source++;
  146. }
  147. *dest = 0;
  148. return dd;
  149. }
  150. static int
  151. lengthXMLString(LPCTSTR source)
  152. {
  153. int r = 0;
  154. while (*source) {
  155. switch (*source) {
  156. case '<':
  157. r += 3;
  158. break;
  159. case '>':
  160. r += 3;
  161. break;
  162. case '&':
  163. r += 4;
  164. break;
  165. case '\'':
  166. r += 5;
  167. break;
  168. case '"':
  169. r += 5;
  170. break;
  171. }
  172. source++;
  173. r++;
  174. }
  175. return r;
  176. }
  177. LPTSTR
  178. toXMLString(LPCTSTR source)
  179. {
  180. LPTSTR dest = (LPTSTR)malloc((lengthXMLString(source) + 1) * sizeof(TCHAR));
  181. assert(dest);
  182. return toXMLString(dest, source);
  183. }
  184. LPTSTR
  185. toXMLStringFast(LPTSTR *dest, int *destSz, LPCTSTR source)
  186. {
  187. int l = lengthXMLString(source) + 1;
  188. if (l > *destSz) {
  189. *destSz = l;
  190. *dest = (LPTSTR)realloc(*dest, l * sizeof(TCHAR));
  191. }
  192. return toXMLString(*dest, source);
  193. }
  194. static LPTSTR
  195. fromXMLString(LPCTSTR s, int lo)
  196. {
  197. // This function is the opposite of the function "toXMLString". It decodes the escape
  198. // sequences &amp;, &quot;, &apos;, &lt;, &gt; and replace them by the characters
  199. // &,",',<,>. This function is used internally by the XML Parser. All the calls to
  200. // the XML library will always gives you back "decoded" strings.
  201. //
  202. // in: string (s) and length (lo) of string
  203. // out: new allocated string converted from xml
  204. if (!s)
  205. return NULL;
  206. int ll = 0;
  207. LPTSTR d;
  208. LPCTSTR ss = s;
  209. while (((lo--) > 0) && (*s)) {
  210. if (*s == _T('&')) {
  211. s++;
  212. if (_tcsnicmp(s, _T("lt;"), 3) == 0) {
  213. s += 2;
  214. lo -= 3;
  215. } else if (_tcsnicmp(s, _T("gt;"), 3) == 0) {
  216. s += 2;
  217. lo -= 3;
  218. } else if (_tcsnicmp(s, _T("amp;"), 4) == 0) {
  219. s += 3;
  220. lo -= 4;
  221. } else if (_tcsnicmp(s, _T("apos;"), 5) == 0) {
  222. s += 4;
  223. lo -= 5;
  224. } else if (_tcsnicmp(s, _T("quot;"), 5) == 0) {
  225. s += 4;
  226. lo -= 5;
  227. } else {
  228. ll = 0;
  229. while (s[ll] && (s[ll] != _T(';')) && (ll < 10))
  230. ll++;
  231. ll++;
  232. d = (LPTSTR)malloc((ll + 1) * sizeof(TCHAR));
  233. assert(d);
  234. d[ll] = 0;
  235. while (ll--)
  236. d[ll] = s[ll];
  237. #ifndef NDEBUG
  238. #ifdef _UNICODE
  239. printf("unknown escape character: '&%S'",d);
  240. #else
  241. printf("unknown escape character: '&%s'", d);
  242. #endif
  243. #endif
  244. free(d);
  245. XMLNode::GlobalError = true;
  246. return (LPTSTR)NULL;
  247. }
  248. }
  249. ll++;
  250. s++;
  251. }
  252. d = (LPTSTR)malloc((ll + 1) * sizeof(TCHAR));
  253. assert(d);
  254. TCHAR *result = d;
  255. while (ll--) {
  256. if (*ss == _T('&')) {
  257. ss++;
  258. if (_tcsnicmp(ss, _T("lt;" ), 3) == 0) {
  259. *(d++) = _T('<' );
  260. ss += 3;
  261. } else if (_tcsnicmp(ss, _T("gt;" ), 3) == 0) {
  262. *(d++) = _T('>' );
  263. ss += 3;
  264. } else if (_tcsnicmp(ss, _T("amp;" ), 4) == 0) {
  265. *(d++) = _T('&' );
  266. ss += 4;
  267. } else if (_tcsnicmp(ss, _T("apos;"), 5) == 0) {
  268. *(d++) = _T('\'');
  269. ss += 5;
  270. } else {
  271. *(d++) = _T('"' );
  272. ss += 5;
  273. }
  274. } else {
  275. *(d++) = *ss;
  276. ss++;
  277. }
  278. }
  279. *d = 0;
  280. return result;
  281. }
  282. static char
  283. myTagCompare(LPCTSTR cclose, LPCTSTR copen)
  284. {
  285. // !!!! WARNING strange convention&:
  286. // return 0 if equals
  287. // return 1 if different
  288. if (!cclose)
  289. return 1;
  290. int l = (int)_tcslen(cclose);
  291. if (_tcsnicmp(cclose, copen, l) != 0)
  292. return 1;
  293. const TCHAR c = copen[l];
  294. if ((c == _T('\n')) ||
  295. (c == _T(' ')) ||
  296. (c == _T('\t')) ||
  297. (c == _T('\r')) ||
  298. (c == _T('/')) ||
  299. (c == _T('<')) ||
  300. (c == _T('>')) ||
  301. (c == _T('=')))
  302. return 0;
  303. return 1;
  304. }
  305. // update "order" information when deleting a content of a XMLNode
  306. void
  307. XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index)
  308. {
  309. int j = (int)((index << 2) + t), i = 0, n = nElement(d) + 1, *o = d->pOrder;
  310. while ((o[i] != j) && (i < n))
  311. i++;
  312. n--;
  313. memmove(o + i, o + i + 1, (n - i) * sizeof(int));
  314. for (; i < n; i++)
  315. if ((o[i] & 3) == (int)t)
  316. o[i] -= 4;
  317. // We should normally do:
  318. // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int));
  319. // but we skip reallocation because it's too time consuming.
  320. // Anyway, at the end, it will be free'd completely at once.
  321. }
  322. // Obtain the next character from the string.
  323. static inline TCHAR
  324. getNextChar(XML *pXML)
  325. {
  326. TCHAR ch = pXML->lpXML[pXML->nIndex];
  327. if (ch != 0)
  328. pXML->nIndex++;
  329. return ch;
  330. }
  331. // Find next non-white space character.
  332. static TCHAR
  333. FindNonWhiteSpace(XML *pXML)
  334. {
  335. TCHAR ch = 0; // VENTA3 fix initialize
  336. int nExit = FALSE;
  337. assert(pXML);
  338. // Iterate through characters in the string until we find a NULL or a
  339. // non-white space character
  340. while ((nExit == FALSE) && ((ch = getNextChar(pXML)) != 0)) {
  341. switch (ch) {
  342. // Ignore white space
  343. case _T('\n'):
  344. case _T(' '):
  345. case _T('\t'):
  346. case _T('\r'):
  347. continue;
  348. default:
  349. nExit = TRUE;
  350. }
  351. }
  352. return ch;
  353. }
  354. // Find the next token in a string.
  355. // pcbToken contains the number of characters that have been read.
  356. static NextToken
  357. GetNextToken(XML *pXML, int *pcbToken, enum TokenTypeTag *pType)
  358. {
  359. NextToken result;
  360. LPCTSTR lpXML;
  361. TCHAR ch;
  362. TCHAR chTemp;
  363. int nSize;
  364. int nFoundMatch;
  365. int nExit;
  366. int n;
  367. LPCTSTR lpszOpen;
  368. int cbOpen;
  369. int nIsText = FALSE;
  370. result.pClr = NULL; // avoid compiler warning for uninitialised variable
  371. // Find next non-white space character
  372. ch = FindNonWhiteSpace(pXML);
  373. if (ch) {
  374. // Cache the current string pointer
  375. lpXML = pXML->lpXML;
  376. result.pStr = &lpXML[pXML->nIndex - 1];
  377. // First check whether the token is in the clear tag list (meaning it
  378. // does not need formatting).
  379. n = 0;
  380. while (TRUE) {
  381. // Obtain the name of the open part of the clear tag
  382. lpszOpen = pXML->pClrTags[n].lpszOpen;
  383. if (lpszOpen) {
  384. // Compare the open tag with the current token
  385. cbOpen = (int)_tcslen(lpszOpen);
  386. // if (myTagCompare(lpszOpen, result.pStr) == 0)
  387. if (_tcsnicmp(lpszOpen, result.pStr, cbOpen) == 0) {
  388. result.pClr = &pXML->pClrTags[n];
  389. pXML->nIndex += (int)(_tcslen(lpszOpen) - 1);
  390. *pType = eTokenClear;
  391. return result;
  392. }
  393. n++;
  394. } else
  395. break;
  396. }
  397. // If we didn't find a clear tag then check for standard tokens
  398. chTemp = 0;
  399. lpXML = pXML->lpXML;
  400. switch (ch) {
  401. // Check for quotes
  402. case _T('\''):
  403. case _T('\"'):
  404. // Type of token
  405. *pType = eTokenQuotedText;
  406. chTemp = ch;
  407. n = pXML->nIndex;
  408. // Set the size
  409. nSize = 1;
  410. nFoundMatch = FALSE;
  411. // Search through the string to find a matching quote
  412. while (((ch = getNextChar(pXML))) != 0) {
  413. nSize++;
  414. if (ch == chTemp) {
  415. nFoundMatch = TRUE;
  416. break;
  417. }
  418. if (ch == _T('<'))
  419. break;
  420. }
  421. // If we failed to find a matching quote
  422. if (nFoundMatch == FALSE) {
  423. pXML->nIndex = n - 1;
  424. ch = getNextChar(pXML);
  425. nIsText = TRUE;
  426. break;
  427. }
  428. // 4.02.2002
  429. if (FindNonWhiteSpace(pXML)) {
  430. pXML->nIndex--;
  431. }
  432. break;
  433. // Equals (used with attribute values)
  434. case _T('='):
  435. nSize = 1;
  436. *pType = eTokenEquals;
  437. break;
  438. // Close tag
  439. case _T('>'):
  440. nSize = 1;
  441. *pType = eTokenCloseTag;
  442. break;
  443. // Check for tag start and tag end
  444. case _T('<'):
  445. // Peek at the next character to see if we have an end tag '</',
  446. // or an xml declaration '<?'
  447. chTemp = pXML->lpXML[pXML->nIndex];
  448. // If we have a tag end...
  449. if (chTemp == _T('/')) {
  450. // Set the type and ensure we point at the next character
  451. getNextChar(pXML);
  452. *pType = eTokenTagEnd;
  453. nSize = 2;
  454. }
  455. // If we have an XML declaration tag
  456. else if (chTemp == _T('?')) {
  457. // Set the type and ensure we point at the next character
  458. getNextChar(pXML);
  459. *pType = eTokenDeclaration;
  460. nSize = 2;
  461. }
  462. // Otherwise we must have a start tag
  463. else {
  464. *pType = eTokenTagStart;
  465. nSize = 1;
  466. }
  467. break;
  468. // Check to see if we have a short hand type end tag ('/>').
  469. case _T('/'):
  470. // Peek at the next character to see if we have a short end tag '/>'
  471. chTemp = pXML->lpXML[pXML->nIndex];
  472. // If we have a short hand end tag...
  473. if (chTemp == _T('>')) {
  474. // Set the type and ensure we point at the next character
  475. getNextChar(pXML);
  476. *pType = eTokenShortHandClose;
  477. nSize = 2;
  478. break;
  479. }
  480. // If we haven't found a short hand closing tag then drop into the
  481. // text process
  482. // Other characters
  483. default:
  484. nIsText = TRUE;
  485. }
  486. // If this is a TEXT node
  487. if (nIsText) {
  488. // Indicate we are dealing with text
  489. *pType = eTokenText;
  490. nSize = 1;
  491. nExit = FALSE;
  492. while ((nExit == FALSE) && ((ch = getNextChar(pXML)) != 0)) {
  493. switch (ch) {
  494. // Break when we find white space
  495. case _T('\n'):
  496. case _T(' '):
  497. case _T('\t'):
  498. case _T('\r'):
  499. nExit = TRUE;
  500. break;
  501. // If we find a slash then this maybe text or a short hand end tag.
  502. case _T('/'):
  503. // Peek at the next character to see it we have short hand end tag
  504. chTemp = pXML->lpXML[pXML->nIndex];
  505. // If we found a short hand end tag then we need to exit the loop
  506. if (chTemp == _T('>')) {
  507. pXML->nIndex--; // 03.02.2002
  508. nExit = TRUE;
  509. } else {
  510. nSize++;
  511. }
  512. break;
  513. // Break when we find a terminator and decrement the index and
  514. // column count so that we are pointing at the right character
  515. // the next time we are called.
  516. case _T('<'):
  517. case _T('>'):
  518. case _T('='):
  519. pXML->nIndex--;
  520. nExit = TRUE;
  521. break;
  522. case 0:
  523. nExit = TRUE;
  524. break;
  525. default:
  526. nSize++;
  527. }
  528. }
  529. }
  530. *pcbToken = nSize;
  531. } else {
  532. // If we failed to obtain a valid character
  533. *pcbToken = 0;
  534. *pType = eTokenError;
  535. result.pStr = NULL;
  536. }
  537. return result;
  538. }
  539. // Parse XML errors into a user friendly string.
  540. LPCTSTR XMLNode::getError(XMLError error)
  541. {
  542. switch (error) {
  543. case eXMLErrorNone:
  544. return _T("No error");
  545. case eXMLErrorEmpty:
  546. return _T("No XML data");
  547. case eXMLErrorFirstNotStartTag:
  548. return _T("First token not start tag");
  549. case eXMLErrorMissingTagName:
  550. return _T("Missing start tag name");
  551. case eXMLErrorMissingEndTagName:
  552. return _T("Missing end tag name");
  553. case eXMLErrorNoMatchingQuote:
  554. return _T("Unmatched quote");
  555. case eXMLErrorUnmatchedEndTag:
  556. return _T("Unmatched end tag");
  557. case eXMLErrorUnexpectedToken:
  558. return _T("Unexpected token found");
  559. case eXMLErrorInvalidTag:
  560. return _T("Invalid tag found");
  561. case eXMLErrorNoElements:
  562. return _T("No elements found");
  563. case eXMLErrorFileNotFound:
  564. return _T("File not found");
  565. }
  566. return _T("Unknown");
  567. }
  568. XMLNode XMLNode::createRoot(LPCTSTR lpszName)
  569. {
  570. return XMLNode(NULL, lpszName, false);
  571. }
  572. XMLNode::XMLNode(XMLNode *pParent, LPCTSTR lpszName, int isDeclaration)
  573. {
  574. d = (XMLNodeData*)malloc(sizeof(XMLNodeData));
  575. assert(d);
  576. d->ref_count = 1;
  577. d->lpszName = lpszName;
  578. d->nChild = 0;
  579. d->nText = 0;
  580. d->nClear = 0;
  581. d->nAttribute = 0;
  582. d->isDeclaration = isDeclaration;
  583. d->pParent = pParent;
  584. d->pChild = NULL;
  585. d->pText = NULL;
  586. d->pClear = NULL;
  587. d->pAttribute = NULL;
  588. d->pOrder = NULL;
  589. }
  590. const int memoryIncrease = 50;
  591. static void *
  592. myRealloc(void *p, int newsize, int memInc, int sizeofElem)
  593. {
  594. int blocks = newsize / memInc + 1;
  595. if (p == NULL) {
  596. void* v = malloc(blocks * memInc * sizeofElem);
  597. assert(v);
  598. return v;
  599. }
  600. if ((newsize % memInc) == 0) {
  601. p = realloc(p, blocks * memInc * sizeofElem);
  602. assert(p);
  603. }
  604. return p;
  605. }
  606. void
  607. XMLNode::addToOrder(int index, int type)
  608. {
  609. int n = nElement();
  610. d->pOrder = (int*)myRealloc(d->pOrder, n + 1, memoryIncrease * 3, sizeof(int));
  611. assert(d->pOrder);
  612. d->pOrder[n] = (index << 2) + type;
  613. }
  614. // Add a child node to the given element.
  615. XMLNode
  616. XMLNode::AddChild(LPCTSTR lpszName, int isDeclaration)
  617. {
  618. if (!lpszName)
  619. return emptyXMLNode;
  620. int nc = d->nChild;
  621. d->pChild = (XMLNode*)myRealloc(d->pChild, (nc + 1), memoryIncrease,
  622. sizeof(XMLNode));
  623. assert(d->pChild);
  624. d->pChild[nc].d = NULL;
  625. d->pChild[nc] = XMLNode(this, lpszName, isDeclaration);
  626. addToOrder(nc, eNodeChild);
  627. d->nChild++;
  628. return d->pChild[nc];
  629. }
  630. // Add an attribute to an element.
  631. XMLAttribute *
  632. XMLNode::AddAttribute(LPCTSTR lpszName, LPCTSTR lpszValuev)
  633. {
  634. if (!lpszName)
  635. return &emptyXMLAttribute;
  636. int na = d->nAttribute;
  637. d->pAttribute = (XMLAttribute*)myRealloc(d->pAttribute, (na + 1),
  638. memoryIncrease, sizeof(XMLAttribute));
  639. XMLAttribute *pAttr = d->pAttribute + na;
  640. pAttr->lpszName = lpszName;
  641. pAttr->lpszValue = lpszValuev;
  642. addToOrder(na, eNodeAttribute);
  643. d->nAttribute++;
  644. return pAttr;
  645. }
  646. // Add text to the element.
  647. LPCTSTR XMLNode::AddText(LPCTSTR lpszValue)
  648. {
  649. if (!lpszValue)
  650. return NULL;
  651. int nt = d->nText;
  652. d->pText = (LPCTSTR*)myRealloc(d->pText, (nt + 1), memoryIncrease,
  653. sizeof(LPTSTR));
  654. d->pText[nt] = lpszValue;
  655. addToOrder(nt, eNodeText);
  656. d->nText++;
  657. return d->pText[nt];
  658. }
  659. // Add clear (unformatted) text to the element.
  660. XMLClear *XMLNode::AddClear(LPCTSTR lpszValue, LPCTSTR lpszOpen, LPCTSTR lpszClose)
  661. {
  662. if (!lpszValue)
  663. return &emptyXMLClear;
  664. int nc = d->nClear;
  665. d->pClear = (XMLClear *)myRealloc(d->pClear, (nc + 1), memoryIncrease,
  666. sizeof(XMLClear));
  667. XMLClear *pNewClear = d->pClear + nc;
  668. pNewClear->lpszValue = lpszValue;
  669. pNewClear->lpszOpenTag = lpszOpen;
  670. pNewClear->lpszCloseTag = lpszClose;
  671. addToOrder(nc, eNodeClear);
  672. d->nClear++;
  673. return pNewClear;
  674. }
  675. // Trim the end of the text to remove white space characters.
  676. static void
  677. FindEndOfText(LPCTSTR lpszToken, int *pcbText)
  678. {
  679. TCHAR ch;
  680. int cbText;
  681. assert(lpszToken);
  682. assert(pcbText);
  683. cbText = (*pcbText) - 1;
  684. while (1) {
  685. assert(cbText >= 0);
  686. ch = lpszToken[cbText];
  687. switch (ch) {
  688. case _T('\r'):
  689. case _T('\n'):
  690. case _T('\t'):
  691. case _T(' '):
  692. cbText--;
  693. break;
  694. default:
  695. *pcbText = cbText + 1;
  696. return;
  697. }
  698. }
  699. }
  700. // Duplicate a given string.
  701. LPTSTR
  702. stringDup(LPCTSTR lpszData, int cbData)
  703. {
  704. if (lpszData == NULL)
  705. return NULL;
  706. LPTSTR lpszNew;
  707. if (cbData == 0)
  708. cbData = (int)_tcslen(lpszData);
  709. lpszNew = (LPTSTR)malloc((cbData + 1) * sizeof(TCHAR));
  710. assert(lpszNew);
  711. if (lpszNew) {
  712. memcpy(lpszNew, lpszData, (cbData) * sizeof(TCHAR));
  713. lpszNew[cbData] = (TCHAR)NULL;
  714. }
  715. return lpszNew;
  716. }
  717. // Parse a clear (unformatted) type node.
  718. int
  719. XMLNode::ParseClearTag(void *px, void *pa)
  720. {
  721. XML *pXML = (XML *)px;
  722. ClearTag *pClear = (ClearTag *)pa;
  723. int cbTemp = 0;
  724. LPCTSTR lpszTemp;
  725. LPCTSTR lpszXML = &pXML->lpXML[pXML->nIndex];
  726. // Find the closing tag
  727. lpszTemp = _tcsstr(lpszXML, pClear->lpszClose);
  728. // Iterate through the tokens until we find the closing tag.
  729. if (lpszTemp) {
  730. // Cache the size and increment the index
  731. cbTemp = (int)(lpszTemp - lpszXML);
  732. pXML->nIndex += cbTemp;
  733. pXML->nIndex += (int)_tcslen(pClear->lpszClose);
  734. // Add the clear node to the current element
  735. AddClear(stringDup(lpszXML, cbTemp), pClear->lpszOpen, pClear->lpszClose);
  736. return TRUE;
  737. }
  738. // If we failed to find the end tag
  739. pXML->error = eXMLErrorUnmatchedEndTag;
  740. return FALSE;
  741. }
  742. // Recursively parse an XML element.
  743. int
  744. XMLNode::ParseXMLElement(void *pa)
  745. {
  746. XML *pXML = (XML *)pa;
  747. int cbToken;
  748. enum TokenTypeTag type;
  749. NextToken token;
  750. LPCTSTR lpszTemp = NULL;
  751. int cbTemp;
  752. int nDeclaration;
  753. LPCTSTR lpszText = NULL;
  754. XMLNode pNew;
  755. enum Status status; // inside or outside a tag
  756. enum Attrib attrib = eAttribName;
  757. assert(pXML);
  758. // If this is the first call to the function
  759. if (pXML->nFirst) {
  760. // Assume we are outside of a tag definition
  761. pXML->nFirst = FALSE;
  762. status = eOutsideTag;
  763. } else {
  764. // If this is not the first call then we should only be called when inside a tag.
  765. status = eInsideTag;
  766. }
  767. // Iterate through the tokens in the document
  768. while (TRUE) {
  769. // Obtain the next token
  770. token = GetNextToken(pXML, &cbToken, &type);
  771. if (type != eTokenError) {
  772. // Check the current status
  773. switch (status) {
  774. // If we are outside of a tag definition
  775. case eOutsideTag:
  776. // Check what type of token we obtained
  777. switch (type) {
  778. // If we have found text or quoted text
  779. case eTokenText:
  780. case eTokenQuotedText:
  781. case eTokenEquals:
  782. if (!lpszText)
  783. lpszText = token.pStr;
  784. break;
  785. // If we found a start tag '<' and declarations '<?'
  786. case eTokenTagStart:
  787. case eTokenDeclaration:
  788. // Cache whether this new element is a declaration or not
  789. nDeclaration = type == eTokenDeclaration;
  790. // If we have node text then add this to the element
  791. if (lpszText) {
  792. cbTemp = (int)(token.pStr - lpszText);
  793. FindEndOfText(lpszText, &cbTemp);
  794. AddText(stringDup(lpszText, cbTemp));
  795. lpszText = NULL;
  796. }
  797. // Find the name of the tag
  798. token = GetNextToken(pXML, &cbToken, &type);
  799. // Return an error if we couldn't obtain the next token or
  800. // it wasnt text
  801. if (type != eTokenText) {
  802. pXML->error = eXMLErrorMissingTagName;
  803. return FALSE;
  804. }
  805. // If we found a new element which is the same as this
  806. // element then we need to pass this back to the caller..
  807. #ifdef APPROXIMATE_PARSING
  808. if (d->lpszName && myTagCompare(d->lpszName, token.pStr) == 0) {
  809. // Indicate to the caller that it needs to create a
  810. // new element.
  811. pXML->lpNewElement = token.pStr;
  812. pXML->cbNewElement = cbToken;
  813. return TRUE;
  814. } else
  815. #endif
  816. {
  817. // If the name of the new element differs from the name of
  818. // the current element we need to add the new element to
  819. // the current one and recurse
  820. pNew = AddChild(stringDup(token.pStr, cbToken), nDeclaration);
  821. while (!pNew.isEmpty()) {
  822. // Callself to process the new node. If we return
  823. // FALSE this means we dont have any more
  824. // processing to do...
  825. if (!pNew.ParseXMLElement(pXML)) {
  826. d->pOrder = (int*)myRealloc(d->pOrder, nElement(),
  827. memoryIncrease * 3, sizeof(int));
  828. d->pChild = (XMLNode*)myRealloc(d->pChild, d->nChild,
  829. memoryIncrease, sizeof(XMLNode));
  830. if (d->nAttribute > 0)
  831. d->pAttribute = (XMLAttribute*)myRealloc(d->pAttribute,
  832. d->nAttribute,
  833. memoryIncrease,
  834. sizeof(XMLAttribute));
  835. if (d->nText > 0)
  836. d->pText = (LPCTSTR*)myRealloc(d->pText, d->nText,
  837. memoryIncrease, sizeof(LPTSTR));
  838. if (d->nClear > 0)
  839. d->pClear = (XMLClear *)myRealloc(d->pClear, d->nClear,
  840. memoryIncrease,
  841. sizeof(XMLClear));
  842. return FALSE;
  843. } else {
  844. // If the call to recurse this function
  845. // evented in a end tag specified in XML then
  846. // we need to unwind the calls to this
  847. // function until we find the appropriate node
  848. // (the element name and end tag name must
  849. // match)
  850. if (pXML->cbEndTag) {
  851. // If we are back at the root node then we
  852. // have an unmatched end tag
  853. if (!d->lpszName) {
  854. pXML->error = eXMLErrorUnmatchedEndTag;
  855. return FALSE;
  856. }
  857. // If the end tag matches the name of this
  858. // element then we only need to unwind
  859. // once more...
  860. if (myTagCompare(d->lpszName, pXML->lpEndTag) == 0) {
  861. pXML->cbEndTag = 0;
  862. }
  863. return TRUE;
  864. } else if (pXML->cbNewElement) {
  865. // If the call indicated a new element is to
  866. // be created on THIS element.
  867. // If the name of this element matches the
  868. // name of the element we need to create
  869. // then we need to return to the caller
  870. // and let it process the element.
  871. if (myTagCompare(d->lpszName, pXML->lpNewElement) == 0)
  872. return TRUE;
  873. // Add the new element and recurse
  874. pNew = AddChild(stringDup(pXML->lpNewElement,
  875. pXML->cbNewElement), FALSE);
  876. pXML->cbNewElement = 0;
  877. } else {
  878. // If we didn't have a new element to create
  879. pNew = emptyXMLNode;
  880. }
  881. }
  882. }
  883. }
  884. break;
  885. // If we found an end tag
  886. case eTokenTagEnd:
  887. // If we have node text then add this to the element
  888. if (lpszText) {
  889. cbTemp = (int)(token.pStr - lpszText);
  890. FindEndOfText(lpszText, &cbTemp);
  891. AddText(fromXMLString(lpszText, cbTemp));
  892. lpszText = NULL;
  893. }
  894. // Find the name of the end tag
  895. token = GetNextToken(pXML, &cbTemp, &type);
  896. // The end tag should be text
  897. if (type != eTokenText) {
  898. pXML->error = eXMLErrorMissingEndTagName;
  899. return FALSE;
  900. }
  901. lpszTemp = token.pStr;
  902. // After the end tag we should find a closing tag
  903. token = GetNextToken(pXML, &cbToken, &type);
  904. if (type != eTokenCloseTag) {
  905. pXML->error = eXMLErrorMissingEndTagName;
  906. return FALSE;
  907. }
  908. // We need to return to the previous caller. If the name
  909. // of the tag cannot be found we need to keep returning to
  910. // caller until we find a match
  911. if (myTagCompare(d->lpszName, lpszTemp) != 0) {
  912. pXML->lpEndTag = lpszTemp;
  913. pXML->cbEndTag = cbTemp;
  914. }
  915. // Return to the caller
  916. return TRUE;
  917. // If we found a clear (unformatted) token
  918. case eTokenClear:
  919. // If we have node text then add this to the element
  920. if (lpszText) {
  921. cbTemp = (int)(token.pStr - lpszText);
  922. FindEndOfText(lpszText, &cbTemp);
  923. AddText(stringDup(lpszText, cbTemp));
  924. lpszText = NULL;
  925. }
  926. if (!ParseClearTag(pXML, token.pClr)) {
  927. return FALSE;
  928. }
  929. break;
  930. // Errors...
  931. case eTokenCloseTag: /* '>' */
  932. case eTokenShortHandClose: /* '/>' */
  933. pXML->error = eXMLErrorUnexpectedToken;
  934. return FALSE;
  935. default:
  936. break;
  937. }
  938. break;
  939. // If we are inside a tag definition we need to search for attributes
  940. case eInsideTag:
  941. // Check what part of the attribute (name, equals, value) we
  942. // are looking for.
  943. switch (attrib) {
  944. // If we are looking for a new attribute
  945. case eAttribName:
  946. // Check what the current token type is
  947. switch (type) {
  948. // If the current type is text...
  949. // Eg. 'attribute'
  950. case eTokenText:
  951. // Cache the token then indicate that we are next to
  952. // look for the equals
  953. lpszTemp = token.pStr;
  954. cbTemp = cbToken;
  955. attrib = eAttribEquals;
  956. break;
  957. // If we found a closing tag...
  958. // Eg. '>'
  959. case eTokenCloseTag:
  960. // We are now outside the tag
  961. status = eOutsideTag;
  962. break;
  963. // If we found a short hand '/>' closing tag then we can
  964. // return to the caller
  965. case eTokenShortHandClose:
  966. return TRUE;
  967. // Errors...
  968. case eTokenQuotedText: /* '"SomeText"' */
  969. case eTokenTagStart: /* '<' */
  970. case eTokenTagEnd: /* '</' */
  971. case eTokenEquals: /* '=' */
  972. case eTokenDeclaration: /* '<?' */
  973. case eTokenClear:
  974. pXML->error = eXMLErrorUnexpectedToken;
  975. return FALSE;
  976. default:
  977. break;
  978. }
  979. break;
  980. // If we are looking for an equals
  981. case eAttribEquals:
  982. // Check what the current token type is
  983. switch (type) {
  984. // If the current type is text...
  985. // Eg. 'Attribute AnotherAttribute'
  986. case eTokenText:
  987. // Add the unvalued attribute to the list
  988. AddAttribute(stringDup(lpszTemp, cbTemp), NULL);
  989. // Cache the token then indicate. We are next to
  990. // look for the equals attribute
  991. lpszTemp = token.pStr;
  992. cbTemp = cbToken;
  993. break;
  994. // If we found a closing tag 'Attribute >' or a short hand
  995. // closing tag 'Attribute />'
  996. case eTokenShortHandClose:
  997. case eTokenCloseTag:
  998. // If we are a declaration element '<?' then we need
  999. // to remove extra closing '?' if it exists
  1000. if (d->isDeclaration && (lpszTemp[cbTemp - 1]) == _T('?'))
  1001. cbTemp--;
  1002. if (cbTemp)
  1003. // Add the unvalued attribute to the list
  1004. AddAttribute(stringDup(lpszTemp, cbTemp), NULL);
  1005. // If this is the end of the tag then return to the caller
  1006. if (type == eTokenShortHandClose)
  1007. return TRUE;
  1008. // We are now outside the tag
  1009. status = eOutsideTag;
  1010. break;
  1011. // If we found the equals token...
  1012. // Eg. 'Attribute ='
  1013. case eTokenEquals:
  1014. // Indicate that we next need to search for the value
  1015. // for the attribute
  1016. attrib = eAttribValue;
  1017. break;
  1018. // Errors...
  1019. case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/
  1020. case eTokenTagStart: /* 'Attribute <' */
  1021. case eTokenTagEnd: /* 'Attribute </' */
  1022. case eTokenDeclaration: /* 'Attribute <?' */
  1023. case eTokenClear:
  1024. pXML->error = eXMLErrorUnexpectedToken;
  1025. return FALSE;
  1026. default:
  1027. break;
  1028. }
  1029. break;
  1030. // If we are looking for an attribute value
  1031. case eAttribValue:
  1032. // Check what the current token type is
  1033. switch (type) {
  1034. // If the current type is text or quoted text...
  1035. // Eg. 'Attribute = "Value"' or 'Attribute = Value' or
  1036. // 'Attribute = 'Value''.
  1037. case eTokenText:
  1038. case eTokenQuotedText:
  1039. // If we are a declaration element '<?' then we need
  1040. // to remove extra closing '?' if it exists
  1041. if (d->isDeclaration && (token.pStr[cbToken - 1]) == _T('?')) {
  1042. cbToken--;
  1043. }
  1044. if (cbTemp) {
  1045. // Add the valued attribute to the list
  1046. if (type == eTokenQuotedText) {
  1047. token.pStr++;
  1048. cbToken -= 2;
  1049. }
  1050. AddAttribute(stringDup(lpszTemp, cbTemp), fromXMLString(
  1051. token.pStr, cbToken));
  1052. }
  1053. // Indicate we are searching for a new attribute
  1054. attrib = eAttribName;
  1055. break;
  1056. // Errors...
  1057. case eTokenTagStart: /* 'Attr = <' */
  1058. case eTokenTagEnd: /* 'Attr = </' */
  1059. case eTokenCloseTag: /* 'Attr = >' */
  1060. case eTokenShortHandClose: /* "Attr = />" */
  1061. case eTokenEquals: /* 'Attr = =' */
  1062. case eTokenDeclaration: /* 'Attr = <?' */
  1063. case eTokenClear:
  1064. pXML->error = eXMLErrorUnexpectedToken;
  1065. return FALSE;
  1066. break;
  1067. default:
  1068. break;
  1069. }
  1070. }
  1071. }
  1072. }
  1073. // If we failed to obtain the next token
  1074. else
  1075. return FALSE;
  1076. }
  1077. }
  1078. // Count the number of lines and columns in an XML string.
  1079. static void
  1080. CountLinesAndColumns(LPCTSTR lpXML, int nUpto, XMLResults *pResults)
  1081. {
  1082. TCHAR ch;
  1083. int n;
  1084. assert(lpXML);
  1085. assert(pResults);
  1086. pResults->nLine = 1;
  1087. pResults->nColumn = 1;
  1088. for (n = 0; n < nUpto; n++) {
  1089. ch = lpXML[n];
  1090. assert(ch);
  1091. if (ch == _T('\n')) {
  1092. pResults->nLine++;
  1093. pResults->nColumn = 1;
  1094. } else
  1095. pResults->nColumn++;
  1096. }
  1097. }
  1098. /**
  1099. * Parses the given XML String (lpszXML) and returns the main XMLNode
  1100. * @param lpszXML XML String
  1101. * @param tag (?)
  1102. * @param pResults XMLResult object to write in on error or success
  1103. * @return The main XMLNode or empty XMLNode on error
  1104. */
  1105. XMLNode
  1106. XMLNode::parseString(LPCTSTR lpszXML, XMLResults *pResults)
  1107. {
  1108. // If String is empty
  1109. if (!lpszXML) {
  1110. // If XMLResults object exists
  1111. if (pResults) {
  1112. // -> Save the error type
  1113. pResults->error = eXMLErrorNoElements;
  1114. pResults->nLine = 0;
  1115. pResults->nColumn = 0;
  1116. }
  1117. // -> Return empty XMLNode
  1118. return emptyXMLNode;
  1119. }
  1120. static struct ClearTag tags[] = {
  1121. { _T("<![CDATA["), _T("]]>") },
  1122. { _T("<PRE>"), _T("</PRE>") },
  1123. { _T("<Script>"), _T("</Script>") },
  1124. { _T("<!--"), _T("-->") },
  1125. { _T("<!DOCTYPE"), _T(">") },
  1126. { NULL, NULL }
  1127. };
  1128. enum XMLError error;
  1129. XMLNode xnode(NULL, NULL, FALSE);
  1130. struct XML xml = { NULL, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE, NULL };
  1131. xml.lpXML = lpszXML;
  1132. xml.pClrTags = tags;
  1133. // Fill the XMLNode xnode with the parsed data of xml
  1134. // note: xnode is now the document node, not the main XMLNode
  1135. xnode.ParseXMLElement(&xml);
  1136. error = xml.error;
  1137. // If the document node does not have childnodes
  1138. if (xnode.nChildNode() < 1) {
  1139. // If XMLResults object exists
  1140. if (pResults) {
  1141. // -> Save the error type
  1142. pResults->error = eXMLErrorNoElements;
  1143. pResults->nLine = 0;
  1144. pResults->nColumn = 0;
  1145. }
  1146. // -> Return empty XMLNode
  1147. return emptyXMLNode;
  1148. } else {
  1149. // Set the document's first childnode as new main node
  1150. xnode = xnode.getChildNode(0);
  1151. }
  1152. // If the new main node is the xml declaration
  1153. // -> try to take the first childnode again
  1154. if (xnode.isDeclaration()) {
  1155. // If the declaration does not have childnodes
  1156. if (xnode.nChildNode() < 1) {
  1157. // If XMLResults object exists
  1158. if (pResults) {
  1159. // -> Save the error type
  1160. pResults->error = eXMLErrorNoElements;
  1161. pResults->nLine = 0;
  1162. pResults->nColumn = 0;
  1163. }
  1164. // -> Return empty XMLNode
  1165. return emptyXMLNode;
  1166. } else {
  1167. // Set the declaration's first childnode as new main node
  1168. xnode = xnode.getChildNode(0);
  1169. }
  1170. }
  1171. // If error occurred -> set node to empty
  1172. if (error != eXMLErrorNone)
  1173. xnode = emptyXMLNode;
  1174. // If an XMLResults object exists
  1175. // -> save the result (error/success)
  1176. if (pResults) {
  1177. pResults->error = error;
  1178. // If we have an error
  1179. if (error != eXMLErrorNone) {
  1180. // Find which line and column it starts on and
  1181. // save it in the XMLResults object
  1182. CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults);
  1183. }
  1184. }
  1185. // Return the node (empty, main or child of main that equals tag)
  1186. return xnode;
  1187. }
  1188. /**
  1189. * Opens the file given by the filepath in lpszXML and returns the main node.
  1190. * (Includes error handling)
  1191. * @param filename Filepath to the XML file to parse
  1192. * @param tag (?)
  1193. * @param pResults Pointer to the XMLResults object to fill on error or success
  1194. * @return The main XMLNode or an empty node on error
  1195. */
  1196. XMLNode
  1197. XMLNode::parseFile(const char *filename, XMLResults *pResults)
  1198. {
  1199. // Open the file for reading
  1200. FILE *f = fopen(filename, "rb");
  1201. // If file can't be read
  1202. if (f == NULL) {
  1203. // If XMLResults object exists
  1204. if (pResults) {
  1205. // -> Save the error type into it
  1206. pResults->error = eXMLErrorFileNotFound;
  1207. pResults->nLine = 0;
  1208. pResults->nColumn = 0;
  1209. }
  1210. // -> Return empty XMLNode
  1211. return emptyXMLNode;
  1212. }
  1213. // Get filelength (l)
  1214. fseek(f, 0, SEEK_END);
  1215. int l = ftell(f);
  1216. // Read the whole(!) file into a buffer string
  1217. fseek(f, 0, SEEK_SET);
  1218. char *raw = new char[l + 1];
  1219. fread(raw, l, 1, f);
  1220. // Close the file
  1221. fclose(f);
  1222. TCHAR *text;
  1223. #ifdef _UNICODE
  1224. text = new TCHAR[l + 1];
  1225. l = MultiByteToWideChar(CP_ACP, // code page
  1226. MB_PRECOMPOSED, // character-type options
  1227. raw, // string to map
  1228. l, // number of bytes in string
  1229. text, // wide-character buffer
  1230. l); // size of buffer
  1231. delete[] raw;
  1232. if (l <= 0) {
  1233. /* conversion has failed */
  1234. delete[] text;
  1235. return emptyXMLNode;
  1236. }
  1237. #else
  1238. text = raw;
  1239. #endif
  1240. // Terminate the buffer string
  1241. text[l] = 0;
  1242. // Parse the string and get the main XMLNode
  1243. XMLNode x = parseString(text, pResults);
  1244. // Free the buffer memory
  1245. delete[] text;
  1246. // Return the main XMLNode
  1247. return x;
  1248. }
  1249. /**
  1250. * Opens the file given by the filepath in lpszXML and returns the main node.
  1251. * (Includes error handling)
  1252. * @param lpszXML Filepath to the XML file to parse
  1253. * @param tag (?)
  1254. * @return The main XMLNode
  1255. */
  1256. XMLNode
  1257. XMLNode::openFileHelper(const char *lpszXML)
  1258. {
  1259. XMLResults pResults;
  1260. XMLNode::GlobalError = false;
  1261. // Parse the file and get the main XMLNode
  1262. XMLNode xnode = XMLNode::parseFile(lpszXML, &pResults);
  1263. // If error appeared
  1264. if (pResults.error != eXMLErrorNone) {
  1265. // In debug mode -> Log error to stdout
  1266. #ifndef NDEBUG
  1267. printf("XML Parsing error inside file '%s'.\n"
  1268. #ifdef _UNICODE
  1269. "Error: %S\n"
  1270. #else
  1271. "Error: %s\n"
  1272. #endif
  1273. "At line %i, column %i.\n", lpszXML,
  1274. XMLNode::getError(pResults.error), pResults.nLine, pResults.nColumn);
  1275. #endif
  1276. // Remember Error
  1277. XMLNode::GlobalError = true;
  1278. }
  1279. // Return the parsed node or empty node on error
  1280. return xnode;
  1281. }
  1282. XMLNodeContents
  1283. XMLNode::enumContents(int i)
  1284. {
  1285. XMLNodeContents c;
  1286. if (!d) {
  1287. c.type = eNodeNULL;
  1288. return c;
  1289. }
  1290. c.type = (XMLElementType)(d->pOrder[i] & 3);
  1291. i = (d->pOrder[i]) >> 2;
  1292. switch (c.type) {
  1293. case eNodeChild:
  1294. c.child = d->pChild[i];
  1295. break;
  1296. case eNodeAttribute:
  1297. c.attrib = d->pAttribute[i];
  1298. break;
  1299. case eNodeText:
  1300. c.text = d->pText[i];
  1301. break;
  1302. case eNodeClear:
  1303. c.clear = d->pClear[i];
  1304. break;
  1305. default:
  1306. break;
  1307. }
  1308. return c;
  1309. }
  1310. void *
  1311. XMLNode::enumContent(XMLNodeData *pEntry, int i, XMLElementType *nodeType)
  1312. {
  1313. XMLElementType j = (XMLElementType)(pEntry->pOrder[i] & 3);
  1314. *nodeType = j;
  1315. i = (pEntry->pOrder[i]) >> 2;
  1316. switch (j) {
  1317. case eNodeChild:
  1318. return pEntry->pChild[i].d;
  1319. case eNodeAttribute:
  1320. return pEntry->pAttribute + i;
  1321. case eNodeText:
  1322. return (void*)(pEntry->pText[i]);
  1323. case eNodeClear:
  1324. return pEntry->pClear + i;
  1325. default:
  1326. break;
  1327. }
  1328. return NULL;
  1329. }
  1330. int
  1331. XMLNode::nElement(XMLNodeData *pEntry)
  1332. {
  1333. return pEntry->nChild + pEntry->nText + pEntry->nClear + pEntry->nAttribute;
  1334. }
  1335. static inline void
  1336. charmemset(LPTSTR dest, TCHAR c, int l)
  1337. {
  1338. while (l--)
  1339. *(dest++) = c;
  1340. }
  1341. // Creates an user friendly XML string from a given element with
  1342. // appropriate white space and carriage returns.
  1343. //
  1344. // This recurses through all subnodes then adds contents of the nodes to the
  1345. // string.
  1346. int
  1347. XMLNode::CreateXMLStringR(XMLNodeData *pEntry, LPTSTR lpszMarker, int nFormat)
  1348. {
  1349. int nResult = 0;
  1350. int cb;
  1351. int cbElement;
  1352. int nIndex;
  1353. int nChildFormat = -1;
  1354. int bHasChildren = FALSE;
  1355. int i;
  1356. XMLAttribute * pAttr;
  1357. assert(pEntry);
  1358. #define LENSTR(lpsz) (lpsz ? _tcslen(lpsz) : 0)
  1359. // If the element has no name then assume this is the head node.
  1360. cbElement = (int)LENSTR(pEntry->lpszName);
  1361. if (cbElement) {
  1362. // "<elementname "
  1363. cb = nFormat == -1 ? 0 : nFormat;
  1364. if (lpszMarker) {
  1365. if (cb)
  1366. charmemset(lpszMarker, INDENTCHAR, sizeof(TCHAR) * cb);
  1367. nResult = cb;
  1368. lpszMarker[nResult++] = _T('<');
  1369. if (pEntry->isDeclaration)
  1370. lpszMarker[nResult++] = _T('?');
  1371. _tcscpy(&lpszMarker[nResult], pEntry->lpszName);
  1372. nResult += cbElement;
  1373. lpszMarker[nResult++] = _T(' ');
  1374. } else {
  1375. nResult += cbElement + 2 + cb;
  1376. if (pEntry->isDeclaration)
  1377. nResult++;
  1378. }
  1379. // Enumerate attributes and add them to the string
  1380. nIndex = pEntry->nAttribute;
  1381. pAttr = pEntry->pAttribute;
  1382. for (i = 0; i < nIndex; i++) {
  1383. // "Attrib
  1384. cb = (int)LENSTR(pAttr->lpszName);
  1385. if (cb) {
  1386. if (lpszMarker)
  1387. _tcscpy(&lpszMarker[nResult], pAttr->lpszName);
  1388. nResult += cb;
  1389. // "Attrib=Value "
  1390. cb = (int)lengthXMLString(pAttr->lpszValue);
  1391. if (cb) {
  1392. if (lpszMarker) {
  1393. lpszMarker[nResult] = _T('=');
  1394. lpszMarker[nResult + 1] = _T('"');
  1395. toXMLString(&lpszMarker[nResult + 2], pAttr->lpszValue);
  1396. lpszMarker[nResult + cb + 2] = _T('"');
  1397. }
  1398. nResult += cb + 3;
  1399. }
  1400. if (lpszMarker)
  1401. lpszMarker[nResult] = _T(' ');
  1402. nResult++;
  1403. }
  1404. pAttr++;
  1405. }
  1406. bHasChildren = (pEntry->nAttribute != nElement(pEntry));
  1407. if (pEntry->isDeclaration) {
  1408. if (lpszMarker) {
  1409. lpszMarker[nResult - 1] = _T('?');
  1410. lpszMarker[nResult] = _T('>');
  1411. }
  1412. nResult++;
  1413. if (nFormat != -1) {
  1414. if (lpszMarker)
  1415. lpszMarker[nResult] = _T('\n');
  1416. nResult++;
  1417. }
  1418. } else
  1419. // If there are child nodes we need to terminate the start tag
  1420. if (bHasChildren) {
  1421. if (lpszMarker)
  1422. lpszMarker[nResult - 1] = _T('>');
  1423. if (nFormat != -1) {
  1424. if (lpszMarker)
  1425. lpszMarker[nResult] = _T('\n');
  1426. nResult++;
  1427. }
  1428. } else
  1429. nResult--;
  1430. }
  1431. // Calculate the child format for when we recurse. This is used to
  1432. // determine the number of spaces used for prefixes.
  1433. if (nFormat != -1) {
  1434. if (cbElement)
  1435. nChildFormat = nFormat + 1;
  1436. else
  1437. nChildFormat = nFormat;
  1438. }
  1439. // Enumerate through remaining children
  1440. nIndex = nElement(pEntry);
  1441. XMLElementType nodeType;
  1442. void *pChild;
  1443. for (i = 0; i < nIndex; i++) {
  1444. pChild = enumContent(pEntry, i, &nodeType);
  1445. switch (nodeType) {
  1446. // Text nodes
  1447. case eNodeText:
  1448. // "Text"
  1449. cb = (int)lengthXMLString((LPTSTR)pChild);
  1450. if (cb) {
  1451. if (nFormat != -1) {
  1452. if (lpszMarker) {
  1453. charmemset(&lpszMarker[nResult], INDENTCHAR,
  1454. sizeof(TCHAR) * (nFormat + 1));
  1455. toXMLString(&lpszMarker[nResult + nFormat + 1], (LPTSTR)pChild);
  1456. lpszMarker[nResult + nFormat + 1 + cb] = _T('\n');
  1457. }
  1458. nResult += cb + nFormat + 2;
  1459. } else {
  1460. if (lpszMarker)
  1461. toXMLString(&lpszMarker[nResult], (LPTSTR)pChild);
  1462. nResult += cb;
  1463. }
  1464. }
  1465. break;
  1466. // Clear type nodes
  1467. case eNodeClear:
  1468. // "OpenTag"
  1469. cb = (int)LENSTR(((XMLClear*)pChild)->lpszOpenTag);
  1470. if (cb) {
  1471. if (nFormat != -1) {
  1472. if (lpszMarker) {
  1473. charmemset(&lpszMarker[nResult], INDENTCHAR,
  1474. sizeof(TCHAR) * (nFormat + 1));
  1475. _tcscpy(&lpszMarker[nResult + nFormat + 1],
  1476. ((XMLClear*)pChild)->lpszOpenTag);
  1477. }
  1478. nResult += cb + nFormat + 1;
  1479. } else {
  1480. if (lpszMarker)
  1481. _tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszOpenTag);
  1482. nResult += cb;
  1483. }
  1484. }
  1485. // "OpenTag Value"
  1486. cb = (int)LENSTR(((XMLClear*)pChild)->lpszValue);
  1487. if (cb) {
  1488. if (lpszMarker)
  1489. _tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszValue);
  1490. nResult += cb;
  1491. }
  1492. // "OpenTag Value CloseTag"
  1493. cb = (int)LENSTR(((XMLClear*)pChild)->lpszCloseTag);
  1494. if (cb) {
  1495. if (lpszMarker)
  1496. _tcscpy(&lpszMarker[nResult], ((XMLClear*)pChild)->lpszCloseTag);
  1497. nResult += cb;
  1498. }
  1499. if (nFormat != -1) {
  1500. if (lpszMarker)
  1501. lpszMarker[nResult] = _T('\n');
  1502. nResult++;
  1503. }
  1504. break;
  1505. // Element nodes
  1506. case eNodeChild:
  1507. // Recursively add child nodes
  1508. nResult += CreateXMLStringR((XMLNodeData*)pChild,
  1509. lpszMarker ? lpszMarker + nResult : 0,
  1510. nChildFormat);
  1511. break;
  1512. default:
  1513. break;
  1514. }
  1515. }
  1516. if ((cbElement) && (!pEntry->isDeclaration)) {
  1517. // If we have child entries we need to use long XML notation for
  1518. // closing the element - "<elementname>blah blah blah</elementname>"
  1519. if (bHasChildren) {
  1520. // "</elementname>\0"
  1521. if (lpszMarker) {
  1522. if (nFormat != -1) {
  1523. if (nFormat) {
  1524. charmemset(&lpszMarker[nResult], INDENTCHAR, sizeof(TCHAR)
  1525. * nFormat);
  1526. nResult += nFormat;
  1527. }
  1528. }
  1529. _tcscpy(&lpszMarker[nResult], _T("</"));
  1530. nResult += 2;
  1531. _tcscpy(&lpszMarker[nResult], pEntry->lpszName);
  1532. nResult += cbElement;
  1533. if (nFormat == -1) {
  1534. _tcscpy(&lpszMarker[nResult], _T(">"));
  1535. nResult++;
  1536. } else {
  1537. _tcscpy(&lpszMarker[nResult], _T(">\n"));
  1538. nResult += 2;
  1539. }
  1540. } else {
  1541. if (nFormat != -1)
  1542. nResult += cbElement + 4 + nFormat;
  1543. else
  1544. nResult += cbElement + 3;
  1545. }
  1546. } else {
  1547. // If there are no children we can use shorthand XML notation -
  1548. // "<elementname/>"
  1549. // "/>\0"
  1550. if (lpszMarker) {
  1551. if (nFormat == -1) {
  1552. _tcscpy(&lpszMarker[nResult], _T("/>"));
  1553. nResult += 2;
  1554. } else {
  1555. _tcscpy(&lpszMarker[nResult], _T("/>\n"));
  1556. nResult += 3;
  1557. }
  1558. } else {
  1559. nResult += nFormat == -1 ? 2 : 3;
  1560. }
  1561. }
  1562. }
  1563. return nResult;
  1564. }
  1565. #undef LENSTR
  1566. // Create an XML string from the head element.
  1567. // @param XMLElement * pHead - head element
  1568. // @param int nFormat - 0 if no formatting is required
  1569. // otherwise nonzero for formatted text
  1570. // with carriage returns and indentation.
  1571. // @param int *pnSize - [out] pointer to the size of the
  1572. // returned string not including the
  1573. // NULL terminator.
  1574. //
  1575. // @return LPTSTR - Allocated XML string, you must free
  1576. // this with free().
  1577. LPTSTR
  1578. XMLNode::createXMLString(int nFormat, int *pnSize)
  1579. {
  1580. if (!d) {
  1581. if (pnSize)
  1582. *pnSize = 0;
  1583. return NULL;
  1584. }
  1585. LPTSTR lpszResult = NULL;
  1586. int cbStr;
  1587. // Recursively Calculate the size of the XML string
  1588. nFormat = nFormat ? 0 : -1;
  1589. cbStr = CreateXMLStringR(d, 0, nFormat);
  1590. assert(cbStr);
  1591. // Alllocate memory for the XML string + the NULL terminator and
  1592. // create the recursively XML string.
  1593. lpszResult = (LPTSTR)malloc((cbStr + 1) * sizeof(TCHAR));
  1594. assert(lpszResult);
  1595. CreateXMLStringR(d, lpszResult, nFormat);
  1596. if (pnSize)
  1597. *pnSize = cbStr;
  1598. return lpszResult;
  1599. }
  1600. XMLNode::~XMLNode()
  1601. {
  1602. destroyCurrentBuffer(d);
  1603. }
  1604. void
  1605. XMLNode::destroyCurrentBuffer(XMLNodeData *d)
  1606. {
  1607. if (!d)
  1608. return;
  1609. (d->ref_count)--;
  1610. if (d->ref_count == 0) {
  1611. int i = 0;
  1612. if (d->pParent) {
  1613. XMLNode *pa = d->pParent->d->pChild;
  1614. while (((void*)(pa[i].d)) != ((void*)d))
  1615. i++;
  1616. d->pParent->d->nChild--;
  1617. memmove(pa + i, pa + i + 1, (d->pParent->d->nChild - i) * sizeof(XMLNode));
  1618. removeOrderElement(d->pParent->d, eNodeChild, i);
  1619. }
  1620. for (i = 0; i < d->nChild; i++) {
  1621. d->pChild[i].d->pParent = NULL;
  1622. destroyCurrentBuffer(d->pChild[i].d);
  1623. }
  1624. free(d->pChild);
  1625. for (i = 0; i < d->nText; i++)
  1626. free((void*)d->pText[i]);
  1627. free(d->pText);
  1628. for (i = 0; i < d->nClear; i++)
  1629. free((void*)d->pClear[i].lpszValue);
  1630. free(d->pClear);
  1631. for (i = 0; i < d->nAttribute; i++) {
  1632. free((void*)d->pAttribute[i].lpszName);
  1633. if (d->pAttribute[i].lpszValue)
  1634. free((void*)d->pAttribute[i].lpszValue);
  1635. }
  1636. free(d->pAttribute);
  1637. free(d->pOrder);
  1638. free((void*)d->lpszName);
  1639. free(d);
  1640. }
  1641. }
  1642. XMLNode&
  1643. XMLNode::operator=(const XMLNode& A)
  1644. {
  1645. // shallow copy
  1646. if (this != &A) {
  1647. destroyCurrentBuffer(d);
  1648. d = A.d;
  1649. if (d)
  1650. (d->ref_count)++;
  1651. }
  1652. return *this;
  1653. }
  1654. XMLNode::XMLNode(const XMLNode &A)
  1655. {
  1656. // shallow copy
  1657. d = A.d;
  1658. if (d)
  1659. (d->ref_count)++;
  1660. }
  1661. int
  1662. XMLNode::nChildNode(LPCTSTR name) const
  1663. {
  1664. if (!d)
  1665. return 0;
  1666. int i, j = 0, n = d->nChild;
  1667. XMLNode *pc = d->pChild;
  1668. for (i = 0; i <

Large files are truncated, but you can click here to view the full file