PageRenderTime 24ms CodeModel.GetById 24ms RepoModel.GetById 6ms app.codeStats 0ms

/branches/aleph-source/Expat/xmlwf/xmlwf.cpp

#
C++ | 657 lines | 604 code | 19 blank | 34 comment | 59 complexity | d648f6330266d8305c44bcc4b1a65ebd MD5 | raw file
Possible License(s): LGPL-2.0, LGPL-2.1, BSD-3-Clause, GPL-3.0, LGPL-3.0, MPL-2.0-no-copyleft-exception, Zlib, GPL-2.0
  1. /*
  2. The contents of this file are subject to the Mozilla Public License
  3. Version 1.1 (the "License"); you may not use this file except in
  4. compliance with the License. You may obtain a copy of the License at
  5. http://www.mozilla.org/MPL/
  6. Software distributed under the License is distributed on an "AS IS"
  7. basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
  8. License for the specific language governing rights and limitations
  9. under the License.
  10. The Original Code is expat.
  11. The Initial Developer of the Original Code is James Clark.
  12. Portions created by James Clark are Copyright (C) 1998, 1999
  13. James Clark. All Rights Reserved.
  14. Contributor(s):
  15. Alternatively, the contents of this file may be used under the terms
  16. of the GNU General Public License (the "GPL"), in which case the
  17. provisions of the GPL are applicable instead of those above. If you
  18. wish to allow use of your version of this file only under the terms of
  19. the GPL and not to allow others to use your version of this file under
  20. the MPL, indicate your decision by deleting the provisions above and
  21. replace them with the notice and other provisions required by the
  22. GPL. If you do not delete the provisions above, a recipient may use
  23. your version of this file under either the MPL or the GPL.
  24. */
  25. #include <stdio.h>
  26. #include <stdlib.h>
  27. #include <stddef.h>
  28. #include <string.h>
  29. #include "xmlparse.h"
  30. #include "codepage.h"
  31. #include "xmlfile.h"
  32. #include "xmltchar.h"
  33. #ifdef _MSC_VER
  34. #include <crtdbg.h>
  35. #endif
  36. #define NSSEP T('#')
  37. static void characterData(void *userData, const XML_Char *s, int len)
  38. {
  39. FILE *fp = userData;
  40. for (; len > 0; --len, ++s) {
  41. switch (*s) {
  42. case T('&'):
  43. fputts(T("&amp;"), fp);
  44. break;
  45. case T('<'):
  46. fputts(T("&lt;"), fp);
  47. break;
  48. case T('>'):
  49. fputts(T("&gt;"), fp);
  50. break;
  51. case T('"'):
  52. fputts(T("&quot;"), fp);
  53. break;
  54. case 9:
  55. case 10:
  56. case 13:
  57. ftprintf(fp, T("&#%d;"), *s);
  58. break;
  59. default:
  60. puttc(*s, fp);
  61. break;
  62. }
  63. }
  64. }
  65. /* Lexicographically comparing UTF-8 encoded attribute values,
  66. is equivalent to lexicographically comparing based on the character number. */
  67. static int attcmp(const void *att1, const void *att2)
  68. {
  69. return tcscmp(*(const XML_Char **)att1, *(const XML_Char **)att2);
  70. }
  71. static void startElement(void *userData, const XML_Char *name, const XML_Char **atts)
  72. {
  73. int nAtts;
  74. const XML_Char **p;
  75. FILE *fp = userData;
  76. puttc(T('<'), fp);
  77. fputts(name, fp);
  78. p = atts;
  79. while (*p)
  80. ++p;
  81. nAtts = (p - atts) >> 1;
  82. if (nAtts > 1)
  83. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
  84. while (*atts) {
  85. puttc(T(' '), fp);
  86. fputts(*atts++, fp);
  87. puttc(T('='), fp);
  88. puttc(T('"'), fp);
  89. characterData(userData, *atts, tcslen(*atts));
  90. puttc(T('"'), fp);
  91. atts++;
  92. }
  93. puttc(T('>'), fp);
  94. }
  95. static void endElement(void *userData, const XML_Char *name)
  96. {
  97. FILE *fp = userData;
  98. puttc(T('<'), fp);
  99. puttc(T('/'), fp);
  100. fputts(name, fp);
  101. puttc(T('>'), fp);
  102. }
  103. static void startElementNS(void *userData, const XML_Char *name, const XML_Char **atts)
  104. {
  105. int nAtts;
  106. int nsi;
  107. const XML_Char **p;
  108. FILE *fp = userData;
  109. const XML_Char *sep;
  110. puttc(T('<'), fp);
  111. sep = tcsrchr(name, NSSEP);
  112. if (sep) {
  113. fputts(T("ns0:"), fp);
  114. fputts(sep + 1, fp);
  115. fputts(T(" xmlns:ns0=\""), fp);
  116. characterData(userData, name, sep - name);
  117. puttc(T('"'), fp);
  118. nsi = 1;
  119. }
  120. else {
  121. fputts(name, fp);
  122. nsi = 0;
  123. }
  124. p = atts;
  125. while (*p)
  126. ++p;
  127. nAtts = (p - atts) >> 1;
  128. if (nAtts > 1)
  129. qsort((void *)atts, nAtts, sizeof(XML_Char *) * 2, attcmp);
  130. while (*atts) {
  131. name = *atts++;
  132. sep = tcsrchr(name, NSSEP);
  133. if (sep) {
  134. ftprintf(fp, T(" xmlns:ns%d=\""), nsi);
  135. characterData(userData, name, sep - name);
  136. puttc(T('"'), fp);
  137. name = sep + 1;
  138. ftprintf(fp, T(" ns%d:"), nsi++);
  139. }
  140. else
  141. puttc(T(' '), fp);
  142. fputts(name, fp);
  143. puttc(T('='), fp);
  144. puttc(T('"'), fp);
  145. characterData(userData, *atts, tcslen(*atts));
  146. puttc(T('"'), fp);
  147. atts++;
  148. }
  149. puttc(T('>'), fp);
  150. }
  151. static void endElementNS(void *userData, const XML_Char *name)
  152. {
  153. FILE *fp = userData;
  154. const XML_Char *sep;
  155. puttc(T('<'), fp);
  156. puttc(T('/'), fp);
  157. sep = tcsrchr(name, NSSEP);
  158. if (sep) {
  159. fputts(T("ns0:"), fp);
  160. fputts(sep + 1, fp);
  161. }
  162. else
  163. fputts(name, fp);
  164. puttc(T('>'), fp);
  165. }
  166. static void processingInstruction(void *userData, const XML_Char *target, const XML_Char *data)
  167. {
  168. FILE *fp = userData;
  169. puttc(T('<'), fp);
  170. puttc(T('?'), fp);
  171. fputts(target, fp);
  172. puttc(T(' '), fp);
  173. fputts(data, fp);
  174. puttc(T('?'), fp);
  175. puttc(T('>'), fp);
  176. }
  177. static void defaultCharacterData(XML_Parser parser, const XML_Char *s, int len)
  178. {
  179. XML_DefaultCurrent(parser);
  180. }
  181. static void defaultStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
  182. {
  183. XML_DefaultCurrent(parser);
  184. }
  185. static void defaultEndElement(XML_Parser parser, const XML_Char *name)
  186. {
  187. XML_DefaultCurrent(parser);
  188. }
  189. static void defaultProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
  190. {
  191. XML_DefaultCurrent(parser);
  192. }
  193. static void nopCharacterData(XML_Parser parser, const XML_Char *s, int len)
  194. {
  195. }
  196. static void nopStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
  197. {
  198. }
  199. static void nopEndElement(XML_Parser parser, const XML_Char *name)
  200. {
  201. }
  202. static void nopProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
  203. {
  204. }
  205. static void markup(XML_Parser parser, const XML_Char *s, int len)
  206. {
  207. FILE *fp = XML_GetUserData(parser);
  208. for (; len > 0; --len, ++s)
  209. puttc(*s, fp);
  210. }
  211. static
  212. void metaLocation(XML_Parser parser)
  213. {
  214. const XML_Char *uri = XML_GetBase(parser);
  215. if (uri)
  216. ftprintf(XML_GetUserData(parser), T(" uri=\"%s\""), uri);
  217. ftprintf(XML_GetUserData(parser),
  218. T(" byte=\"%ld\" nbytes=\"%d\" line=\"%d\" col=\"%d\""),
  219. XML_GetCurrentByteIndex(parser),
  220. XML_GetCurrentByteCount(parser),
  221. XML_GetCurrentLineNumber(parser),
  222. XML_GetCurrentColumnNumber(parser));
  223. }
  224. static
  225. void metaStartDocument(XML_Parser parser)
  226. {
  227. fputts(T("<document>\n"), XML_GetUserData(parser));
  228. }
  229. static
  230. void metaEndDocument(XML_Parser parser)
  231. {
  232. fputts(T("</document>\n"), XML_GetUserData(parser));
  233. }
  234. static
  235. void metaStartElement(XML_Parser parser, const XML_Char *name, const XML_Char **atts)
  236. {
  237. FILE *fp = XML_GetUserData(parser);
  238. const XML_Char **specifiedAttsEnd
  239. = atts + 2*XML_GetSpecifiedAttributeCount(parser);
  240. ftprintf(fp, T("<starttag name=\"%s\""), name);
  241. metaLocation(parser);
  242. if (*atts) {
  243. fputts(T(">\n"), fp);
  244. do {
  245. ftprintf(fp, T("<attribute name=\"%s\" value=\""), atts[0]);
  246. characterData(fp, atts[1], tcslen(atts[1]));
  247. if (atts >= specifiedAttsEnd)
  248. fputs(T("\" defaulted=\"yes\"/>\n"), fp);
  249. else
  250. fputts(T("\"/>\n"), fp);
  251. } while (*(atts += 2));
  252. fputts(T("</starttag>\n"), fp);
  253. }
  254. else
  255. fputts(T("/>\n"), fp);
  256. }
  257. static
  258. void metaEndElement(XML_Parser parser, const XML_Char *name)
  259. {
  260. FILE *fp = XML_GetUserData(parser);
  261. ftprintf(fp, T("<endtag name=\"%s\""), name);
  262. metaLocation(parser);
  263. fputts(T("/>\n"), fp);
  264. }
  265. static
  266. void metaProcessingInstruction(XML_Parser parser, const XML_Char *target, const XML_Char *data)
  267. {
  268. FILE *fp = XML_GetUserData(parser);
  269. ftprintf(fp, T("<pi target=\"%s\" data=\""), target);
  270. characterData(fp, data, tcslen(data));
  271. puttc(T('"'), fp);
  272. metaLocation(parser);
  273. fputts(T("/>\n"), fp);
  274. }
  275. static
  276. void metaComment(XML_Parser parser, const XML_Char *data)
  277. {
  278. FILE *fp = XML_GetUserData(parser);
  279. fputts(T("<comment data=\""), fp);
  280. characterData(fp, data, tcslen(data));
  281. puttc(T('"'), fp);
  282. metaLocation(parser);
  283. fputts(T("/>\n"), fp);
  284. }
  285. static
  286. void metaStartCdataSection(XML_Parser parser)
  287. {
  288. FILE *fp = XML_GetUserData(parser);
  289. fputts(T("<startcdata"), fp);
  290. metaLocation(parser);
  291. fputts(T("/>\n"), fp);
  292. }
  293. static
  294. void metaEndCdataSection(XML_Parser parser)
  295. {
  296. FILE *fp = XML_GetUserData(parser);
  297. fputts(T("<endcdata"), fp);
  298. metaLocation(parser);
  299. fputts(T("/>\n"), fp);
  300. }
  301. static
  302. void metaCharacterData(XML_Parser parser, const XML_Char *s, int len)
  303. {
  304. FILE *fp = XML_GetUserData(parser);
  305. fputts(T("<chars str=\""), fp);
  306. characterData(fp, s, len);
  307. puttc(T('"'), fp);
  308. metaLocation(parser);
  309. fputts(T("/>\n"), fp);
  310. }
  311. static
  312. void metaUnparsedEntityDecl(XML_Parser parser,
  313. const XML_Char *entityName,
  314. const XML_Char *base,
  315. const XML_Char *systemId,
  316. const XML_Char *publicId,
  317. const XML_Char *notationName)
  318. {
  319. FILE *fp = XML_GetUserData(parser);
  320. ftprintf(fp, T("<entity name=\"%s\""), entityName);
  321. if (publicId)
  322. ftprintf(fp, T(" public=\"%s\""), publicId);
  323. fputts(T(" system=\""), fp);
  324. characterData(fp, systemId, tcslen(systemId));
  325. puttc(T('"'), fp);
  326. ftprintf(fp, T(" notation=\"%s\""), notationName);
  327. metaLocation(parser);
  328. fputts(T("/>\n"), fp);
  329. }
  330. static
  331. void metaNotationDecl(XML_Parser parser,
  332. const XML_Char *notationName,
  333. const XML_Char *base,
  334. const XML_Char *systemId,
  335. const XML_Char *publicId)
  336. {
  337. FILE *fp = XML_GetUserData(parser);
  338. ftprintf(fp, T("<notation name=\"%s\""), notationName);
  339. if (publicId)
  340. ftprintf(fp, T(" public=\"%s\""), publicId);
  341. if (systemId) {
  342. fputts(T(" system=\""), fp);
  343. characterData(fp, systemId, tcslen(systemId));
  344. puttc(T('"'), fp);
  345. }
  346. metaLocation(parser);
  347. fputts(T("/>\n"), fp);
  348. }
  349. static
  350. void metaStartNamespaceDecl(XML_Parser parser,
  351. const XML_Char *prefix,
  352. const XML_Char *uri)
  353. {
  354. FILE *fp = XML_GetUserData(parser);
  355. fputts(T("<startns"), fp);
  356. if (prefix)
  357. ftprintf(fp, T(" prefix=\"%s\""), prefix);
  358. if (uri) {
  359. fputts(T(" ns=\""), fp);
  360. characterData(fp, uri, tcslen(uri));
  361. fputts(T("\"/>\n"), fp);
  362. }
  363. else
  364. fputts(T("/>\n"), fp);
  365. }
  366. static
  367. void metaEndNamespaceDecl(XML_Parser parser, const XML_Char *prefix)
  368. {
  369. FILE *fp = XML_GetUserData(parser);
  370. if (!prefix)
  371. fputts(T("<endns/>\n"), fp);
  372. else
  373. ftprintf(fp, T("<endns prefix=\"%s\"/>\n"), prefix);
  374. }
  /* Conversion callback stored in XML_Encoding.convert by unknownEncoding.
     data points at the int code page number saved there; the actual work is
     delegated to codepageConvert. */
  static
  int unknownEncodingConvert(void *data, const char *p)
  {
    const int *codePage = data;

    return codepageConvert(*codePage, p);
  }
  380. static
  381. int unknownEncoding(void *userData,
  382. const XML_Char *name,
  383. XML_Encoding *info)
  384. {
  385. int cp;
  386. static const XML_Char prefixL[] = T("windows-");
  387. static const XML_Char prefixU[] = T("WINDOWS-");
  388. int i;
  389. for (i = 0; prefixU[i]; i++)
  390. if (name[i] != prefixU[i] && name[i] != prefixL[i])
  391. return 0;
  392. cp = 0;
  393. for (; name[i]; i++) {
  394. static const XML_Char digits[] = T("0123456789");
  395. const XML_Char *s = tcschr(digits, name[i]);
  396. if (!s)
  397. return 0;
  398. cp *= 10;
  399. cp += s - digits;
  400. if (cp >= 0x10000)
  401. return 0;
  402. }
  403. if (!codepageMap(cp, info->map))
  404. return 0;
  405. info->convert = unknownEncodingConvert;
  406. /* We could just cast the code page integer to a void *,
  407. and avoid the use of release. */
  408. info->release = free;
  409. info->data = malloc(sizeof(int));
  410. if (!info->data)
  411. return 0;
  412. *(int *)info->data = cp;
  413. return 1;
  414. }
  /* Not-standalone handler installed by tmain when -s was given.  It always
     returns 0 and ignores userData. */
  static
  int notStandalone(void *userData)
  {
    (void)userData;
    return 0;
  }
  420. static
  421. void usage(const XML_Char *prog)
  422. {
  423. ftprintf(stderr, T("usage: %s [-n] [-r] [-s] [-w] [-x] [-d output-dir] [-e encoding] file ...\n"), prog);
  424. exit(1);
  425. }
  426. int tmain(int argc, XML_Char **argv)
  427. {
  428. int i, j;
  429. const XML_Char *outputDir = 0;
  430. const XML_Char *encoding = 0;
  431. unsigned processFlags = XML_MAP_FILE;
  432. int windowsCodePages = 0;
  433. int outputType = 0;
  434. int useNamespaces = 0;
  435. int requireStandalone = 0;
  436. #ifdef _MSC_VER
  437. _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
  438. #endif
  439. i = 1;
  440. j = 0;
  441. while (i < argc) {
  442. if (j == 0) {
  443. if (argv[i][0] != T('-'))
  444. break;
  445. if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
  446. i++;
  447. break;
  448. }
  449. j++;
  450. }
  451. switch (argv[i][j]) {
  452. case T('r'):
  453. processFlags &= ~XML_MAP_FILE;
  454. j++;
  455. break;
  456. case T('s'):
  457. requireStandalone = 1;
  458. j++;
  459. break;
  460. case T('n'):
  461. useNamespaces = 1;
  462. j++;
  463. break;
  464. case T('x'):
  465. processFlags |= XML_EXTERNAL_ENTITIES;
  466. j++;
  467. break;
  468. case T('w'):
  469. windowsCodePages = 1;
  470. j++;
  471. break;
  472. case T('m'):
  473. outputType = 'm';
  474. j++;
  475. break;
  476. case T('c'):
  477. outputType = 'c';
  478. useNamespaces = 0;
  479. j++;
  480. break;
  481. case T('t'):
  482. outputType = 't';
  483. j++;
  484. break;
  485. case T('d'):
  486. if (argv[i][j + 1] == T('\0')) {
  487. if (++i == argc)
  488. usage(argv[0]);
  489. outputDir = argv[i];
  490. }
  491. else
  492. outputDir = argv[i] + j + 1;
  493. i++;
  494. j = 0;
  495. break;
  496. case T('e'):
  497. if (argv[i][j + 1] == T('\0')) {
  498. if (++i == argc)
  499. usage(argv[0]);
  500. encoding = argv[i];
  501. }
  502. else
  503. encoding = argv[i] + j + 1;
  504. i++;
  505. j = 0;
  506. break;
  507. case T('\0'):
  508. if (j > 1) {
  509. i++;
  510. j = 0;
  511. break;
  512. }
  513. /* fall through */
  514. default:
  515. usage(argv[0]);
  516. }
  517. }
  518. if (i == argc)
  519. usage(argv[0]);
  520. for (; i < argc; i++) {
  521. FILE *fp = 0;
  522. XML_Char *outName = 0;
  523. int result;
  524. XML_Parser parser;
  525. if (useNamespaces)
  526. parser = XML_ParserCreateNS(encoding, NSSEP);
  527. else
  528. parser = XML_ParserCreate(encoding);
  529. if (requireStandalone)
  530. XML_SetNotStandaloneHandler(parser, notStandalone);
  531. if (outputType == 't') {
  532. /* This is for doing timings; this gives a more realistic estimate of
  533. the parsing time. */
  534. outputDir = 0;
  535. XML_SetElementHandler(parser, nopStartElement, nopEndElement);
  536. XML_SetCharacterDataHandler(parser, nopCharacterData);
  537. XML_SetProcessingInstructionHandler(parser, nopProcessingInstruction);
  538. }
  539. else if (outputDir) {
  540. const XML_Char *file = argv[i];
  541. if (tcsrchr(file, T('/')))
  542. file = tcsrchr(file, T('/')) + 1;
  543. #ifdef WIN32
  544. if (tcsrchr(file, T('\\')))
  545. file = tcsrchr(file, T('\\')) + 1;
  546. #endif
  547. outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
  548. tcscpy(outName, outputDir);
  549. tcscat(outName, T("/"));
  550. tcscat(outName, file);
  551. fp = tfopen(outName, T("wb"));
  552. if (!fp) {
  553. tperror(outName);
  554. exit(1);
  555. }
  556. setvbuf(fp, NULL, _IOFBF, 16384);
  557. #ifdef XML_UNICODE
  558. puttc(0xFEFF, fp);
  559. #endif
  560. XML_SetUserData(parser, fp);
  561. switch (outputType) {
  562. case 'm':
  563. XML_UseParserAsHandlerArg(parser);
  564. XML_SetElementHandler(parser, metaStartElement, metaEndElement);
  565. XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
  566. XML_SetCommentHandler(parser, metaComment);
  567. XML_SetCdataSectionHandler(parser, metaStartCdataSection, metaEndCdataSection);
  568. XML_SetCharacterDataHandler(parser, metaCharacterData);
  569. XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl);
  570. XML_SetNotationDeclHandler(parser, metaNotationDecl);
  571. XML_SetNamespaceDeclHandler(parser, metaStartNamespaceDecl, metaEndNamespaceDecl);
  572. metaStartDocument(parser);
  573. break;
  574. case 'c':
  575. XML_UseParserAsHandlerArg(parser);
  576. XML_SetDefaultHandler(parser, markup);
  577. XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
  578. XML_SetCharacterDataHandler(parser, defaultCharacterData);
  579. XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction);
  580. break;
  581. default:
  582. if (useNamespaces)
  583. XML_SetElementHandler(parser, startElementNS, endElementNS);
  584. else
  585. XML_SetElementHandler(parser, startElement, endElement);
  586. XML_SetCharacterDataHandler(parser, characterData);
  587. XML_SetProcessingInstructionHandler(parser, processingInstruction);
  588. break;
  589. }
  590. }
  591. if (windowsCodePages)
  592. XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
  593. result = XML_ProcessFile(parser, argv[i], processFlags);
  594. if (outputDir) {
  595. if (outputType == 'm')
  596. metaEndDocument(parser);
  597. fclose(fp);
  598. if (!result)
  599. tremove(outName);
  600. free(outName);
  601. }
  602. XML_ParserFree(parser);
  603. }
  604. return 0;
  605. }