PageRenderTime 44ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/src/xml_format.c

https://bitbucket.org/dnelson/xmlstar
C | 382 lines | 281 code | 33 blank | 68 comment | 71 complexity | c8b33766a4bd5be9992e6e98bd36e38c MD5 | raw file
  1. /* $Id: xml_format.c,v 1.25 2005/01/07 02:33:40 mgrouch Exp $ */
  2. /*
  3. XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
  4. Copyright (c) 2002 Mikhail Grushinskiy. All Rights Reserved.
  5. Permission is hereby granted, free of charge, to any person obtaining a copy
  6. of this software and associated documentation files (the "Software"), to deal
  7. in the Software without restriction, including without limitation the rights
  8. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. copies of the Software, and to permit persons to whom the Software is
  10. furnished to do so, subject to the following conditions:
  11. The above copyright notice and this permission notice shall be included in
  12. all copies or substantial portions of the Software.
  13. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. THE SOFTWARE.
  20. */
  21. #include <config.h>
  22. #include <string.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <libxml/xmlmemory.h>
  26. #include <libxml/debugXML.h>
  27. #include <libxml/xmlIO.h>
  28. #include <libxml/HTMLtree.h>
  29. #include <libxml/xinclude.h>
  30. #include <libxml/xpath.h>
  31. #include <libxml/xpathInternals.h>
  32. #include <libxml/xpointer.h>
  33. #include <libxml/parserInternals.h>
  34. #include <libxml/uri.h>
  35. #include "xmlstar.h"
  36. /*
  37. * TODO: 1. Attribute formatting options (as every attribute on a new line)
  38. * 2. exit values on errors
  39. */
  /*
   * Settings of the 'format' subcommand.  Every field is a plain int; the
   * on/off switches hold 0 or 1.
   */
  typedef struct _foOptions
  {
      /* indentation */
      int indent;          /* indent output */
      int indent_tab;      /* indent output with tab */
      int indent_spaces;   /* num spaces for indentation */

      /* document handling */
      int omit_decl;       /* omit xml declaration */
      int recovery;        /* try to recover what is parsable */
      int dropdtd;         /* remove the DOCTYPE of the input docs */
      int options;         /* global parsing flags */
  #ifdef LIBXML_HTML_ENABLED
      int html;            /* inputs are in HTML format */
  #endif

      /* diagnostics */
      int quiet;           /* quiet mode */
  } foOptions, *foOptionsPtr;

  /* Output encoding name; NULL until one is given on the command line. */
  const char *encoding = NULL;

  /* Heap buffer holding the run of blanks used as the indent string. */
  static char *spaces = NULL;
  56. /**
  57. * Print small help for command line options
  58. */
  59. void
  60. foUsage(int argc, char **argv, exit_status status)
  61. {
  62. extern void fprint_format_usage(FILE* o, const char* argv0);
  63. extern const char more_info[];
  64. FILE *o = (status == EXIT_SUCCESS)? stdout : stderr;
  65. fprint_format_usage(o, argv[0]);
  66. fprintf(o, "%s", more_info);
  67. exit(status);
  68. }
  69. /**
  70. * Initialize global command line options
  71. */
  72. void
  73. foInitOptions(foOptionsPtr ops)
  74. {
  75. ops->indent = 1;
  76. ops->indent_tab = 0;
  77. ops->indent_spaces = 2;
  78. ops->omit_decl = 0;
  79. ops->recovery = 0;
  80. ops->dropdtd = 0;
  81. ops->options = XML_PARSE_NONET;
  82. #ifdef LIBXML_HTML_ENABLED
  83. ops->html = 0;
  84. #endif
  85. ops->quiet = globalOptions.quiet;
  86. }
  87. /**
  88. * Initialize LibXML
  89. */
  90. void
  91. foInitLibXml(foOptionsPtr ops)
  92. {
  93. /*
  94. * Initialize library memory
  95. */
  96. xmlInitMemory();
  97. LIBXML_TEST_VERSION
  98. /*
  99. * Store line numbers in the document tree
  100. */
  101. xmlLineNumbersDefault(1);
  102. xmlSubstituteEntitiesDefault(1);
  103. xmlKeepBlanksDefault(0);
  104. xmlPedanticParserDefault(0);
  105. xmlGetWarningsDefaultValue = 1;
  106. xmlDoValidityCheckingDefaultValue = 0;
  107. xmlLoadExtDtdDefaultValue = 0;
  108. xmlTreeIndentString = NULL;
  109. if (ops->indent)
  110. {
  111. xmlIndentTreeOutput = 1;
  112. if (ops->indent_tab)
  113. {
  114. xmlTreeIndentString = "\t";
  115. }
  116. else if (ops->indent_spaces > 0)
  117. {
  118. spaces = xmlMalloc(ops->indent_spaces + 1);
  119. xmlTreeIndentString = spaces;
  120. memset(spaces, ' ', ops->indent_spaces);
  121. spaces[ops->indent_spaces] = '\0';
  122. }
  123. }
  124. else
  125. xmlIndentTreeOutput = 0;
  126. }
  127. /**
  128. * Parse global command line options
  129. */
  130. int
  131. foParseOptions(foOptionsPtr ops, int argc, char **argv)
  132. {
  133. int i;
  134. i = 2;
  135. while(i < argc)
  136. {
  137. if (!strcmp(argv[i], "--noindent") || !strcmp(argv[i], "-n"))
  138. {
  139. ops->indent = 0;
  140. i++;
  141. }
  142. else if (!strcmp(argv[i], "--encode") || !strcmp(argv[i], "-e"))
  143. {
  144. i++;
  145. encoding = argv[i];
  146. i++;
  147. }
  148. else if (!strcmp(argv[i], "--indent-tab") || !strcmp(argv[i], "-t"))
  149. {
  150. ops->indent_tab = 1;
  151. i++;
  152. }
  153. else if (!strcmp(argv[i], "--omit-decl") || !strcmp(argv[i], "-o"))
  154. {
  155. ops->omit_decl = 1;
  156. i++;
  157. }
  158. else if (!strcmp(argv[i], "--dropdtd") || !strcmp(argv[i], "-D"))
  159. {
  160. ops->dropdtd = 1;
  161. i++;
  162. }
  163. else if (!strcmp(argv[i], "--recover") || !strcmp(argv[i], "-R"))
  164. {
  165. ops->recovery = 1;
  166. ops->options |= XML_PARSE_RECOVER;
  167. i++;
  168. }
  169. else if (!strcmp(argv[i], "--nocdata") || !strcmp(argv[i], "-C"))
  170. {
  171. ops->options |= XML_PARSE_NOCDATA;
  172. i++;
  173. }
  174. else if (!strcmp(argv[i], "--nsclean") || !strcmp(argv[i], "-N"))
  175. {
  176. ops->options |= XML_PARSE_NSCLEAN;
  177. i++;
  178. }
  179. else if (!strcmp(argv[i], "--indent-spaces") || !strcmp(argv[i], "-s"))
  180. {
  181. int value;
  182. i++;
  183. if (i >= argc) foUsage(argc, argv, EXIT_BAD_ARGS);
  184. if (sscanf(argv[i], "%d", &value) == 1)
  185. {
  186. if (value > 0) ops->indent_spaces = value;
  187. }
  188. else
  189. {
  190. foUsage(argc, argv, EXIT_BAD_ARGS);
  191. }
  192. ops->indent_tab = 0;
  193. i++;
  194. }
  195. else if (!strcmp(argv[i], "--quiet") || !strcmp(argv[i], "-Q"))
  196. {
  197. ops->quiet = 1;
  198. i++;
  199. }
  200. #ifdef LIBXML_HTML_ENABLED
  201. else if (!strcmp(argv[i], "--html") || !strcmp(argv[i], "-H"))
  202. {
  203. ops->html = 1;
  204. i++;
  205. }
  206. #endif
  207. else if (!strcmp(argv[i], "--net"))
  208. {
  209. ops->options &= ~XML_PARSE_NONET;
  210. i++;
  211. }
  212. else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
  213. {
  214. foUsage(argc, argv, EXIT_SUCCESS);
  215. }
  216. else if (!strcmp(argv[i], "-"))
  217. {
  218. i++;
  219. break;
  220. }
  221. else if (argv[i][0] == '-')
  222. {
  223. foUsage(argc, argv, EXIT_BAD_ARGS);
  224. }
  225. else
  226. {
  227. i++;
  228. break;
  229. }
  230. }
  231. return i-1;
  232. }
  233. /**
  234. * 'process' xml document(s)
  235. */
  236. int
  237. foProcess(foOptionsPtr ops, int start, int argc, char **argv)
  238. {
  239. int ret = 0;
  240. xmlDocPtr doc = NULL;
  241. char *fileName = "-";
  242. if ((start > 1) && (start < argc) && (argv[start][0] != '-') &&
  243. strcmp(argv[start-1], "--indent-spaces") &&
  244. strcmp(argv[start-1], "-s"))
  245. {
  246. fileName = argv[start];
  247. }
  248. /*
  249. if (ops->recovery)
  250. {
  251. doc = xmlRecoverFile(fileName);
  252. }
  253. else
  254. */
  255. if (ops->quiet)
  256. suppressErrors();
  257. #ifdef LIBXML_HTML_ENABLED
  258. if (ops->html)
  259. {
  260. doc = htmlReadFile(fileName, NULL, ops->options);
  261. }
  262. else
  263. #endif
  264. doc = xmlReadFile(fileName, NULL, ops->options);
  265. if (doc == NULL)
  266. {
  267. /*fprintf(stderr, "%s:: error: XML parse error\n", fileName);*/
  268. return 2;
  269. }
  270. /*
  271. * Remove DOCTYPE nodes
  272. */
  273. if (ops->dropdtd) {
  274. xmlDtdPtr dtd;
  275. dtd = xmlGetIntSubset(doc);
  276. if (dtd != NULL) {
  277. xmlUnlinkNode((xmlNodePtr)dtd);
  278. xmlFreeDtd(dtd);
  279. }
  280. }
  281. if (!ops->omit_decl)
  282. {
  283. if (encoding != NULL)
  284. {
  285. xmlSaveFormatFileEnc("-", doc, encoding, 1);
  286. }
  287. else
  288. {
  289. xmlSaveFormatFile("-", doc, 1);
  290. }
  291. }
  292. else
  293. {
  294. int format = 1;
  295. xmlOutputBufferPtr buf = NULL;
  296. xmlCharEncodingHandlerPtr handler = NULL;
  297. buf = xmlOutputBufferCreateFile(stdout, handler);
  298. if (doc->children != NULL)
  299. {
  300. xmlNodePtr child = doc->children;
  301. while (child != NULL)
  302. {
  303. xmlNodeDumpOutput(buf, doc, child, 0, format, encoding);
  304. xmlOutputBufferWriteString(buf, "\n");
  305. child = child->next;
  306. }
  307. }
  308. ret = xmlOutputBufferClose(buf);
  309. }
  310. xmlFreeDoc(doc);
  311. return ret;
  312. }
  313. /**
  314. * Cleanup memory
  315. */
  316. void
  317. foCleanup()
  318. {
  319. free(spaces);
  320. spaces = NULL;
  321. xmlCleanupParser();
  322. #if 0
  323. xmlMemoryDump();
  324. #endif
  325. }
  326. /**
  327. * This is the main function for 'format' option
  328. */
  329. int
  330. foMain(int argc, char **argv)
  331. {
  332. int ret = 0;
  333. int start;
  334. static foOptions ops;
  335. if (argc <=1) foUsage(argc, argv, EXIT_BAD_ARGS);
  336. foInitOptions(&ops);
  337. start = foParseOptions(&ops, argc, argv);
  338. if (argc-start > 1) foUsage(argc, argv, EXIT_BAD_ARGS);
  339. foInitLibXml(&ops);
  340. ret = foProcess(&ops, start, argc, argv);
  341. foCleanup();
  342. return ret;
  343. }