PageRenderTime 69ms CodeModel.GetById 36ms RepoModel.GetById 0ms app.codeStats 0ms

/src/xml_validate.c

https://bitbucket.org/dnelson/xmlstar
C | 500 lines | 393 code | 48 blank | 59 comment | 110 complexity | c1fee1d4b312edec19aa682f24581672 MD5 | raw file
  1. /* $Id: xml_validate.c,v 1.36 2005/01/07 01:52:43 mgrouch Exp $ */
  2. /*
  3. XMLStarlet: Command Line Toolkit to query/edit/check/transform XML documents
  4. Copyright (c) 2002-2004 Mikhail Grushinskiy. All Rights Reserved.
  5. Permission is hereby granted, free of charge, to any person obtaining a copy
  6. of this software and associated documentation files (the "Software"), to deal
  7. in the Software without restriction, including without limitation the rights
  8. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9. copies of the Software, and to permit persons to whom the Software is
  10. furnished to do so, subject to the following conditions:
  11. The above copyright notice and this permission notice shall be included in
  12. all copies or substantial portions of the Software.
  13. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  18. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  19. THE SOFTWARE.
  20. */
  21. #include <config.h>
  22. #include <string.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include "xmlstar.h"
  26. #include "trans.h"
  27. #ifdef LIBXML_SCHEMAS_ENABLED
  28. #include <libxml/xmlschemas.h>
  29. #include <libxml/xmlschemastypes.h>
  30. #endif
  31. #ifdef LIBXML_SCHEMAS_ENABLED
  32. #include <libxml/relaxng.h>
  33. #endif
  34. #include <libxml/xmlreader.h>
  35. /*
  36. * TODO: Use cases
  37. * 1. find malformed XML documents in a given set of XML files
  38. * 2. find XML documents which do not match DTD/XSD in a given set of XML files
  39. * 3. precompile DTD once
  40. */
  41. typedef struct _valOptions {
  42. char *dtd; /* External DTD URL or file name */
  43. char *schema; /* External Schema URL or file name */
  44. char *relaxng; /* External Relax-NG Schema URL or file name */
  45. int err; /* Allow stderr messages */
  46. int stop; /* Stop on first error */
  47. int embed; /* Validate using embeded DTD */
  48. int wellFormed; /* Check if well formed only */
  49. int listGood; /* >0 list good, <0 list bad */
  50. int show_val_res; /* display file names and valid/invalid message */
  51. int nonet; /* disallow network access */
  52. } valOptions;
  53. typedef valOptions *valOptionsPtr;
  54. /**
  55. * display short help message
  56. */
  57. void
  58. valUsage(int argc, char **argv, exit_status status)
  59. {
  60. extern void fprint_validate_usage(FILE* o, const char* argv0);
  61. extern const char more_info[];
  62. FILE *o = (status == EXIT_SUCCESS)? stdout : stderr;
  63. fprint_validate_usage(o, argv[0]);
  64. fprintf(o, "%s", more_info);
  65. exit(status);
  66. }
  67. /**
  68. * Initialize global command line options
  69. */
  70. void
  71. valInitOptions(valOptionsPtr ops)
  72. {
  73. ops->wellFormed = 1;
  74. ops->err = 0;
  75. ops->stop = 0;
  76. ops->embed = 0;
  77. ops->dtd = NULL;
  78. ops->schema = NULL;
  79. ops->relaxng = NULL;
  80. ops->nonet = 1;
  81. if (globalOptions.quiet) {
  82. ops->listGood = 0;
  83. ops->show_val_res = 0;
  84. } else {
  85. ops->listGood = -1;
  86. ops->show_val_res = 1;
  87. }
  88. }
  89. /**
  90. * Parse global command line options
  91. */
  92. int
  93. valParseOptions(valOptionsPtr ops, int argc, char **argv)
  94. {
  95. int i;
  96. i = 2;
  97. while(i < argc)
  98. {
  99. if (!strcmp(argv[i], "--well-formed") || !strcmp(argv[i], "-w"))
  100. {
  101. ops->wellFormed = 1;
  102. i++;
  103. }
  104. else if (!strcmp(argv[i], "--err") || !strcmp(argv[i], "-e"))
  105. {
  106. ops->err = 1;
  107. i++;
  108. }
  109. else if (!strcmp(argv[i], "--stop") || !strcmp(argv[i], "-S"))
  110. {
  111. ops->stop = STOP;
  112. i++;
  113. }
  114. else if (!strcmp(argv[i], "--embed") || !strcmp(argv[i], "-E"))
  115. {
  116. ops->embed = 1;
  117. i++;
  118. }
  119. else if (!strcmp(argv[i], "--list-good") || !strcmp(argv[i], "-g"))
  120. {
  121. ops->listGood = 1;
  122. ops->show_val_res = 0;
  123. i++;
  124. }
  125. else if (!strcmp(argv[i], "--list-bad") || !strcmp(argv[i], "-b"))
  126. {
  127. ops->listGood = -1;
  128. ops->show_val_res = 0;
  129. i++;
  130. }
  131. else if (!strcmp(argv[i], "--quiet") || !strcmp(argv[i], "-q"))
  132. {
  133. ops->listGood = 0;
  134. ops->show_val_res = 0;
  135. i++;
  136. }
  137. else if (!strcmp(argv[i], "--dtd") || !strcmp(argv[i], "-d"))
  138. {
  139. i++;
  140. if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
  141. ops->dtd = argv[i];
  142. i++;
  143. }
  144. else if (!strcmp(argv[i], "--xsd") || !strcmp(argv[i], "-s"))
  145. {
  146. i++;
  147. if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
  148. ops->schema = argv[i];
  149. i++;
  150. }
  151. else if (!strcmp(argv[i], "--relaxng") || !strcmp(argv[i], "-r"))
  152. {
  153. i++;
  154. if (i >= argc) valUsage(argc, argv, EXIT_BAD_ARGS);
  155. ops->relaxng = argv[i];
  156. i++;
  157. }
  158. else if (!strcmp(argv[i], "--net"))
  159. {
  160. ops->nonet = 0;
  161. i++;
  162. }
  163. else if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
  164. {
  165. valUsage(argc, argv, EXIT_SUCCESS);
  166. }
  167. else if (!strcmp(argv[i], "-"))
  168. {
  169. i++;
  170. break;
  171. }
  172. else if (argv[i][0] == '-')
  173. {
  174. valUsage(argc, argv, EXIT_BAD_ARGS);
  175. }
  176. else
  177. {
  178. i++;
  179. break;
  180. }
  181. }
  182. return i-1;
  183. }
  184. /**
  185. * Validate XML document against DTD
  186. */
  187. int
  188. valAgainstDtd(valOptionsPtr ops, char* dtdvalid, xmlDocPtr doc, char* filename)
  189. {
  190. int result = 0;
  191. if (dtdvalid != NULL)
  192. {
  193. xmlDtdPtr dtd;
  194. #if !defined(LIBXML_VALID_ENABLED)
  195. xmlGenericError(xmlGenericErrorContext,
  196. "libxml2 has no validation support");
  197. return 2;
  198. #endif
  199. dtd = xmlParseDTD(NULL, (const xmlChar *)dtdvalid);
  200. if (dtd == NULL)
  201. {
  202. xmlGenericError(xmlGenericErrorContext,
  203. "Could not parse DTD %s\n", dtdvalid);
  204. result = 2;
  205. }
  206. else
  207. {
  208. xmlValidCtxtPtr cvp;
  209. if ((cvp = xmlNewValidCtxt()) == NULL)
  210. {
  211. xmlGenericError(xmlGenericErrorContext,
  212. "Couldn't allocate validation context\n");
  213. exit(-1);
  214. }
  215. if (ops->err)
  216. {
  217. cvp->userData = (void *) stderr;
  218. cvp->error = (xmlValidityErrorFunc) fprintf;
  219. cvp->warning = (xmlValidityWarningFunc) fprintf;
  220. }
  221. else
  222. {
  223. cvp->userData = (void *) NULL;
  224. cvp->error = (xmlValidityErrorFunc) NULL;
  225. cvp->warning = (xmlValidityWarningFunc) NULL;
  226. }
  227. if (!xmlValidateDtd(cvp, doc, dtd))
  228. {
  229. if ((ops->listGood < 0) && !ops->show_val_res)
  230. {
  231. fprintf(stdout, "%s\n", filename);
  232. }
  233. else if (ops->listGood == 0)
  234. xmlGenericError(xmlGenericErrorContext,
  235. "%s: does not match %s\n",
  236. filename, dtdvalid);
  237. result = 3;
  238. }
  239. else
  240. {
  241. if ((ops->listGood > 0) && !ops->show_val_res)
  242. {
  243. fprintf(stdout, "%s\n", filename);
  244. }
  245. }
  246. xmlFreeDtd(dtd);
  247. xmlFreeValidCtxt(cvp);
  248. }
  249. }
  250. return result;
  251. }
  252. /**
  253. * This is the main function for 'validate' option
  254. */
  255. int
  256. valMain(int argc, char **argv)
  257. {
  258. int start;
  259. static valOptions ops;
  260. static ErrorInfo errorInfo;
  261. int invalidFound = 0;
  262. int options = XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR;
  263. if (argc <= 2) valUsage(argc, argv, EXIT_BAD_ARGS);
  264. valInitOptions(&ops);
  265. start = valParseOptions(&ops, argc, argv);
  266. if (ops.nonet) options |= XML_PARSE_NONET;
  267. errorInfo.verbose = ops.err;
  268. xmlSetStructuredErrorFunc(&errorInfo, reportError);
  269. xmlLineNumbersDefault(1);
  270. if (ops.dtd)
  271. {
  272. /* xmlReader doesn't work with external dtd, have to use SAX
  273. * interface */
  274. int i;
  275. /* we have to exit() from the error reporting function to implement
  276. --stop */
  277. errorInfo.stop = ops.stop;
  278. for (i=start; i<argc; i++)
  279. {
  280. xmlDocPtr doc;
  281. int failed;
  282. failed = 0;
  283. doc = NULL;
  284. errorInfo.filename = argv[i];
  285. doc = xmlReadFile(argv[i], NULL, options);
  286. if (doc)
  287. {
  288. /* TODO: precompile DTD once */
  289. failed = valAgainstDtd(&ops, ops.dtd, doc, argv[i]);
  290. xmlFreeDoc(doc);
  291. }
  292. else
  293. {
  294. failed = 1; /* Malformed XML or could not open file */
  295. if ((ops.listGood < 0) && !ops.show_val_res)
  296. {
  297. fprintf(stdout, "%s\n", argv[i]);
  298. }
  299. }
  300. if (failed) invalidFound = 1;
  301. if (ops.show_val_res)
  302. {
  303. if (!failed)
  304. fprintf(stdout, "%s - valid\n", argv[i]);
  305. else
  306. fprintf(stdout, "%s - invalid\n", argv[i]);
  307. }
  308. }
  309. }
  310. else if (ops.schema || ops.relaxng || ops.embed || ops.wellFormed)
  311. {
  312. int i;
  313. xmlTextReaderPtr reader = NULL;
  314. #ifdef LIBXML_SCHEMAS_ENABLED
  315. xmlSchemaPtr schema = NULL;
  316. xmlSchemaParserCtxtPtr schemaParserCtxt = NULL;
  317. xmlSchemaValidCtxtPtr schemaCtxt = NULL;
  318. xmlRelaxNGPtr relaxng = NULL;
  319. xmlRelaxNGParserCtxtPtr relaxngParserCtxt = NULL;
  320. /* there is no xmlTextReaderRelaxNGValidateCtxt() !? */
  321. /* TODO: Do not print debug stuff */
  322. if (ops.schema)
  323. {
  324. schemaParserCtxt = xmlSchemaNewParserCtxt(ops.schema);
  325. if (!schemaParserCtxt)
  326. {
  327. invalidFound = 2;
  328. goto schemaCleanup;
  329. }
  330. errorInfo.filename = ops.schema;
  331. schema = xmlSchemaParse(schemaParserCtxt);
  332. if (!schema)
  333. {
  334. invalidFound = 2;
  335. goto schemaCleanup;
  336. }
  337. xmlSchemaFreeParserCtxt(schemaParserCtxt);
  338. schemaCtxt = xmlSchemaNewValidCtxt(schema);
  339. if (!schemaCtxt)
  340. {
  341. invalidFound = 2;
  342. goto schemaCleanup;
  343. }
  344. }
  345. else if (ops.relaxng)
  346. {
  347. relaxngParserCtxt = xmlRelaxNGNewParserCtxt(ops.relaxng);
  348. if (!relaxngParserCtxt)
  349. {
  350. invalidFound = 2;
  351. goto schemaCleanup;
  352. }
  353. errorInfo.filename = ops.relaxng;
  354. relaxng = xmlRelaxNGParse(relaxngParserCtxt);
  355. if (!relaxng)
  356. {
  357. invalidFound = 2;
  358. goto schemaCleanup;
  359. }
  360. }
  361. #endif /* LIBXML_SCHEMAS_ENABLED */
  362. for (i=start; i<argc; i++)
  363. {
  364. int failed = 0;
  365. if (ops.embed) options |= XML_PARSE_DTDVALID;
  366. if (!reader)
  367. {
  368. reader = xmlReaderForFile(argv[i], NULL, options);
  369. }
  370. else
  371. {
  372. failed = xmlReaderNewFile(reader, argv[i], NULL, options);
  373. }
  374. errorInfo.xmlReader = reader;
  375. errorInfo.filename = argv[i];
  376. /* It makes no sense to continue if we are not reporting errors
  377. * anyway. Note this doesn't apply to the --dtd case because the we
  378. * can't stop there without aborting the whole program (and
  379. * therefore we wouldn't be able to check multiple files).
  380. */
  381. if (!ops.err)
  382. ops.stop = STOP;
  383. if (reader && !failed)
  384. {
  385. #ifdef LIBXML_SCHEMAS_ENABLED
  386. if (schemaCtxt)
  387. {
  388. failed = xmlTextReaderSchemaValidateCtxt(reader,
  389. schemaCtxt, 0);
  390. }
  391. else if (relaxng)
  392. {
  393. failed = xmlTextReaderRelaxNGSetSchema(reader,
  394. relaxng);
  395. }
  396. #endif /* LIBXML_SCHEMAS_ENABLED */
  397. if (failed == 0)
  398. {
  399. int more_nodes;
  400. int validating = (schema || relaxng || ops.embed);
  401. do
  402. {
  403. more_nodes = xmlTextReaderRead(reader);
  404. failed =
  405. (more_nodes == -1)? 1 :
  406. (!validating)? 0 :
  407. xmlTextReaderIsValid(reader) != 1;
  408. } while (more_nodes == 1 && (!failed || !ops.stop));
  409. }
  410. }
  411. else
  412. {
  413. if (ops.err)
  414. fprintf(stderr, "couldn't read file '%s'\n", errorInfo.filename);
  415. failed = 1; /* could not open file */
  416. }
  417. if (failed) invalidFound = 1;
  418. if (!ops.show_val_res)
  419. {
  420. if ((ops.listGood > 0) && !failed)
  421. fprintf(stdout, "%s\n", argv[i]);
  422. if ((ops.listGood < 0) && failed)
  423. fprintf(stdout, "%s\n", argv[i]);
  424. }
  425. else
  426. {
  427. if (!failed)
  428. fprintf(stdout, "%s - valid\n", argv[i]);
  429. else
  430. fprintf(stdout, "%s - invalid\n", argv[i]);
  431. }
  432. }
  433. errorInfo.xmlReader = NULL;
  434. xmlFreeTextReader(reader);
  435. #ifdef LIBXML_SCHEMAS_ENABLED
  436. schemaCleanup:
  437. xmlSchemaFreeValidCtxt(schemaCtxt);
  438. xmlRelaxNGFree(relaxng);
  439. xmlSchemaFree(schema);
  440. xmlRelaxNGCleanupTypes();
  441. xmlSchemaCleanupTypes();
  442. #endif /* LIBXML_SCHEMAS_ENABLED */
  443. }
  444. xmlCleanupParser();
  445. return invalidFound;
  446. }