/src/main/java/com/t11e/discovery/datatool/StaxUtil.java

http://github.com/t11e/discovery_datatool · Java · 318 lines · 254 code · 17 blank · 47 comment · 27 complexity · 13da4e43ea853f74cf8b2481ee55bf66 MD5 · raw file

  1. package com.t11e.discovery.datatool;
  2. import java.util.regex.Pattern;
  3. import javax.xml.stream.XMLInputFactory;
  4. import javax.xml.stream.XMLOutputFactory;
  5. import javax.xml.stream.XMLStreamConstants;
  6. import javax.xml.stream.XMLStreamException;
  7. import javax.xml.stream.XMLStreamReader;
  8. import javax.xml.stream.XMLStreamWriter;
  9. public class StaxUtil
  10. {
  11. /**
  12. * Create a new XMLInputFactory, better than calling
  13. * XMLInputFactory.newInstance() directly as it allows for
  14. * easier override without dealing with endorsed directories.
  15. */
  16. public static XMLInputFactory newInputFactory()
  17. {
  18. return new com.ctc.wstx.stax.WstxInputFactory();
  19. }
  20. /**
  21. * Create a new XMLOutputFactory, better than calling
  22. * XMLOutputFactory() directly as it allows for
  23. * easier override without dealing with endorsed directories.
  24. */
  25. public static XMLOutputFactory newOutputFactory()
  26. {
  27. return new com.ctc.wstx.stax.WstxOutputFactory();
  28. }
  29. public static String getRequiredAttributeValue(final XMLStreamReader reader,
  30. final String ns, final String localName)
  31. throws XMLStreamException
  32. {
  33. final String value = reader.getAttributeValue(ns, localName);
  34. if (value == null)
  35. {
  36. throw newMissingAttributeException(ns, localName);
  37. }
  38. return value;
  39. }
  40. public static XMLStreamException newMissingAttributeException(final String ns,
  41. final String name)
  42. {
  43. return new XMLStreamException("The required attribute " +
  44. (ns == null ? "" : "{" + ns + "}") + name + " is missing");
  45. }
  46. /**
  47. * Variant of XMLStreamReader.nextTag that additionally ignores any DTD tokens.
  48. */
  49. public static int nextTagIgnoringDocType(final XMLStreamReader reader)
  50. throws XMLStreamException
  51. {
  52. int next = -1;
  53. boolean done = false;
  54. do
  55. {
  56. next = reader.next();
  57. switch (next)
  58. {
  59. case XMLStreamConstants.SPACE:
  60. case XMLStreamConstants.COMMENT:
  61. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  62. break;
  63. case XMLStreamConstants.CDATA:
  64. case XMLStreamConstants.CHARACTERS:
  65. if (!reader.isWhiteSpace())
  66. {
  67. throw new XMLStreamException("Received non-all-whitespace CHARACTERS" +
  68. " or CDATA event in nextTagIgnoringDocType().");
  69. }
  70. break;
  71. case XMLStreamConstants.START_ELEMENT:
  72. case XMLStreamConstants.END_ELEMENT:
  73. done = true;
  74. break;
  75. case XMLStreamConstants.DTD:
  76. // Swallow
  77. break;
  78. default:
  79. throw new XMLStreamException("Received event " + tokenTypeDesc(next)
  80. + ", instead of START_ELEMENT or END_ELEMENT.");
  81. }
  82. }
  83. while (!done);
  84. return next;
  85. }
  86. /**
  87. * Find either the next START_ELEMENT or text, returns the text if
  88. * available and null if a tag was found.
  89. */
  90. public static String nextTextOrTag(final XMLStreamReader reader)
  91. throws XMLStreamException
  92. {
  93. String output = null;
  94. reader.require(XMLStreamConstants.START_ELEMENT, null, null);
  95. StringBuffer buffer = null;
  96. boolean done = false;
  97. do
  98. {
  99. final int type = reader.next();
  100. switch (type)
  101. {
  102. case XMLStreamConstants.CHARACTERS:
  103. case XMLStreamConstants.CDATA:
  104. case XMLStreamConstants.SPACE:
  105. case XMLStreamConstants.ENTITY_REFERENCE:
  106. if (buffer == null)
  107. {
  108. buffer = new StringBuffer();
  109. }
  110. buffer.append(reader.getText());
  111. break;
  112. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  113. case XMLStreamConstants.COMMENT:
  114. // Swallow
  115. break;
  116. case XMLStreamConstants.START_ELEMENT:
  117. if (buffer != null)
  118. {
  119. boolean containsNonWhitespace = false;
  120. for (int i = 0; i < buffer.length(); i++)
  121. {
  122. if (!Character.isWhitespace(buffer.charAt(i)))
  123. {
  124. containsNonWhitespace = true;
  125. break;
  126. }
  127. }
  128. if (containsNonWhitespace)
  129. {
  130. throw new XMLStreamException("Mixed text and elements when " +
  131. " looking for the next text or tag" +
  132. "; tag=" + reader.getLocalName() +
  133. " text='" + buffer.toString() + "'");
  134. }
  135. }
  136. done = true;
  137. break;
  138. case XMLStreamConstants.END_ELEMENT:
  139. output = (buffer == null) ? "" : buffer.toString();
  140. done = true;
  141. break;
  142. default:
  143. throw new XMLStreamException("Unexpected event " +
  144. tokenTypeDesc(type) + " when looking for text or tag");
  145. }
  146. }
  147. while (!done);
  148. if (output == null)
  149. {
  150. reader.require(XMLStreamConstants.START_ELEMENT, null, null);
  151. }
  152. else
  153. {
  154. reader.require(XMLStreamConstants.END_ELEMENT, null, null);
  155. }
  156. return output;
  157. }
  158. public static void skipNestedElements(final XMLStreamReader reader)
  159. throws XMLStreamException
  160. {
  161. int level = 0;
  162. while (level >= 0)
  163. {
  164. final int next = reader.next();
  165. switch (next)
  166. {
  167. case XMLStreamConstants.SPACE:
  168. case XMLStreamConstants.COMMENT:
  169. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  170. case XMLStreamConstants.CDATA:
  171. case XMLStreamConstants.CHARACTERS:
  172. break;
  173. case XMLStreamConstants.START_ELEMENT:
  174. level++;
  175. break;
  176. case XMLStreamConstants.END_ELEMENT:
  177. level--;
  178. break;
  179. default:
  180. throw new XMLStreamException("Received event " + tokenTypeDesc(next)
  181. + ", instead of START_ELEMENT or END_ELEMENT.");
  182. }
  183. }
  184. }
  185. protected static String tokenTypeDesc(final int type)
  186. {
  187. String desc;
  188. switch (type)
  189. {
  190. case XMLStreamConstants.START_ELEMENT:
  191. desc = "START_ELEMENT";
  192. break;
  193. case XMLStreamConstants.END_ELEMENT:
  194. desc = "END_ELEMENT";
  195. break;
  196. case XMLStreamConstants.START_DOCUMENT:
  197. desc = "START_DOCUMENT";
  198. break;
  199. case XMLStreamConstants.END_DOCUMENT:
  200. desc = "END_DOCUMENT";
  201. break;
  202. case XMLStreamConstants.CHARACTERS:
  203. desc = "CHARACTERS";
  204. break;
  205. case XMLStreamConstants.CDATA:
  206. desc = "CDATA";
  207. break;
  208. case XMLStreamConstants.SPACE:
  209. desc = "SPACE";
  210. break;
  211. case XMLStreamConstants.COMMENT:
  212. desc = "COMMENT";
  213. break;
  214. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  215. desc = "PROCESSING_INSTRUCTION";
  216. break;
  217. case XMLStreamConstants.DTD:
  218. desc = "DTD";
  219. break;
  220. case XMLStreamConstants.ENTITY_REFERENCE:
  221. desc = "ENTITY_REFERENCE";
  222. break;
  223. default:
  224. desc = "UNKNOWN_" + type;
  225. }
  226. return desc;
  227. }
  228. public static void writeCharactersIfNotNull(final XMLStreamWriter writer,
  229. final String chars)
  230. throws XMLStreamException
  231. {
  232. if (chars != null)
  233. {
  234. writer.writeCharacters(chars);
  235. }
  236. }
  237. public static void writeAttributeIfNotNull(final XMLStreamWriter writer,
  238. final String name, final String value)
  239. throws XMLStreamException
  240. {
  241. if (value != null)
  242. {
  243. writer.writeAttribute(name, value);
  244. }
  245. }
  246. private static final Pattern INVALID_UTF_CHARS =
  247. Pattern.compile("[^" +
  248. "\\u0009" +
  249. "\\u000A" +
  250. "\\u000D" +
  251. "\\u0020-\\uD7FF" +
  252. "\\uE000-\\uFFFF" +
  253. "]");
  254. public static String filterInvalidCharacters(final String content)
  255. {
  256. final String filtered = INVALID_UTF_CHARS.matcher(content).replaceAll("");
  257. return filtered;
  258. }
  259. /**
  260. * Call handler.characters filtering any invalid UTF characters.
  261. *
  262. * The XML Specification lists the following valid UTF characters.
  263. * http://www.w3.org/TR/REC-xml/#charsets
  264. *
  265. * <pre>
  266. * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  267. * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
  268. * </pre>
  269. *
  270. * Document authors are encouraged to avoid "compatibility characters",
  271. * as defined in section 2.3 of [Unicode]. The characters defined in the
  272. * following ranges are also discouraged. They are either control characters
  273. * or permanently undefined Unicode characters:
  274. *
  275. * <pre>
  276. * [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDEF],
  277. * [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
  278. * [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
  279. * [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
  280. * [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
  281. * [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
  282. * [#x10FFFE-#x10FFFF].
  283. * </pre>
  284. * @throws XMLStreamException
  285. **/
  286. public static void writeFilteredCharacters(
  287. final XMLStreamWriter writer,
  288. final String characters)
  289. throws XMLStreamException
  290. {
  291. if (characters != null)
  292. {
  293. final String filtered = filterInvalidCharacters(characters);
  294. writer.writeCharacters(filtered);
  295. }
  296. }
  /**
   * Not intended to be called: this class only exposes static helpers.
   * The constructor is protected rather than private, so a subclass can
   * still invoke it.
   */
  protected StaxUtil()
  {
    // Prevent instantiation
  }
  301. }