/src/main/java/com/t11e/discovery/datatool/StaxUtil.java
Java | 318 lines | 254 code | 17 blank | 47 comment | 27 complexity | 13da4e43ea853f74cf8b2481ee55bf66 MD5 | raw file
package com.t11e.discovery.datatool;

import java.util.regex.Pattern;

import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLStreamWriter;

/**
 * Static helpers for reading and writing XML through the StAX API
 * (factory creation, cursor navigation, null-tolerant writes and
 * filtering of characters that are not legal in XML 1.0).
 */
public class StaxUtil
{
  /**
   * Create a new XMLInputFactory, better than calling
   * XMLInputFactory.newInstance() directly as it allows for
   * easier override without dealing with endorsed directories.
   *
   * @return a new Woodstox input factory
   */
  public static XMLInputFactory newInputFactory()
  {
    return new com.ctc.wstx.stax.WstxInputFactory();
  }

  /**
   * Create a new XMLOutputFactory, better than calling
   * XMLOutputFactory.newInstance() directly as it allows for
   * easier override without dealing with endorsed directories.
   *
   * @return a new Woodstox output factory
   */
  public static XMLOutputFactory newOutputFactory()
  {
    return new com.ctc.wstx.stax.WstxOutputFactory();
  }

  /**
   * Returns the value of an attribute on the reader's current element,
   * failing if the attribute is absent.
   *
   * @param reader the reader to query
   * @param ns namespace URI of the attribute, passed through to
   *   {@link XMLStreamReader#getAttributeValue(String, String)}
   * @param localName local name of the attribute
   * @return the attribute value, never null
   * @throws XMLStreamException if the reader reports no such attribute
   */
  public static String getRequiredAttributeValue(final XMLStreamReader reader,
    final String ns, final String localName)
    throws XMLStreamException
  {
    final String value = reader.getAttributeValue(ns, localName);
    if (value == null)
    {
      throw newMissingAttributeException(ns, localName);
    }
    return value;
  }

  /**
   * Builds (but does not throw) the exception used to report a missing
   * required attribute.
   *
   * @param ns namespace of the attribute; when non-null it is rendered as
   *   a <code>{ns}</code> prefix in the message
   * @param name name of the attribute
   * @return a new exception describing the missing attribute
   */
  public static XMLStreamException newMissingAttributeException(final String ns,
    final String name)
  {
    return new XMLStreamException("The required attribute " +
      (ns == null ? "" : "{" + ns + "}") + name + " is missing");
  }

  /**
   * Variant of XMLStreamReader.nextTag that additionally ignores any DTD tokens.
   *
   * Whitespace, comments, processing instructions and DTD events are
   * skipped until a START_ELEMENT or END_ELEMENT is reached.
   *
   * @param reader the reader to advance
   * @return the event type the reader stopped on, either START_ELEMENT
   *   or END_ELEMENT
   * @throws XMLStreamException if non-whitespace text or any other event
   *   is encountered first
   */
  public static int nextTagIgnoringDocType(final XMLStreamReader reader)
    throws XMLStreamException
  {
    int next = -1;
    boolean done = false;
    do
    {
      next = reader.next();
      switch (next)
      {
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.COMMENT:
        case XMLStreamConstants.PROCESSING_INSTRUCTION:
          break;
        case XMLStreamConstants.CDATA:
        case XMLStreamConstants.CHARACTERS:
          // Text is tolerated only when it is pure whitespace.
          if (!reader.isWhiteSpace())
          {
            throw new XMLStreamException("Received non-all-whitespace CHARACTERS" +
              " or CDATA event in nextTagIgnoringDocType().");
          }
          break;
        case XMLStreamConstants.START_ELEMENT:
        case XMLStreamConstants.END_ELEMENT:
          done = true;
          break;
        case XMLStreamConstants.DTD:
          // Swallow
          break;
        default:
          throw new XMLStreamException("Received event " + tokenTypeDesc(next)
            + ", instead of START_ELEMENT or END_ELEMENT.");
      }
    }
    while (!done);
    return next;
  }

  /**
   * Find either the next START_ELEMENT or text, returns the text if
   * available and null if a tag was found.
   *
   * The reader must be positioned on a START_ELEMENT when this is called.
   * If the element closes before any child element starts, the accumulated
   * text (possibly the empty string) is returned and the reader is left on
   * the END_ELEMENT. If a child START_ELEMENT is reached first, null is
   * returned and the reader is left on that START_ELEMENT; any text seen
   * before it must be whitespace only.
   *
   * @param reader the reader to advance
   * @return the element text, or null if a nested element was found
   * @throws XMLStreamException if the reader is not on a START_ELEMENT,
   *   if non-whitespace text precedes a nested element, or if an
   *   unexpected event is encountered
   */
  public static String nextTextOrTag(final XMLStreamReader reader)
    throws XMLStreamException
  {
    String output = null;
    reader.require(XMLStreamConstants.START_ELEMENT, null, null);
    // Allocated lazily so that elements without text cost nothing.
    StringBuilder buffer = null;
    boolean done = false;
    do
    {
      final int type = reader.next();
      switch (type)
      {
        case XMLStreamConstants.CHARACTERS:
        case XMLStreamConstants.CDATA:
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.ENTITY_REFERENCE:
          if (buffer == null)
          {
            buffer = new StringBuilder();
          }
          buffer.append(reader.getText());
          break;
        case XMLStreamConstants.PROCESSING_INSTRUCTION:
        case XMLStreamConstants.COMMENT:
          // Swallow
          break;
        case XMLStreamConstants.START_ELEMENT:
          // Indentation whitespace before a child element is fine,
          // real text mixed with child elements is not.
          if (buffer != null && containsNonWhitespace(buffer))
          {
            throw new XMLStreamException("Mixed text and elements when " +
              " looking for the next text or tag" +
              "; tag=" + reader.getLocalName() +
              " text='" + buffer.toString() + "'");
          }
          done = true;
          break;
        case XMLStreamConstants.END_ELEMENT:
          output = (buffer == null) ? "" : buffer.toString();
          done = true;
          break;
        default:
          throw new XMLStreamException("Unexpected event " +
            tokenTypeDesc(type) + " when looking for text or tag");
      }
    }
    while (!done);
    // Post-condition check on the reader position for each outcome.
    if (output == null)
    {
      reader.require(XMLStreamConstants.START_ELEMENT, null, null);
    }
    else
    {
      reader.require(XMLStreamConstants.END_ELEMENT, null, null);
    }
    return output;
  }

  /**
   * Returns true if the given text has at least one character for which
   * Character.isWhitespace is false.
   */
  private static boolean containsNonWhitespace(final CharSequence text)
  {
    for (int i = 0; i < text.length(); i++)
    {
      if (!Character.isWhitespace(text.charAt(i)))
      {
        return true;
      }
    }
    return false;
  }

  /**
   * Advances the reader past all remaining content of the element it is
   * currently inside, including nested elements, stopping once the
   * unmatched END_ELEMENT has been consumed.
   *
   * @param reader the reader to advance
   * @throws XMLStreamException if an event other than element boundaries
   *   or element content is encountered
   */
  public static void skipNestedElements(final XMLStreamReader reader)
    throws XMLStreamException
  {
    // Depth relative to the starting position; drops below zero when the
    // END_ELEMENT without a START_ELEMENT seen by this loop is read.
    int level = 0;
    while (level >= 0)
    {
      final int next = reader.next();
      switch (next)
      {
        case XMLStreamConstants.SPACE:
        case XMLStreamConstants.COMMENT:
        case XMLStreamConstants.PROCESSING_INSTRUCTION:
        case XMLStreamConstants.CDATA:
        case XMLStreamConstants.CHARACTERS:
        case XMLStreamConstants.ENTITY_REFERENCE:
          // Content being skipped; an unexpanded entity reference is
          // content just like text (nextTextOrTag treats it the same way).
          break;
        case XMLStreamConstants.START_ELEMENT:
          level++;
          break;
        case XMLStreamConstants.END_ELEMENT:
          level--;
          break;
        default:
          throw new XMLStreamException("Received event " + tokenTypeDesc(next)
            + ", instead of START_ELEMENT or END_ELEMENT.");
      }
    }
  }

  /**
   * Maps a StAX event type constant to its name for use in messages.
   *
   * @param type one of the XMLStreamConstants event type values
   * @return the constant's name, or "UNKNOWN_" followed by the numeric
   *   value for types not listed here
   */
  protected static String tokenTypeDesc(final int type)
  {
    String desc;
    switch (type)
    {
      case XMLStreamConstants.START_ELEMENT:
        desc = "START_ELEMENT";
        break;
      case XMLStreamConstants.END_ELEMENT:
        desc = "END_ELEMENT";
        break;
      case XMLStreamConstants.START_DOCUMENT:
        desc = "START_DOCUMENT";
        break;
      case XMLStreamConstants.END_DOCUMENT:
        desc = "END_DOCUMENT";
        break;
      case XMLStreamConstants.CHARACTERS:
        desc = "CHARACTERS";
        break;
      case XMLStreamConstants.CDATA:
        desc = "CDATA";
        break;
      case XMLStreamConstants.SPACE:
        desc = "SPACE";
        break;
      case XMLStreamConstants.COMMENT:
        desc = "COMMENT";
        break;
      case XMLStreamConstants.PROCESSING_INSTRUCTION:
        desc = "PROCESSING_INSTRUCTION";
        break;
      case XMLStreamConstants.DTD:
        desc = "DTD";
        break;
      case XMLStreamConstants.ENTITY_REFERENCE:
        desc = "ENTITY_REFERENCE";
        break;
      default:
        desc = "UNKNOWN_" + type;
    }
    return desc;
  }

  /**
   * Writes the given text as character data, doing nothing when it is null.
   *
   * @param writer the writer to write to
   * @param chars the text to write, may be null
   * @throws XMLStreamException if the writer fails
   */
  public static void writeCharactersIfNotNull(final XMLStreamWriter writer,
    final String chars)
    throws XMLStreamException
  {
    if (chars != null)
    {
      writer.writeCharacters(chars);
    }
  }

  /**
   * Writes an attribute, doing nothing when the value is null.
   *
   * @param writer the writer to write to
   * @param name the attribute name
   * @param value the attribute value, may be null
   * @throws XMLStreamException if the writer fails
   */
  public static void writeAttributeIfNotNull(final XMLStreamWriter writer,
    final String name, final String value)
    throws XMLStreamException
  {
    if (value != null)
    {
      writer.writeAttribute(name, value);
    }
  }

  /**
   * Matches every code point that is NOT allowed by the XML 1.0 Char
   * production. The regex engine matches by code point, so the
   * supplementary range must be listed explicitly or every character
   * above U+FFFF would be treated as invalid; unpaired surrogates fall
   * outside all listed ranges and are therefore matched.
   */
  private static final Pattern INVALID_UTF_CHARS =
    Pattern.compile("[^" +
      "\\u0009" +
      "\\u000A" +
      "\\u000D" +
      "\\u0020-\\uD7FF" +
      "\\uE000-\\uFFFD" +
      "\\x{10000}-\\x{10FFFF}" +
      "]");

  /**
   * Removes every character that is not legal in an XML 1.0 document.
   *
   * The XML Specification lists the following valid characters.
   * http://www.w3.org/TR/REC-xml/#charsets
   *
   * <pre>
   * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
   * </pre>
   *
   * @param content the text to filter, may be null
   * @return the text with all illegal characters removed, or null if
   *   content was null
   */
  public static String filterInvalidCharacters(final String content)
  {
    if (content == null)
    {
      return null;
    }
    return INVALID_UTF_CHARS.matcher(content).replaceAll("");
  }

  /**
   * Call writer.writeCharacters after filtering out any characters that
   * are invalid in XML. Nothing is written when the text is null.
   *
   * The XML Specification lists the following valid characters.
   * http://www.w3.org/TR/REC-xml/#charsets
   *
   * <pre>
   * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
   *   any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
   * </pre>
   *
   * Document authors are encouraged to avoid "compatibility characters",
   * as defined in section 2.3 of [Unicode]. The characters defined in the
   * following ranges are also discouraged. They are either control characters
   * or permanently undefined Unicode characters (these discouraged
   * characters are NOT removed by this method):
   *
   * <pre>
   * [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDEF],
   * [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
   * [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
   * [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
   * [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
   * [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
   * [#x10FFFE-#x10FFFF].
   * </pre>
   *
   * @param writer the writer to write to
   * @param characters the text to filter and write, may be null
   * @throws XMLStreamException if the writer fails
   **/
  public static void writeFilteredCharacters(
    final XMLStreamWriter writer,
    final String characters)
    throws XMLStreamException
  {
    if (characters != null)
    {
      final String filtered = filterInvalidCharacters(characters);
      writer.writeCharacters(filtered);
    }
  }

  protected StaxUtil()
  {
    // Prevent instantiation
  }
}