PageRenderTime 60ms CodeModel.GetById 16ms app.highlight 36ms RepoModel.GetById 4ms app.codeStats 1ms

/src/main/java/com/t11e/discovery/datatool/StaxUtil.java

http://github.com/t11e/discovery_datatool
Java | 318 lines | 254 code | 17 blank | 47 comment | 27 complexity | 13da4e43ea853f74cf8b2481ee55bf66 MD5 | raw file
  1package com.t11e.discovery.datatool;
  2
  3import java.util.regex.Pattern;
  4
  5import javax.xml.stream.XMLInputFactory;
  6import javax.xml.stream.XMLOutputFactory;
  7import javax.xml.stream.XMLStreamConstants;
  8import javax.xml.stream.XMLStreamException;
  9import javax.xml.stream.XMLStreamReader;
 10import javax.xml.stream.XMLStreamWriter;
 11
 12public class StaxUtil
 13{
 14  /**
 15   * Create a new XMLInputFactory, better than calling
 16   * XMLInputFactory.newInstance() directly as it allows for
 17   * easier override without dealing with endorsed directories.
 18   */
 19  public static XMLInputFactory newInputFactory()
 20  {
 21    return new com.ctc.wstx.stax.WstxInputFactory();
 22  }
 23
 24  /**
 25   * Create a new XMLOutputFactory, better than calling
 26   * XMLOutputFactory() directly as it allows for
 27   * easier override without dealing with endorsed directories.
 28   */
 29  public static XMLOutputFactory newOutputFactory()
 30  {
 31    return new com.ctc.wstx.stax.WstxOutputFactory();
 32
 33  }
 34
 35  public static String getRequiredAttributeValue(final XMLStreamReader reader,
 36    final String ns, final String localName)
 37    throws XMLStreamException
 38  {
 39    final String value = reader.getAttributeValue(ns, localName);
 40    if (value == null)
 41    {
 42      throw newMissingAttributeException(ns, localName);
 43    }
 44    return value;
 45  }
 46
 47  public static XMLStreamException newMissingAttributeException(final String ns,
 48    final String name)
 49  {
 50    return new XMLStreamException("The required attribute " +
 51      (ns == null ? "" : "{" + ns + "}") + name + " is missing");
 52  }
 53
 54  /**
 55   * Variant of XMLStreamReader.nextTag that additionally ignores any DTD tokens.
 56   */
 57  public static int nextTagIgnoringDocType(final XMLStreamReader reader)
 58    throws XMLStreamException
 59  {
 60    int next = -1;
 61    boolean done = false;
 62    do
 63    {
 64      next = reader.next();
 65      switch (next)
 66      {
 67        case XMLStreamConstants.SPACE:
 68        case XMLStreamConstants.COMMENT:
 69        case XMLStreamConstants.PROCESSING_INSTRUCTION:
 70          break;
 71        case XMLStreamConstants.CDATA:
 72        case XMLStreamConstants.CHARACTERS:
 73          if (!reader.isWhiteSpace())
 74          {
 75            throw new XMLStreamException("Received non-all-whitespace CHARACTERS" +
 76              " or CDATA event in nextTagIgnoringDocType().");
 77          }
 78          break;
 79        case XMLStreamConstants.START_ELEMENT:
 80        case XMLStreamConstants.END_ELEMENT:
 81          done = true;
 82          break;
 83        case XMLStreamConstants.DTD:
 84          // Swallow
 85          break;
 86        default:
 87          throw new XMLStreamException("Received event " + tokenTypeDesc(next)
 88            + ", instead of START_ELEMENT or END_ELEMENT.");
 89      }
 90    }
 91    while (!done);
 92    return next;
 93  }
 94
 95  /**
 96   * Find either the next START_ELEMENT or text, returns the text if
 97   * available and null if a tag was found.
 98   */
 99  public static String nextTextOrTag(final XMLStreamReader reader)
100    throws XMLStreamException
101  {
102    String output = null;
103    reader.require(XMLStreamConstants.START_ELEMENT, null, null);
104    StringBuffer buffer = null;
105    boolean done = false;
106    do
107    {
108      final int type = reader.next();
109      switch (type)
110      {
111        case XMLStreamConstants.CHARACTERS:
112        case XMLStreamConstants.CDATA:
113        case XMLStreamConstants.SPACE:
114        case XMLStreamConstants.ENTITY_REFERENCE:
115          if (buffer == null)
116          {
117            buffer = new StringBuffer();
118          }
119          buffer.append(reader.getText());
120          break;
121        case XMLStreamConstants.PROCESSING_INSTRUCTION:
122        case XMLStreamConstants.COMMENT:
123          // Swallow
124          break;
125        case XMLStreamConstants.START_ELEMENT:
126          if (buffer != null)
127          {
128            boolean containsNonWhitespace = false;
129            for (int i = 0; i < buffer.length(); i++)
130            {
131              if (!Character.isWhitespace(buffer.charAt(i)))
132              {
133                containsNonWhitespace = true;
134                break;
135              }
136            }
137            if (containsNonWhitespace)
138            {
139              throw new XMLStreamException("Mixed text and elements when " +
140                " looking for the next text or tag" +
141                "; tag=" + reader.getLocalName() +
142                " text='" + buffer.toString() + "'");
143            }
144          }
145          done = true;
146          break;
147        case XMLStreamConstants.END_ELEMENT:
148          output = (buffer == null) ? "" : buffer.toString();
149          done = true;
150          break;
151        default:
152          throw new XMLStreamException("Unexpected event " +
153            tokenTypeDesc(type) + " when looking for text or tag");
154      }
155    }
156    while (!done);
157    if (output == null)
158    {
159      reader.require(XMLStreamConstants.START_ELEMENT, null, null);
160    }
161    else
162    {
163      reader.require(XMLStreamConstants.END_ELEMENT, null, null);
164    }
165    return output;
166  }
167
168  public static void skipNestedElements(final XMLStreamReader reader)
169    throws XMLStreamException
170  {
171    int level = 0;
172    while (level >= 0)
173    {
174      final int next = reader.next();
175      switch (next)
176      {
177        case XMLStreamConstants.SPACE:
178        case XMLStreamConstants.COMMENT:
179        case XMLStreamConstants.PROCESSING_INSTRUCTION:
180        case XMLStreamConstants.CDATA:
181        case XMLStreamConstants.CHARACTERS:
182          break;
183        case XMLStreamConstants.START_ELEMENT:
184          level++;
185          break;
186        case XMLStreamConstants.END_ELEMENT:
187          level--;
188          break;
189        default:
190          throw new XMLStreamException("Received event " + tokenTypeDesc(next)
191            + ", instead of START_ELEMENT or END_ELEMENT.");
192      }
193    }
194  }
195
196  protected static String tokenTypeDesc(final int type)
197  {
198    String desc;
199    switch (type)
200    {
201      case XMLStreamConstants.START_ELEMENT:
202        desc = "START_ELEMENT";
203        break;
204      case XMLStreamConstants.END_ELEMENT:
205        desc = "END_ELEMENT";
206        break;
207      case XMLStreamConstants.START_DOCUMENT:
208        desc = "START_DOCUMENT";
209        break;
210      case XMLStreamConstants.END_DOCUMENT:
211        desc = "END_DOCUMENT";
212        break;
213      case XMLStreamConstants.CHARACTERS:
214        desc = "CHARACTERS";
215        break;
216      case XMLStreamConstants.CDATA:
217        desc = "CDATA";
218        break;
219      case XMLStreamConstants.SPACE:
220        desc = "SPACE";
221        break;
222      case XMLStreamConstants.COMMENT:
223        desc = "COMMENT";
224        break;
225      case XMLStreamConstants.PROCESSING_INSTRUCTION:
226        desc = "PROCESSING_INSTRUCTION";
227        break;
228      case XMLStreamConstants.DTD:
229        desc = "DTD";
230        break;
231      case XMLStreamConstants.ENTITY_REFERENCE:
232        desc = "ENTITY_REFERENCE";
233        break;
234      default:
235        desc = "UNKNOWN_" + type;
236    }
237    return desc;
238  }
239
240  public static void writeCharactersIfNotNull(final XMLStreamWriter writer,
241    final String chars)
242    throws XMLStreamException
243  {
244    if (chars != null)
245    {
246      writer.writeCharacters(chars);
247    }
248  }
249
250  public static void writeAttributeIfNotNull(final XMLStreamWriter writer,
251    final String name, final String value)
252    throws XMLStreamException
253  {
254    if (value != null)
255    {
256      writer.writeAttribute(name, value);
257    }
258  }
259
260  private static final Pattern INVALID_UTF_CHARS =
261      Pattern.compile("[^" +
262        "\\u0009" +
263        "\\u000A" +
264        "\\u000D" +
265        "\\u0020-\\uD7FF" +
266        "\\uE000-\\uFFFF" +
267        "]");
268
269  public static String filterInvalidCharacters(final String content)
270  {
271    final String filtered = INVALID_UTF_CHARS.matcher(content).replaceAll("");
272    return filtered;
273  }
274
275  /**
276   * Call handler.characters filtering any invalid UTF characters.
277   *
278   * The XML Specification lists the following valid UTF characters.
279   *   http://www.w3.org/TR/REC-xml/#charsets
280   *
281   * <pre>
282   *   Char       ::=          #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
283   *     any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
284   * </pre>
285   *
286   * Document authors are encouraged to avoid "compatibility characters",
287   * as defined in section 2.3 of [Unicode]. The characters defined in the
288   * following ranges are also discouraged. They are either control characters
289   * or permanently undefined Unicode characters:
290   *
291   * <pre>
292   *   [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDEF],
293   *   [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
294   *   [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
295   *   [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
296   *   [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
297   *   [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
298   *   [#x10FFFE-#x10FFFF].
299   * </pre>
300   * @throws XMLStreamException
301   **/
302  public static void writeFilteredCharacters(
303    final XMLStreamWriter writer,
304    final String characters)
305    throws XMLStreamException
306  {
307    if (characters != null)
308    {
309      final String filtered = filterInvalidCharacters(characters);
310      writer.writeCharacters(filtered);
311    }
312  }
313
314  protected StaxUtil()
315  {
316    // Prevent instantiation
317  }
318}