PageRenderTime 65ms CodeModel.GetById 23ms RepoModel.GetById 1ms app.codeStats 1ms

/java-1.7.0-openjdk/openjdk/jaxp/src/com/sun/org/apache/xml/internal/serializer/ToStream.java

#
Java | 3570 lines | 1875 code | 348 blank | 1347 comment | 375 complexity | 2cb033fc13116f9b10114d7313ccfc25 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause-No-Nuclear-License-2014, LGPL-3.0, LGPL-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. * reserved comment block
  3. * DO NOT REMOVE OR ALTER!
  4. */
  5. /*
  6. * Copyright 2001-2004 The Apache Software Foundation.
  7. *
  8. * Licensed under the Apache License, Version 2.0 (the "License");
  9. * you may not use this file except in compliance with the License.
  10. * You may obtain a copy of the License at
  11. *
  12. * http://www.apache.org/licenses/LICENSE-2.0
  13. *
  14. * Unless required by applicable law or agreed to in writing, software
  15. * distributed under the License is distributed on an "AS IS" BASIS,
  16. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17. * See the License for the specific language governing permissions and
  18. * limitations under the License.
  19. */
  20. /*
  21. * $Id: ToStream.java,v 1.4 2005/11/10 06:43:26 suresh_emailid Exp $
  22. */
  23. package com.sun.org.apache.xml.internal.serializer;
  24. import java.io.IOException;
  25. import java.io.OutputStream;
  26. import java.io.UnsupportedEncodingException;
  27. import java.io.Writer;
  28. import java.util.Properties;
  29. import java.util.StringTokenizer;
  30. import java.util.Vector;
  31. import javax.xml.transform.ErrorListener;
  32. import javax.xml.transform.OutputKeys;
  33. import javax.xml.transform.Transformer;
  34. import javax.xml.transform.TransformerException;
  35. import com.sun.org.apache.xml.internal.serializer.utils.MsgKey;
  36. import com.sun.org.apache.xml.internal.serializer.utils.Utils;
  37. import com.sun.org.apache.xml.internal.serializer.utils.WrappedRuntimeException;
  38. import org.w3c.dom.Node;
  39. import org.xml.sax.Attributes;
  40. import org.xml.sax.ContentHandler;
  41. import org.xml.sax.SAXException;
  42. //import com.sun.media.sound.IESecurity;
  43. /**
  44. * This abstract class is a base class for other stream
  45. * serializers (xml, html, text ...) that write output to a stream.
  46. *
  47. * @xsl.usage internal
  48. */
  49. abstract public class ToStream extends SerializerBase
  50. {
  51. private static final String COMMENT_BEGIN = "<!--";
  52. private static final String COMMENT_END = "-->";
  53. /** Stack to keep track of disabling output escaping. */
  54. protected BoolStack m_disableOutputEscapingStates = new BoolStack();
  55. /**
  56. * The encoding information associated with this serializer.
  57. * Although initially there is no encoding,
  58. * there is a dummy EncodingInfo object that will say
  59. * that every character is in the encoding. This is useful
  60. * for a serializer that is in temporary output state and has
  61. * no associated encoding. A serializer in final output state
  62. * will have an encoding, and will worry about whether
  63. * single chars or surrogate pairs of high/low chars form
  64. * characters in the output encoding.
  65. */
  66. EncodingInfo m_encodingInfo = new EncodingInfo(null,null);
  67. /**
  68. * Method reference to the sun.io.CharToByteConverter#canConvert method
  69. * for this encoding. Invalid if m_charToByteConverter is null.
  70. */
  71. java.lang.reflect.Method m_canConvertMeth;
  72. /**
  73. * Boolean that tells if we already tried to get the converter.
  74. */
  75. boolean m_triedToGetConverter = false;
  76. /**
  77. * Opaque reference to the sun.io.CharToByteConverter for this
  78. * encoding.
  79. */
  80. Object m_charToByteConverter = null;
  81. /**
  82. * Stack to keep track of whether or not we need to
  83. * preserve whitespace.
  84. *
  85. * Used to push/pop values used for the field m_ispreserve, but
  86. * m_ispreserve is only relevant if m_doIndent is true.
  87. * If m_doIndent is false this field has no impact.
  88. *
  89. */
  90. protected BoolStack m_preserves = new BoolStack();
  91. /**
  92. * State flag to tell if preservation of whitespace
  93. * is important.
  94. *
  95. * Used only in shouldIndent() but only if m_doIndent is true.
  96. * If m_doIndent is false this flag has no impact.
  97. *
  98. */
  99. protected boolean m_ispreserve = false;
  100. /**
  101. * State flag that tells if the previous node processed
  102. * was text, so we can tell if we should preserve whitespace.
  103. *
  104. * Used in endDocument() and shouldIndent() but
  105. * only if m_doIndent is true.
  106. * If m_doIndent is false this flag has no impact.
  107. */
  108. protected boolean m_isprevtext = false;
  109. /**
  110. * The maximum character size before we have to resort
  111. * to escaping.
  112. */
  113. protected int m_maxCharacter = Encodings.getLastPrintable();
  114. /**
  115. * The system line separator for writing out line breaks.
  116. * The default value is from the system property,
  117. * but this value can be set through the xsl:output
  118. * extension attribute xalan:line-separator.
  119. */
  120. protected char[] m_lineSep =
  121. System.getProperty("line.separator").toCharArray();
  122. /**
  123. * True if the system line separator is to be used.
  124. */
  125. protected boolean m_lineSepUse = true;
  126. /**
  127. * The length of the line separator, since the write is done
  128. * one character at a time.
  129. */
  130. protected int m_lineSepLen = m_lineSep.length;
  131. /**
  132. * Map that tells which characters should have special treatment, and it
  133. * provides character to entity name lookup.
  134. */
  135. protected CharInfo m_charInfo;
  136. /** True if we control the buffer, and we should flush the output on endDocument. */
  137. boolean m_shouldFlush = true;
  138. /**
  139. * Add space before '/>' for XHTML.
  140. */
  141. protected boolean m_spaceBeforeClose = false;
  142. /**
  143. * Flag to signal that a newline should be added.
  144. *
  145. * Used only in indent() which is called only if m_doIndent is true.
  146. * If m_doIndent is false this flag has no impact.
  147. */
  148. boolean m_startNewLine;
  149. /**
  150. * Tells if we're in an internal document type subset.
  151. */
  152. protected boolean m_inDoctype = false;
  153. /**
  154. * Flag to quickly tell if the encoding is UTF8.
  155. */
  156. boolean m_isUTF8 = false;
  157. /** The xsl:output properties. */
  158. protected Properties m_format;
  159. /**
  160. * remembers if we are in between the startCDATA() and endCDATA() callbacks
  161. */
  162. protected boolean m_cdataStartCalled = false;
  163. /**
  164. * If this flag is true DTD entity references are not left as-is,
  165. * which is the existing older behavior.
  166. */
  167. private boolean m_expandDTDEntities = true;
  168. /**
  169. * Default no-argument constructor. The body is empty, so a new instance
  169. * carries only the values assigned by the field initializers of this class.
  170. */
  171. public ToStream()
  172. {
  173. }
  174. /**
  175. * This helper method to writes out "]]>" when closing a CDATA section.
  176. *
  177. * @throws org.xml.sax.SAXException
  178. */
  179. protected void closeCDATA() throws org.xml.sax.SAXException
  180. {
  181. try
  182. {
  183. m_writer.write(CDATA_DELIMITER_CLOSE);
  184. // write out a CDATA section closing "]]>"
  185. m_cdataTagOpen = false; // Remember that we have done so.
  186. }
  187. catch (IOException e)
  188. {
  189. throw new SAXException(e);
  190. }
  191. }
  192. /**
  193. * Serializes the DOM node. Throws an exception only if an I/O
  194. * exception occured while serializing.
  195. *
  196. * @param node Node to serialize.
  197. * @throws IOException An I/O exception occured while serializing
  198. */
  199. public void serialize(Node node) throws IOException
  200. {
  201. try
  202. {
  203. TreeWalker walker =
  204. new TreeWalker(this);
  205. walker.traverse(node);
  206. }
  207. catch (org.xml.sax.SAXException se)
  208. {
  209. throw new WrappedRuntimeException(se);
  210. }
  211. }
  212. /**
  213. * Return true if the character is the high member of a surrogate pair.
  214. *
  215. * NEEDSDOC @param c
  216. *
  217. * NEEDSDOC ($objectName$) @return
  218. */
  219. static final boolean isUTF16Surrogate(char c)
  220. {
  221. return (c & 0xFC00) == 0xD800;
  222. }
  223. /**
  224. * Taken from XSLTC
  225. */
  226. private boolean m_escaping = true;
  227. /**
  228. * Flush the formatter's result stream.
  229. *
  230. * @throws org.xml.sax.SAXException
  231. */
  232. protected final void flushWriter() throws org.xml.sax.SAXException
  233. {
  234. final java.io.Writer writer = m_writer;
  235. if (null != writer)
  236. {
  237. try
  238. {
  239. if (writer instanceof WriterToUTF8Buffered)
  240. {
  241. if (m_shouldFlush)
  242. ((WriterToUTF8Buffered) writer).flush();
  243. else
  244. ((WriterToUTF8Buffered) writer).flushBuffer();
  245. }
  246. if (writer instanceof WriterToASCI)
  247. {
  248. if (m_shouldFlush)
  249. writer.flush();
  250. }
  251. else
  252. {
  253. // Flush always.
  254. // Not a great thing if the writer was created
  255. // by this class, but don't have a choice.
  256. writer.flush();
  257. }
  258. }
  259. catch (IOException ioe)
  260. {
  261. throw new org.xml.sax.SAXException(ioe);
  262. }
  263. }
  264. }
  265. /**
  266. * Get the output stream where the events will be serialized to.
  267. *
  268. * @return reference to the result stream, or null of only a writer was
  269. * set.
  270. */
  271. public OutputStream getOutputStream()
  272. {
  273. if (m_writer instanceof WriterToUTF8Buffered)
  274. return ((WriterToUTF8Buffered) m_writer).getOutputStream();
  275. if (m_writer instanceof WriterToASCI)
  276. return ((WriterToASCI) m_writer).getOutputStream();
  277. else
  278. return null;
  279. }
  280. // Implement DeclHandler
  281. /**
  282. * Report an element type declaration.
  283. *
  284. * <p>The content model will consist of the string "EMPTY", the
  285. * string "ANY", or a parenthesised group, optionally followed
  286. * by an occurrence indicator. The model will be normalized so
  287. * that all whitespace is removed,and will include the enclosing
  288. * parentheses.</p>
  289. *
  290. * @param name The element type name.
  291. * @param model The content model as a normalized string.
  292. * @exception SAXException The application may raise an exception.
  293. */
  294. public void elementDecl(String name, String model) throws SAXException
  295. {
  296. // Do not inline external DTD
  297. if (m_inExternalDTD)
  298. return;
  299. try
  300. {
  301. final java.io.Writer writer = m_writer;
  302. DTDprolog();
  303. writer.write("<!ELEMENT ");
  304. writer.write(name);
  305. writer.write(' ');
  306. writer.write(model);
  307. writer.write('>');
  308. writer.write(m_lineSep, 0, m_lineSepLen);
  309. }
  310. catch (IOException e)
  311. {
  312. throw new SAXException(e);
  313. }
  314. }
  315. /**
  316. * Report an internal entity declaration.
  317. *
  318. * <p>Only the effective (first) declaration for each entity
  319. * will be reported.</p>
  320. *
  321. * @param name The name of the entity. If it is a parameter
  322. * entity, the name will begin with '%'.
  323. * @param value The replacement text of the entity.
  324. * @exception SAXException The application may raise an exception.
  325. * @see #externalEntityDecl
  326. * @see org.xml.sax.DTDHandler#unparsedEntityDecl
  327. */
  328. public void internalEntityDecl(String name, String value)
  329. throws SAXException
  330. {
  331. // Do not inline external DTD
  332. if (m_inExternalDTD)
  333. return;
  334. try
  335. {
  336. DTDprolog();
  337. outputEntityDecl(name, value);
  338. }
  339. catch (IOException e)
  340. {
  341. throw new SAXException(e);
  342. }
  343. }
  344. /**
  345. * Output the doc type declaration.
  346. *
  347. * @param name non-null reference to document type name.
  348. * NEEDSDOC @param value
  349. *
  350. * @throws org.xml.sax.SAXException
  351. */
  352. void outputEntityDecl(String name, String value) throws IOException
  353. {
  354. final java.io.Writer writer = m_writer;
  355. writer.write("<!ENTITY ");
  356. writer.write(name);
  357. writer.write(" \"");
  358. writer.write(value);
  359. writer.write("\">");
  360. writer.write(m_lineSep, 0, m_lineSepLen);
  361. }
  362. /**
  363. * Writes the current line separator, i.e. the first m_lineSepLen characters of m_lineSep, to m_writer.
  364. *
  365. * @throws IOException if the underlying writer fails.
  366. */
  367. protected final void outputLineSep() throws IOException
  368. {
  369. m_writer.write(m_lineSep, 0, m_lineSepLen);
  370. }
  371. /**
  372. * Specifies an output format for this serializer. It the
  373. * serializer has already been associated with an output format,
  374. * it will switch to the new format. This method should not be
  375. * called while the serializer is in the process of serializing
  376. * a document.
  377. *
  378. * @param format The output format to use
  379. */
  380. public void setOutputFormat(Properties format)
  381. {
  382. boolean shouldFlush = m_shouldFlush;
  383. init(m_writer, format, false, false);
  384. m_shouldFlush = shouldFlush;
  385. }
  386. /**
  387. * Initialize the serializer with the specified writer and output format.
  388. * Must be called before calling any of the serialize methods.
  389. * This method can be called multiple times and the xsl:output properties
  390. * passed in the 'format' parameter are accumulated across calls.
  391. *
  392. * @param writer The writer to use
  393. * @param format The output format
  394. * @param shouldFlush True if the writer should be flushed at EndDocument.
  395. */
  396. private synchronized void init(
  397. Writer writer,
  398. Properties format,
  399. boolean defaultProperties,
  400. boolean shouldFlush)
  401. {
  402. m_shouldFlush = shouldFlush;
  403. // if we are tracing events we need to trace what
  404. // characters are written to the output writer.
  405. if (m_tracer != null
  406. && !(writer instanceof SerializerTraceWriter) )
  407. m_writer = new SerializerTraceWriter(writer, m_tracer);
  408. else
  409. m_writer = writer;
  410. m_format = format;
  411. // m_cdataSectionNames =
  412. // OutputProperties.getQNameProperties(
  413. // OutputKeys.CDATA_SECTION_ELEMENTS,
  414. // format);
  415. setCdataSectionElements(OutputKeys.CDATA_SECTION_ELEMENTS, format);
  416. setIndentAmount(
  417. OutputPropertyUtils.getIntProperty(
  418. OutputPropertiesFactory.S_KEY_INDENT_AMOUNT,
  419. format));
  420. setIndent(
  421. OutputPropertyUtils.getBooleanProperty(OutputKeys.INDENT, format));
  422. {
  423. String sep =
  424. format.getProperty(OutputPropertiesFactory.S_KEY_LINE_SEPARATOR);
  425. if (sep != null) {
  426. m_lineSep = sep.toCharArray();
  427. m_lineSepLen = sep.length();
  428. }
  429. }
  430. boolean shouldNotWriteXMLHeader =
  431. OutputPropertyUtils.getBooleanProperty(
  432. OutputKeys.OMIT_XML_DECLARATION,
  433. format);
  434. setOmitXMLDeclaration(shouldNotWriteXMLHeader);
  435. setDoctypeSystem(format.getProperty(OutputKeys.DOCTYPE_SYSTEM));
  436. String doctypePublic = format.getProperty(OutputKeys.DOCTYPE_PUBLIC);
  437. setDoctypePublic(doctypePublic);
  438. // if standalone was explicitly specified
  439. if (format.get(OutputKeys.STANDALONE) != null)
  440. {
  441. String val = format.getProperty(OutputKeys.STANDALONE);
  442. if (defaultProperties)
  443. setStandaloneInternal(val);
  444. else
  445. setStandalone(val);
  446. }
  447. setMediaType(format.getProperty(OutputKeys.MEDIA_TYPE));
  448. if (null != doctypePublic)
  449. {
  450. if (doctypePublic.startsWith("-//W3C//DTD XHTML"))
  451. m_spaceBeforeClose = true;
  452. }
  453. /*
  454. * This code is added for XML 1.1 Version output.
  455. */
  456. String version = getVersion();
  457. if (null == version)
  458. {
  459. version = format.getProperty(OutputKeys.VERSION);
  460. setVersion(version);
  461. }
  462. // initCharsMap();
  463. String encoding = getEncoding();
  464. if (null == encoding)
  465. {
  466. encoding =
  467. Encodings.getMimeEncoding(
  468. format.getProperty(OutputKeys.ENCODING));
  469. setEncoding(encoding);
  470. }
  471. m_isUTF8 = encoding.equals(Encodings.DEFAULT_MIME_ENCODING);
  472. // Access this only from the Hashtable level... we don't want to
  473. // get default properties.
  474. String entitiesFileName =
  475. (String) format.get(OutputPropertiesFactory.S_KEY_ENTITIES);
  476. if (null != entitiesFileName)
  477. {
  478. String method =
  479. (String) format.get(OutputKeys.METHOD);
  480. m_charInfo = CharInfo.getCharInfo(entitiesFileName, method);
  481. }
  482. }
  483. /**
  484. * Initialize the serializer with the specified writer and output format.
  485. * Must be called before calling any of the serialize methods.
  486. * Convenience overload: delegates to the four-argument init with both defaultProperties and shouldFlush set to false.
  487. * @param writer The writer to use
  488. * @param format The output format
  489. */
  490. private synchronized void init(Writer writer, Properties format)
  491. {
  492. init(writer, format, false, false);
  493. }
  494. /**
  495. * Initialize the serializer with the specified output stream and output
  496. * format. Must be called before calling any of the serialize methods.
  497. *
  498. * @param output The output stream to use
  499. * @param format The output format
  500. * @param defaultProperties true if the properties are the default
  501. * properties
  502. *
  503. * @throws UnsupportedEncodingException The encoding specified in the
  504. * output format is not supported
  505. */
  506. protected synchronized void init(
  507. OutputStream output,
  508. Properties format,
  509. boolean defaultProperties)
  510. throws UnsupportedEncodingException
  511. {
  512. String encoding = getEncoding();
  513. if (encoding == null)
  514. {
  515. // if not already set then get it from the properties
  516. encoding =
  517. Encodings.getMimeEncoding(
  518. format.getProperty(OutputKeys.ENCODING));
  519. setEncoding(encoding);
  520. }
  521. if (encoding.equalsIgnoreCase("UTF-8"))
  522. {
  523. m_isUTF8 = true;
  524. // if (output instanceof java.io.BufferedOutputStream)
  525. // {
  526. // init(new WriterToUTF8(output), format, defaultProperties, true);
  527. // }
  528. // else if (output instanceof java.io.FileOutputStream)
  529. // {
  530. // init(new WriterToUTF8Buffered(output), format, defaultProperties, true);
  531. // }
  532. // else
  533. // {
  534. // // Not sure what to do in this case. I'm going to be conservative
  535. // // and not buffer.
  536. // init(new WriterToUTF8(output), format, defaultProperties, true);
  537. // }
  538. init(
  539. new WriterToUTF8Buffered(output),
  540. format,
  541. defaultProperties,
  542. true);
  543. }
  544. else if (
  545. encoding.equals("WINDOWS-1250")
  546. || encoding.equals("US-ASCII")
  547. || encoding.equals("ASCII"))
  548. {
  549. init(new WriterToASCI(output), format, defaultProperties, true);
  550. }
  551. else
  552. {
  553. Writer osw;
  554. try
  555. {
  556. osw = Encodings.getWriter(output, encoding);
  557. }
  558. catch (UnsupportedEncodingException uee)
  559. {
  560. System.out.println(
  561. "Warning: encoding \""
  562. + encoding
  563. + "\" not supported"
  564. + ", using "
  565. + Encodings.DEFAULT_MIME_ENCODING);
  566. encoding = Encodings.DEFAULT_MIME_ENCODING;
  567. setEncoding(encoding);
  568. osw = Encodings.getWriter(output, encoding);
  569. }
  570. init(osw, format, defaultProperties, true);
  571. }
  572. }
  573. /**
  574. * Returns the output format for this serializer, i.e. the Properties object currently held in m_format.
  575. *
  576. * @return The output format in use (the same object reference, not a copy)
  577. */
  578. public Properties getOutputFormat()
  579. {
  580. return m_format;
  581. }
  582. /**
  583. * Specifies a writer to which the document should be serialized.
  584. * This method should not be called while the serializer is in
  585. * the process of serializing a document.
  586. *
  587. * @param writer The output writer stream
  588. */
  589. public void setWriter(Writer writer)
  590. {
  591. // if we are tracing events we need to trace what
  592. // characters are written to the output writer.
  593. if (m_tracer != null
  594. && !(writer instanceof SerializerTraceWriter) )
  595. m_writer = new SerializerTraceWriter(writer, m_tracer);
  596. else
  597. m_writer = writer;
  598. }
  599. /**
  600. * Set if the operating systems end-of-line line separator should
  601. * be used when serializing. If set false NL character
  602. * (decimal 10) is left alone, otherwise the new-line will be replaced on
  603. * output with the systems line separator. For example on UNIX this is
  604. * NL, while on Windows it is two characters, CR NL, where CR is the
  605. * carriage-return (decimal 13).
  606. *
  607. * @param use_sytem_line_break True if an input NL is replaced with the
  608. * operating systems end-of-line separator.
  609. * @return The previously set value of the serializer.
  610. */
  611. public boolean setLineSepUse(boolean use_sytem_line_break)
  612. {
  613. boolean oldValue = m_lineSepUse;
  614. m_lineSepUse = use_sytem_line_break;
  615. return oldValue;
  616. }
  617. /**
  618. * Specifies an output stream to which the document should be
  619. * serialized. This method should not be called while the
  620. * serializer is in the process of serializing a document.
  621. * <p>
  622. * The encoding specified in the output properties is used, or
  623. * if no encoding was specified, the default for the selected
  624. * output method.
  625. *
  626. * @param output The output stream
  627. */
  628. public void setOutputStream(OutputStream output)
  629. {
  630. try
  631. {
  632. Properties format;
  633. if (null == m_format)
  634. format =
  635. OutputPropertiesFactory.getDefaultMethodProperties(
  636. Method.XML);
  637. else
  638. format = m_format;
  639. init(output, format, true);
  640. }
  641. catch (UnsupportedEncodingException uee)
  642. {
  643. // Should have been warned in init, I guess...
  644. }
  645. }
  646. /**
  647. * @see SerializationHandler#setEscaping(boolean)
  648. */
  649. public boolean setEscaping(boolean escape)
  650. {
  651. final boolean temp = m_escaping;
  652. m_escaping = escape;
  653. return temp;
  654. }
  655. /**
  656. * Might print a newline character and the indentation amount
  657. * of the given depth.
  658. *
  659. * @param depth the indentation depth (element nesting depth)
  660. *
  661. * @throws org.xml.sax.SAXException if an error occurs during writing.
  662. */
  663. protected void indent(int depth) throws IOException
  664. {
  665. if (m_startNewLine)
  666. outputLineSep();
  667. /* For m_indentAmount > 0 this extra test might be slower
  668. * but Xalan's default value is 0, so this extra test
  669. * will run faster in that situation.
  670. */
  671. if (m_indentAmount > 0)
  672. printSpace(depth * m_indentAmount);
  673. }
  674. /**
  675. * Indent at the current element nesting depth, by passing m_elemContext.m_currentElemDepth to indent(int).
  676. * @throws IOException if writing the indentation fails.
  677. */
  678. protected void indent() throws IOException
  679. {
  680. indent(m_elemContext.m_currentElemDepth);
  681. }
  682. /**
  683. * Prints <var>n</var> spaces.
  684. * @param n Number of spaces to print.
  685. *
  686. * @throws org.xml.sax.SAXException if an error occurs when writing.
  687. */
  688. private void printSpace(int n) throws IOException
  689. {
  690. final java.io.Writer writer = m_writer;
  691. for (int i = 0; i < n; i++)
  692. {
  693. writer.write(' ');
  694. }
  695. }
  696. /**
  697. * Report an attribute type declaration.
  698. *
  699. * <p>Only the effective (first) declaration for an attribute will
  700. * be reported. The type will be one of the strings "CDATA",
  701. * "ID", "IDREF", "IDREFS", "NMTOKEN", "NMTOKENS", "ENTITY",
  702. * "ENTITIES", or "NOTATION", or a parenthesized token group with
  703. * the separator "|" and all whitespace removed.</p>
  704. *
  705. * @param eName The name of the associated element.
  706. * @param aName The name of the attribute.
  707. * @param type A string representing the attribute type.
  708. * @param valueDefault A string representing the attribute default
  709. * ("#IMPLIED", "#REQUIRED", or "#FIXED") or null if
  710. * none of these applies.
  711. * @param value A string representing the attribute's default value,
  712. * or null if there is none.
  713. * @exception SAXException The application may raise an exception.
  714. */
  715. public void attributeDecl(
  716. String eName,
  717. String aName,
  718. String type,
  719. String valueDefault,
  720. String value)
  721. throws SAXException
  722. {
  723. // Do not inline external DTD
  724. if (m_inExternalDTD)
  725. return;
  726. try
  727. {
  728. final java.io.Writer writer = m_writer;
  729. DTDprolog();
  730. writer.write("<!ATTLIST ");
  731. writer.write(eName);
  732. writer.write(' ');
  733. writer.write(aName);
  734. writer.write(' ');
  735. writer.write(type);
  736. if (valueDefault != null)
  737. {
  738. writer.write(' ');
  739. writer.write(valueDefault);
  740. }
  741. //writer.write(" ");
  742. //writer.write(value);
  743. writer.write('>');
  744. writer.write(m_lineSep, 0, m_lineSepLen);
  745. }
  746. catch (IOException e)
  747. {
  748. throw new SAXException(e);
  749. }
  750. }
  751. /**
  752. * Get the character stream where the events will be serialized to, i.e. the current value of m_writer.
  753. * When tracing is active this can be the SerializerTraceWriter wrapper installed by setWriter or init rather than the writer originally supplied.
  754. * @return Reference to the result Writer, or null.
  755. */
  756. public Writer getWriter()
  757. {
  758. return m_writer;
  759. }
  760. /**
  761. * Report a parsed external entity declaration.
  762. *
  763. * <p>Only the effective (first) declaration for each entity
  764. * will be reported.</p>
  765. *
  766. * @param name The name of the entity. If it is a parameter
  767. * entity, the name will begin with '%'.
  768. * @param publicId The declared public identifier of the entity, or
  769. * null if none was declared.
  770. * @param systemId The declared system identifier of the entity.
  771. * @exception SAXException The application may raise an exception.
  772. * @see #internalEntityDecl
  773. * @see org.xml.sax.DTDHandler#unparsedEntityDecl
  774. */
  775. public void externalEntityDecl(
  776. String name,
  777. String publicId,
  778. String systemId)
  779. throws SAXException
  780. {
  781. try {
  782. DTDprolog();
  783. m_writer.write("<!ENTITY ");
  784. m_writer.write(name);
  785. if (publicId != null) {
  786. m_writer.write(" PUBLIC \"");
  787. m_writer.write(publicId);
  788. }
  789. else {
  790. m_writer.write(" SYSTEM \"");
  791. m_writer.write(systemId);
  792. }
  793. m_writer.write("\" >");
  794. m_writer.write(m_lineSep, 0, m_lineSepLen);
  795. } catch (IOException e) {
  796. // TODO Auto-generated catch block
  797. e.printStackTrace();
  798. }
  799. }
  800. /**
  801. * Tell if this character can be written without escaping.
  802. */
  803. protected boolean escapingNotNeeded(char ch)
  804. {
  805. final boolean ret;
  806. if (ch < 127)
  807. {
  808. // This is the old/fast code here, but is this
  809. // correct for all encodings?
  810. if (ch >= CharInfo.S_SPACE || (CharInfo.S_LINEFEED == ch ||
  811. CharInfo.S_CARRIAGERETURN == ch || CharInfo.S_HORIZONAL_TAB == ch))
  812. ret= true;
  813. else
  814. ret = false;
  815. }
  816. else {
  817. ret = m_encodingInfo.isInEncoding(ch);
  818. }
  819. return ret;
  820. }
  821. /**
  822. * Once a surrogate has been detected, write out the pair of
  823. * characters if it is in the encoding, or if there is no
  824. * encoding, otherwise write out an entity reference
  825. * of the value of the unicode code point of the character
  826. * represented by the high/low surrogate pair.
  827. * <p>
  828. * An exception is thrown if there is no low surrogate in the pair,
  829. * because the array ends unexpectely, or if the low char is there
  830. * but its value is such that it is not a low surrogate.
  831. *
  832. * @param c the first (high) part of the surrogate, which
  833. * must be confirmed before calling this method.
  834. * @param ch Character array.
  835. * @param i position Where the surrogate was detected.
  836. * @param end The end index of the significant characters.
  837. * @return 0 if the pair of characters was written out as-is,
  838. * the unicode code point of the character represented by
  839. * the surrogate pair if an entity reference with that value
  840. * was written out.
  841. *
  842. * @throws IOException
  843. * @throws org.xml.sax.SAXException if invalid UTF-16 surrogate detected.
  844. */
  845. protected int writeUTF16Surrogate(char c, char ch[], int i, int end)
  846. throws IOException
  847. {
  848. int codePoint = 0;
  849. if (i + 1 >= end)
  850. {
  851. throw new IOException(
  852. Utils.messages.createMessage(
  853. MsgKey.ER_INVALID_UTF16_SURROGATE,
  854. new Object[] { Integer.toHexString((int) c)}));
  855. }
  856. final char high = c;
  857. final char low = ch[i+1];
  858. if (!Encodings.isLowUTF16Surrogate(low)) {
  859. throw new IOException(
  860. Utils.messages.createMessage(
  861. MsgKey.ER_INVALID_UTF16_SURROGATE,
  862. new Object[] {
  863. Integer.toHexString((int) c)
  864. + " "
  865. + Integer.toHexString(low)}));
  866. }
  867. final java.io.Writer writer = m_writer;
  868. // If we make it to here we have a valid high, low surrogate pair
  869. if (m_encodingInfo.isInEncoding(c,low)) {
  870. // If the character formed by the surrogate pair
  871. // is in the encoding, so just write it out
  872. writer.write(ch,i,2);
  873. }
  874. else {
  875. // Don't know what to do with this char, it is
  876. // not in the encoding and not a high char in
  877. // a surrogate pair, so write out as an entity ref
  878. final String encoding = getEncoding();
  879. if (encoding != null) {
  880. /* The output encoding is known,
  881. * so somthing is wrong.
  882. */
  883. codePoint = Encodings.toCodePoint(high, low);
  884. // not in the encoding, so write out a character reference
  885. writer.write('&');
  886. writer.write('#');
  887. writer.write(Integer.toString(codePoint));
  888. writer.write(';');
  889. } else {
  890. /* The output encoding is not known,
  891. * so just write it out as-is.
  892. */
  893. writer.write(ch, i, 2);
  894. }
  895. }
  896. // non-zero only if character reference was written out.
  897. return codePoint;
  898. }
  899. /**
  900. * Handle one of the default entities, return false if it
  901. * is not a default entity.
  902. *
  903. * @param ch character to be escaped.
  904. * @param i index into character array.
  905. * @param chars non-null reference to character array.
  906. * @param len length of chars.
  907. * @param fromTextNode true if the characters being processed
  908. * are from a text node, false if they are from an attribute value
  909. * @param escLF true if the linefeed should be escaped.
  910. *
  911. * @return i+1 if the character was written, else i.
  912. *
  913. * @throws java.io.IOException
  914. */
  915. int accumDefaultEntity(
  916. java.io.Writer writer,
  917. char ch,
  918. int i,
  919. char[] chars,
  920. int len,
  921. boolean fromTextNode,
  922. boolean escLF)
  923. throws IOException
  924. {
  925. if (!escLF && CharInfo.S_LINEFEED == ch)
  926. {
  927. writer.write(m_lineSep, 0, m_lineSepLen);
  928. }
  929. else
  930. {
  931. // if this is text node character and a special one of those,
  932. // or if this is a character from attribute value and a special one of those
  933. if ((fromTextNode && m_charInfo.shouldMapTextChar(ch)) || (!fromTextNode && m_charInfo.shouldMapAttrChar(ch)))
  934. {
  935. String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
  936. if (null != outputStringForChar)
  937. {
  938. writer.write(outputStringForChar);
  939. }
  940. else
  941. return i;
  942. }
  943. else
  944. return i;
  945. }
  946. return i + 1;
  947. }
  948. /**
  949. * Normalize the characters, but don't escape.
  950. *
  951. * @param ch The characters from the XML document.
  952. * @param start The start position in the array.
  953. * @param length The number of characters to read from the array.
  954. * @param isCData true if a CDATA block should be built around the characters.
  955. * @param useSystemLineSeparator true if the operating systems
  956. * end-of-line separator should be output rather than a new-line character.
  957. *
  958. * @throws IOException
  959. * @throws org.xml.sax.SAXException
  960. */
  961. void writeNormalizedChars(
  962. char ch[],
  963. int start,
  964. int length,
  965. boolean isCData,
  966. boolean useSystemLineSeparator)
  967. throws IOException, org.xml.sax.SAXException
  968. {
  969. final java.io.Writer writer = m_writer;
  970. int end = start + length;
  971. for (int i = start; i < end; i++)
  972. {
  973. char c = ch[i];
  974. if (CharInfo.S_LINEFEED == c && useSystemLineSeparator)
  975. {
  976. writer.write(m_lineSep, 0, m_lineSepLen);
  977. }
  978. else if (isCData && (!escapingNotNeeded(c)))
  979. {
  980. // if (i != 0)
  981. if (m_cdataTagOpen)
  982. closeCDATA();
  983. // This needs to go into a function...
  984. if (Encodings.isHighUTF16Surrogate(c))
  985. {
  986. writeUTF16Surrogate(c, ch, i, end);
  987. i++ ; // process two input characters
  988. }
  989. else
  990. {
  991. writer.write("&#");
  992. String intStr = Integer.toString((int) c);
  993. writer.write(intStr);
  994. writer.write(';');
  995. }
  996. // if ((i != 0) && (i < (end - 1)))
  997. // if (!m_cdataTagOpen && (i < (end - 1)))
  998. // {
  999. // writer.write(CDATA_DELIMITER_OPEN);
  1000. // m_cdataTagOpen = true;
  1001. // }
  1002. }
  1003. else if (
  1004. isCData
  1005. && ((i < (end - 2))
  1006. && (']' == c)
  1007. && (']' == ch[i + 1])
  1008. && ('>' == ch[i + 2])))
  1009. {
  1010. writer.write(CDATA_CONTINUE);
  1011. i += 2;
  1012. }
  1013. else
  1014. {
  1015. if (escapingNotNeeded(c))
  1016. {
  1017. if (isCData && !m_cdataTagOpen)
  1018. {
  1019. writer.write(CDATA_DELIMITER_OPEN);
  1020. m_cdataTagOpen = true;
  1021. }
  1022. writer.write(c);
  1023. }
  1024. // This needs to go into a function...
  1025. else if (Encodings.isHighUTF16Surrogate(c))
  1026. {
  1027. if (m_cdataTagOpen)
  1028. closeCDATA();
  1029. writeUTF16Surrogate(c, ch, i, end);
  1030. i++; // process two input characters
  1031. }
  1032. else
  1033. {
  1034. if (m_cdataTagOpen)
  1035. closeCDATA();
  1036. writer.write("&#");
  1037. String intStr = Integer.toString((int) c);
  1038. writer.write(intStr);
  1039. writer.write(';');
  1040. }
  1041. }
  1042. }
  1043. }
  1044. /**
  1045. * Ends an un-escaping section.
  1046. *
  1047. * @see #startNonEscaping
  1048. *
  1049. * @throws org.xml.sax.SAXException
  1050. */
  1051. public void endNonEscaping() throws org.xml.sax.SAXException
  1052. {
  1053. m_disableOutputEscapingStates.pop();
  1054. }
  1055. /**
  1056. * Starts an un-escaping section. All characters printed within an un-
  1057. * escaping section are printed as is, without escaping special characters
  1058. * into entity references. Only XML and HTML serializers need to support
  1059. * this method.
  1060. * <p> The contents of the un-escaping section will be delivered through the
  1061. * regular <tt>characters</tt> event.
  1062. *
  1063. * @throws org.xml.sax.SAXException
  1064. */
  1065. public void startNonEscaping() throws org.xml.sax.SAXException
  1066. {
  1067. m_disableOutputEscapingStates.push(true);
  1068. }
  1069. /**
  1070. * Receive notification of cdata.
  1071. *
  1072. * <p>The Parser will call this method to report each chunk of
  1073. * character data. SAX parsers may return all contiguous character
  1074. * data in a single chunk, or they may split it into several
  1075. * chunks; however, all of the characters in any single event
  1076. * must come from the same external entity, so that the Locator
  1077. * provides useful information.</p>
  1078. *
  1079. * <p>The application must not attempt to read from the array
  1080. * outside of the specified range.</p>
  1081. *
  1082. * <p>Note that some parsers will report whitespace using the
  1083. * ignorableWhitespace() method rather than this one (validating
  1084. * parsers must do so).</p>
  1085. *
  1086. * @param ch The characters from the XML document.
  1087. * @param start The start position in the array.
  1088. * @param length The number of characters to read from the array.
  1089. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  1090. * wrapping another exception.
  1091. * @see #ignorableWhitespace
  1092. * @see org.xml.sax.Locator
  1093. *
  1094. * @throws org.xml.sax.SAXException
  1095. */
  1096. protected void cdata(char ch[], int start, final int length)
  1097. throws org.xml.sax.SAXException
  1098. {
  1099. try
  1100. {
  1101. final int old_start = start;
  1102. if (m_elemContext.m_startTagOpen)
  1103. {
  1104. closeStartTag();
  1105. m_elemContext.m_startTagOpen = false;
  1106. }
  1107. m_ispreserve = true;
  1108. if (shouldIndent())
  1109. indent();
  1110. boolean writeCDataBrackets =
  1111. (((length >= 1) && escapingNotNeeded(ch[start])));
  1112. /* Write out the CDATA opening delimiter only if
  1113. * we are supposed to, and if we are not already in
  1114. * the middle of a CDATA section
  1115. */
  1116. if (writeCDataBrackets && !m_cdataTagOpen)
  1117. {
  1118. m_writer.write(CDATA_DELIMITER_OPEN);
  1119. m_cdataTagOpen = true;
  1120. }
  1121. // writer.write(ch, start, length);
  1122. if (isEscapingDisabled())
  1123. {
  1124. charactersRaw(ch, start, length);
  1125. }
  1126. else
  1127. writeNormalizedChars(ch, start, length, true, m_lineSepUse);
  1128. /* used to always write out CDATA closing delimiter here,
  1129. * but now we delay, so that we can merge CDATA sections on output.
  1130. * need to write closing delimiter later
  1131. */
  1132. if (writeCDataBrackets)
  1133. {
  1134. /* if the CDATA section ends with ] don't leave it open
  1135. * as there is a chance that an adjacent CDATA sections
  1136. * starts with ]>.
  1137. * We don't want to merge ]] with > , or ] with ]>
  1138. */
  1139. if (ch[start + length - 1] == ']')
  1140. closeCDATA();
  1141. }
  1142. // time to fire off CDATA event
  1143. if (m_tracer != null)
  1144. super.fireCDATAEvent(ch, old_start, length);
  1145. }
  1146. catch (IOException ioe)
  1147. {
  1148. throw new org.xml.sax.SAXException(
  1149. Utils.messages.createMessage(
  1150. MsgKey.ER_OIERROR,
  1151. null),
  1152. ioe);
  1153. //"IO error", ioe);
  1154. }
  1155. }
  1156. /**
  1157. * Tell if the character escaping should be disabled for the current state.
  1158. *
  1159. * @return true if the character escaping should be disabled.
  1160. */
  1161. private boolean isEscapingDisabled()
  1162. {
  1163. return m_disableOutputEscapingStates.peekOrFalse();
  1164. }
  1165. /**
  1166. * If available, when the disable-output-escaping attribute is used,
  1167. * output raw text without escaping.
  1168. *
  1169. * @param ch The characters from the XML document.
  1170. * @param start The start position in the array.
  1171. * @param length The number of characters to read from the array.
  1172. *
  1173. * @throws org.xml.sax.SAXException
  1174. */
  1175. protected void charactersRaw(char ch[], int start, int length)
  1176. throws org.xml.sax.SAXException
  1177. {
  1178. if (m_inEntityRef)
  1179. return;
  1180. try
  1181. {
  1182. if (m_elemContext.m_startTagOpen)
  1183. {
  1184. closeStartTag();
  1185. m_elemContext.m_startTagOpen = false;
  1186. }
  1187. m_ispreserve = true;
  1188. m_writer.write(ch, start, length);
  1189. }
  1190. catch (IOException e)
  1191. {
  1192. throw new SAXException(e);
  1193. }
  1194. }
  1195. /**
  1196. * Receive notification of character data.
  1197. *
  1198. * <p>The Parser will call this method to report each chunk of
  1199. * character data. SAX parsers may return all contiguous character
  1200. * data in a single chunk, or they may split it into several
  1201. * chunks; however, all of the characters in any single event
  1202. * must come from the same external entity, so that the Locator
  1203. * provides useful information.</p>
  1204. *
  1205. * <p>The application must not attempt to read from the array
  1206. * outside of the specified range.</p>
  1207. *
  1208. * <p>Note that some parsers will report whitespace using the
  1209. * ignorableWhitespace() method rather than this one (validating
  1210. * parsers must do so).</p>
  1211. *
  1212. * @param chars The characters from the XML document.
  1213. * @param start The start position in the array.
  1214. * @param length The number of characters to read from the array.
  1215. * @throws org.xml.sax.SAXException Any SAX exception, possibly
  1216. * wrapping another exception.
  1217. * @see #ignorableWhitespace
  1218. * @see org.xml.sax.Locator
  1219. *
  1220. * @throws org.xml.sax.SAXException
  1221. */
  1222. public void characters(final char chars[], final int start, final int length)
  1223. throws org.xml.sax.SAXException
  1224. {
  1225. // It does not make sense to continue with rest of the method if the number of
  1226. // characters to read from array is 0.
  1227. // Section 7.6.1 of XSLT 1.0 (http://www.w3.org/TR/xslt#value-of) suggest no text node
  1228. // is created if string is empty.
  1229. if (length == 0 || (m_inEntityRef && !m_expandDTDEntities))
  1230. return;
  1231. if (m_elemContext.m_startTagOpen)
  1232. {
  1233. closeStartTag();
  1234. m_elemContext.m_startTagOpen = false;
  1235. }
  1236. else if (m_needToCallStartDocument)
  1237. {
  1238. startDocumentInternal();
  1239. }
  1240. if (m_cdataStartCalled || m_elemContext.m_isCdataSection)
  1241. {
  1242. /* either due to startCDATA() being called or due to
  1243. * cdata-section-elements atribute, we need this as cdata
  1244. */
  1245. cdata(chars, start, length);
  1246. return;
  1247. }
  1248. if (m_cdataTagOpen)
  1249. closeCDATA();
  1250. if (m_disableOutputEscapingStates.peekOrFalse() || (!m_escaping))
  1251. {
  1252. charactersRaw(chars, start, length);
  1253. // time to fire off characters generation event
  1254. if (m_tracer != null)
  1255. super.fireCharEvent(chars, start, length);
  1256. return;
  1257. }
  1258. if (m_elemContext.m_startTagOpen)
  1259. {
  1260. closeStartTag();
  1261. m_elemContext.m_startTagOpen = false;
  1262. }
  1263. try
  1264. {
  1265. int i;
  1266. int startClean;
  1267. // skip any leading whitspace
  1268. // don't go off the end and use a hand inlined version
  1269. // of isWhitespace(ch)
  1270. final int end = start + length;
  1271. int lastDirtyCharProcessed = start - 1; // last non-clean character that was processed
  1272. // that was processed
  1273. final Writer writer = m_writer;
  1274. boolean isAllWhitespace = true;
  1275. // process any leading whitspace
  1276. i = start;
  1277. while (i < end && isAllWhitespace) {
  1278. char ch1 = chars[i];
  1279. if (m_charInfo.shouldMapTextChar(ch1)) {
  1280. // The character is supposed to be replaced by a String
  1281. // so write out the clean whitespace characters accumulated
  1282. // so far
  1283. // then the String.
  1284. writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1285. String outputStringForChar = m_charInfo
  1286. .getOutputStringForChar(ch1);
  1287. writer.write(outputStringForChar);
  1288. // We can't say that everything we are writing out is
  1289. // all whitespace, we just wrote out a String.
  1290. isAllWhitespace = false;
  1291. lastDirtyCharProcessed = i; // mark the last non-clean
  1292. // character processed
  1293. i++;
  1294. } else {
  1295. // The character is clean, but is it a whitespace ?
  1296. switch (ch1) {
  1297. // TODO: Any other whitespace to consider?
  1298. case CharInfo.S_SPACE:
  1299. // Just accumulate the clean whitespace
  1300. i++;
  1301. break;
  1302. case CharInfo.S_LINEFEED:
  1303. lastDirtyCharProcessed = processLineFeed(chars, i,
  1304. lastDirtyCharProcessed, writer);
  1305. i++;
  1306. break;
  1307. case CharInfo.S_CARRIAGERETURN:
  1308. writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1309. writer.write("&#13;");
  1310. lastDirtyCharProcessed = i;
  1311. i++;
  1312. break;
  1313. case CharInfo.S_HORIZONAL_TAB:
  1314. // Just accumulate the clean whitespace
  1315. i++;
  1316. break;
  1317. default:
  1318. // The character was clean, but not a whitespace
  1319. // so break the loop to continue with this character
  1320. // (we don't increment index i !!)
  1321. isAllWhitespace = false;
  1322. break;
  1323. }
  1324. }
  1325. }
  1326. /* If there is some non-whitespace, mark that we may need
  1327. * to preserve this. This is only important if we have indentation on.
  1328. */
  1329. if (i < end || !isAllWhitespace)
  1330. m_ispreserve = true;
  1331. for (; i < end; i++)
  1332. {
  1333. char ch = chars[i];
  1334. if (m_charInfo.shouldMapTextChar(ch)) {
  1335. // The character is supposed to be replaced by a String
  1336. // e.g. '&' --> "&amp;"
  1337. // e.g. '<' --> "&lt;"
  1338. writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1339. String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
  1340. writer.write(outputStringForChar);
  1341. lastDirtyCharProcessed = i;
  1342. }
  1343. else {
  1344. if (ch <= 0x1F) {
  1345. // Range 0x00 through 0x1F inclusive
  1346. //
  1347. // This covers the non-whitespace control characters
  1348. // in the range 0x1 to 0x1F inclusive.
  1349. // It also covers the whitespace control characters in the same way:
  1350. // 0x9 TAB
  1351. // 0xA NEW LINE
  1352. // 0xD CARRIAGE RETURN
  1353. //
  1354. // We also cover 0x0 ... It isn't valid
  1355. // but we will output "&#0;"
  1356. // The default will handle this just fine, but this
  1357. // is a little performance boost to handle the more
  1358. // common TAB, NEW-LINE, CARRIAGE-RETURN
  1359. switch (ch) {
  1360. case CharInfo.S_HORIZONAL_TAB:
  1361. // Leave whitespace TAB as a real character
  1362. break;
  1363. case CharInfo.S_LINEFEED:
  1364. lastDirtyCharProcessed = processLineFeed(chars, i, lastDirtyCharProcessed, writer);
  1365. break;
  1366. case CharInfo.S_CARRIAGERETURN:
  1367. writeOutCleanChars(chars, i, lastDirtyCharProcessed);
  1368. writer.write("&#13;");
  1369. lastDirtyCharProcessed = i;
  1370. // Leave whitespace…

Large files are truncated, but you can click here to view the full file