PageRenderTime 51ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/saxonB/net/sf/saxon/event/XMLEmitter.java

https://bitbucket.org/dmwelch/phdxnat_pipeline
Java | 894 lines | 640 code | 98 blank | 156 comment | 202 complexity | 72473142ba59d2d94848c871a2ea7fdc MD5 | raw file
  1. package net.sf.saxon.event;
  2. import net.sf.saxon.charcode.UnicodeCharacterSet;
  3. import net.sf.saxon.charcode.UTF16;
  4. import net.sf.saxon.om.FastStringBuffer;
  5. import net.sf.saxon.sort.IntHashMap;
  6. import net.sf.saxon.tinytree.CharSlice;
  7. import net.sf.saxon.tinytree.CompressedWhitespace;
  8. import net.sf.saxon.trans.XPathException;
  9. import net.sf.saxon.value.Whitespace;
  10. import javax.xml.transform.OutputKeys;
  11. import java.util.Properties;
  12. import java.util.Stack;
  13. /**
  14. * XMLEmitter is an Emitter that generates XML output
  15. * to a specified destination.
  16. */
  17. public class XMLEmitter extends Emitter {
  18. // NOTE: we experimented with XMLUTF8Emitter which combines XML escaping and UTF8 encoding
  19. // into a single loop. Scrapped it because we couldn't measure any benefits - but there
  20. // ought to be, in theory. Perhaps we weren't buffering the writes carefully enough.
  21. protected boolean empty = true;
  22. protected boolean openStartTag = false;
  23. protected boolean declarationIsWritten = false;
  24. protected int elementCode;
  25. protected boolean preferHex = false;
  26. protected boolean undeclareNamespaces = false;
  27. //private boolean warningIssued = false;
  28. // The element stack holds the display names (lexical QNames) of elements that
  29. // have been started but not finished. It is used to obtain the element name
  30. // for the end tag.
  31. protected Stack elementStack = new Stack();
  32. // Getting a display name for a namecode can be expensive because it involves string
  33. // concatenation, and more importantly, checking of the name against the encoding. So
  34. // we keep a local cache of names we have seen before.
  35. private IntHashMap nameLookup = new IntHashMap(100);
  36. // The cache above is a hash table keyed by the integer name code.
  37. private boolean indenting = false;
  38. private int indentSpaces = 3;
  39. private String indentChars = "\n ";
  40. private int totalAttributeLength = 0;
  41. private boolean requireWellFormed = false;
  42. static boolean[] specialInText; // lookup table for special characters in text
  43. static boolean[] specialInAtt; // lookup table for special characters in attributes
  44. // create look-up table for ASCII characters that need special treatment
  45. static {
  46. specialInText = new boolean[128];
  47. for (int i=0; i<=31; i++) specialInText[i] = true; // allowed in XML 1.1 as character references
  48. for (int i=32; i<=127; i++) specialInText[i] = false;
  49. // note, 0 is used to switch escaping on and off for mapped characters
  50. specialInText['\n'] = false;
  51. specialInText['\t'] = false;
  52. specialInText['\r'] = true;
  53. specialInText['<'] = true;
  54. specialInText['>'] = true;
  55. specialInText['&'] = true;
  56. specialInAtt = new boolean[128];
  57. for (int i=0; i<=31; i++) specialInAtt[i] = true; // allowed in XML 1.1 as character references
  58. for (int i=32; i<=127; i++) specialInAtt[i] = false;
  59. specialInAtt[(char)0] = true;
  60. // used to switch escaping on and off for mapped characters
  61. specialInAtt['\r'] = true;
  62. specialInAtt['\n'] = true;
  63. specialInAtt['\t'] = true;
  64. specialInAtt['<'] = true;
  65. specialInAtt['>'] = true;
  66. specialInAtt['&'] = true;
  67. specialInAtt['\"'] = true;
  68. }
  69. /**
  70. * Start of the event stream. Nothing is done at this stage: the opening of the output
  71. * file is deferred until some content is written to it.
  72. */
  73. public void open() throws XPathException {}
  74. /**
  75. * Start of a document node. Nothing is done at this stage: the opening of the output
  76. * file is deferred until some content is written to it.
  77. */
  78. public void startDocument(int properties) throws XPathException {}
  79. /**
  80. * Notify the end of a document node
  81. */
  82. public void endDocument() throws XPathException {
  83. if (!elementStack.isEmpty()) {
  84. throw new IllegalStateException("Attempt to end document in serializer when elements are unclosed");
  85. }
  86. }
  87. /**
  88. * Do the real work of starting the document. This happens when the first
  89. * content is written.
  90. * @throws XPathException
  91. */
  92. protected void openDocument () throws XPathException
  93. {
  94. if (writer==null) {
  95. makeWriter();
  96. }
  97. if (characterSet==null) {
  98. characterSet = UnicodeCharacterSet.getInstance();
  99. }
  100. if (outputProperties==null) {
  101. outputProperties = new Properties();
  102. }
  103. String rep = outputProperties.getProperty(SaxonOutputKeys.CHARACTER_REPRESENTATION);
  104. rep = Whitespace.trim(rep);
  105. if (rep != null) {
  106. preferHex = (rep.equalsIgnoreCase("hex"));
  107. }
  108. rep = outputProperties.getProperty(SaxonOutputKeys.UNDECLARE_PREFIXES);
  109. if (rep!=null) {
  110. undeclareNamespaces = (rep.equalsIgnoreCase("yes"));
  111. }
  112. writeDeclaration();
  113. }
  114. /**
  115. * Output the XML declaration
  116. */
  117. public void writeDeclaration() throws XPathException {
  118. if (declarationIsWritten) return;
  119. declarationIsWritten = true;
  120. try {
  121. indenting = "yes".equals(outputProperties.getProperty(OutputKeys.INDENT));
  122. String s = outputProperties.getProperty(SaxonOutputKeys.INDENT_SPACES);
  123. if (s!=null) {
  124. try {
  125. indentSpaces = Integer.parseInt(Whitespace.trim(s));
  126. } catch (NumberFormatException err) {
  127. indentSpaces = 3;
  128. }
  129. }
  130. String byteOrderMark = outputProperties.getProperty(SaxonOutputKeys.BYTE_ORDER_MARK);
  131. String encoding = outputProperties.getProperty(OutputKeys.ENCODING);
  132. if (encoding==null || encoding.equalsIgnoreCase("utf8")) {
  133. encoding = "UTF-8";
  134. }
  135. if ("yes".equals(byteOrderMark) && (
  136. "UTF-8".equalsIgnoreCase(encoding) ||
  137. "UTF-16LE".equalsIgnoreCase(encoding) ||
  138. "UTF-16BE".equalsIgnoreCase(encoding))) {
  139. writer.write('\uFEFF');
  140. }
  141. String omitXMLDeclaration = outputProperties.getProperty(OutputKeys.OMIT_XML_DECLARATION);
  142. if (omitXMLDeclaration==null) {
  143. omitXMLDeclaration = "no";
  144. }
  145. String version = outputProperties.getProperty(OutputKeys.VERSION);
  146. if (version==null) {
  147. version = getConfiguration().getNameChecker().getXMLVersion();
  148. } else {
  149. if (!version.equals("1.0") && !version.equals("1.1")) {
  150. XPathException err = new XPathException("XML version must be 1.0 or 1.1");
  151. err.setErrorCode("SESU0006");
  152. throw err;
  153. }
  154. if (!version.equals("1.0") && omitXMLDeclaration.equals("yes") &&
  155. outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM) != null) {
  156. XPathException err = new XPathException("Values of 'version', 'omit-xml-declaration', and 'doctype-system' conflict");
  157. err.setErrorCode("SEPM0009");
  158. throw err;
  159. }
  160. }
  161. if (version.equals("1.0") && undeclareNamespaces) {
  162. XPathException err = new XPathException("Cannot undeclare namespaces with XML version 1.0");
  163. err.setErrorCode("SEPM0010");
  164. throw err;
  165. }
  166. String standalone = outputProperties.getProperty(OutputKeys.STANDALONE);
  167. if ("omit".equals(standalone)) {
  168. standalone = null;
  169. }
  170. if (standalone != null) {
  171. requireWellFormed = true;
  172. if (omitXMLDeclaration.equals("yes")) {
  173. XPathException err = new XPathException("Values of 'standalone' and 'omit-xml-declaration' conflict");
  174. err.setErrorCode("SEPM0009");
  175. throw err;
  176. }
  177. }
  178. if (omitXMLDeclaration.equals("no")) {
  179. writer.write("<?xml version=\"" + version + "\" " + "encoding=\"" + encoding + '\"' +
  180. (standalone != null ? " standalone=\"" + standalone + '\"' : "") + "?>");
  181. // don't write a newline character: it's wrong if the output is an
  182. // external general parsed entity
  183. }
  184. } catch (java.io.IOException err) {
  185. throw new XPathException(err);
  186. }
  187. }
  188. /**
  189. * Output the document type declaration
  190. * @param type The element name
  191. * @param systemId The DOCTYP system identifier
  192. * @param publicId The DOCTYPE public identifier
  193. */
  194. protected void writeDocType(String type, String systemId, String publicId) throws XPathException {
  195. try {
  196. if (declarationIsWritten && !indenting) {
  197. // don't add a newline if indenting, because the indenter will already have done so
  198. writer.write("\n");
  199. }
  200. writer.write("<!DOCTYPE " + type + '\n');
  201. if (systemId!=null && publicId==null) {
  202. writer.write(" SYSTEM \"" + systemId + "\">\n");
  203. } else if (systemId==null && publicId!=null) { // handles the HTML case
  204. writer.write(" PUBLIC \"" + publicId + "\">\n");
  205. } else {
  206. writer.write(" PUBLIC \"" + publicId + "\" \"" + systemId + "\">\n");
  207. }
  208. } catch (java.io.IOException err) {
  209. throw new XPathException(err);
  210. }
  211. }
  212. /**
  213. * End of the document.
  214. */
  215. public void close() throws XPathException {
  216. // if nothing has been written, we should still create the file and write an XML declaration
  217. if (empty) {
  218. openDocument();
  219. }
  220. try {
  221. if (writer != null) {
  222. writer.flush();
  223. }
  224. } catch (java.io.IOException err) {
  225. throw new XPathException(err);
  226. }
  227. }
  228. /**
  229. * Start of an element. Output the start tag, escaping special characters.
  230. */
  231. public void startElement (int nameCode, int typeCode, int locationId, int properties) throws XPathException
  232. {
  233. if (empty) {
  234. openDocument();
  235. } else if (requireWellFormed && elementStack.isEmpty()) {
  236. XPathException err = new XPathException("When 'standalone' or 'doctype-system' is specified, the document must be well-formed; " +
  237. "but this document contains more than one top-level element");
  238. err.setErrorCode("SEPM0004");
  239. throw err;
  240. }
  241. String displayName;
  242. // See if we've seen this name before
  243. displayName = getCachedName(nameCode);
  244. // Otherwise, look it up in the namepool and check that it's encodable
  245. if (displayName == null) {
  246. displayName = namePool.getDisplayName(nameCode);
  247. if (!allCharactersEncodable) {
  248. int badchar = testCharacters(displayName);
  249. if (badchar!=0) {
  250. XPathException err = new XPathException("Element name contains a character (decimal + " +
  251. badchar + ") not available in the selected encoding");
  252. err.setErrorCode("SERE0008");
  253. throw err;
  254. }
  255. }
  256. putCachedName(nameCode, displayName);
  257. }
  258. elementStack.push(displayName);
  259. elementCode = nameCode;
  260. try {
  261. if (empty) {
  262. String systemId = outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM);
  263. String publicId = outputProperties.getProperty(OutputKeys.DOCTYPE_PUBLIC);
  264. if (systemId!=null) {
  265. requireWellFormed = true;
  266. writeDocType(displayName, systemId, publicId);
  267. }
  268. empty = false;
  269. }
  270. if (openStartTag) {
  271. closeStartTag();
  272. }
  273. writer.write('<');
  274. writer.write(displayName);
  275. openStartTag = true;
  276. totalAttributeLength = 0;
  277. } catch (java.io.IOException err) {
  278. throw new XPathException(err);
  279. }
  280. }
  281. public void namespace(int namespaceCode, int properties) throws XPathException {
  282. try {
  283. String nsprefix = namePool.getPrefixFromNamespaceCode(namespaceCode);
  284. String nsuri = namePool.getURIFromNamespaceCode(namespaceCode);
  285. int len = nsuri.length() + nsprefix.length() + 8;
  286. String sep = " ";
  287. if (indenting && (totalAttributeLength + len) > 80 && totalAttributeLength != 0) {
  288. sep = getAttributeIndentString();
  289. }
  290. totalAttributeLength += len;
  291. if (nsprefix.length() == 0) {
  292. writer.write(sep);
  293. writeAttribute(elementCode, "xmlns", nsuri, 0);
  294. } else if (nsprefix.equals("xml")) {
  295. //return;
  296. } else {
  297. int badchar = testCharacters(nsprefix);
  298. if (badchar!=0) {
  299. XPathException err = new XPathException("Namespace prefix contains a character (decimal + " +
  300. badchar + ") not available in the selected encoding");
  301. err.setErrorCode("SERE0008");
  302. throw err;
  303. }
  304. if (undeclareNamespaces || nsuri.length() != 0) {
  305. writer.write(sep);
  306. writeAttribute(elementCode, "xmlns:" + nsprefix, nsuri, 0);
  307. }
  308. }
  309. } catch (java.io.IOException err) {
  310. throw new XPathException(err);
  311. }
  312. }
  313. public void attribute(int nameCode, int typeCode, CharSequence value, int locationId, int properties)
  314. throws XPathException {
  315. String displayName;
  316. // See if we've seen this name before
  317. displayName = getCachedName(nameCode);
  318. // Otherwise, look it up in the namepool and check that it's encodable
  319. if (displayName == null) {
  320. displayName = namePool.getDisplayName(nameCode);
  321. if (!allCharactersEncodable) {
  322. int badchar = testCharacters(displayName);
  323. if (badchar!=0) {
  324. XPathException err = new XPathException("Attribute name contains a character (decimal + " +
  325. badchar + ") not available in the selected encoding");
  326. err.setErrorCode("SERE0008");
  327. throw err;
  328. }
  329. }
  330. putCachedName(nameCode, displayName);
  331. }
  332. final int len = displayName.length() + value.length() + 4;
  333. String sep = " ";
  334. if (indenting && (totalAttributeLength + len) > 80 && totalAttributeLength != 0) {
  335. sep = getAttributeIndentString();
  336. }
  337. totalAttributeLength += len;
  338. try {
  339. writer.write(sep);
  340. writeAttribute(
  341. elementCode,
  342. displayName,
  343. value,
  344. properties );
  345. } catch (java.io.IOException err) {
  346. throw new XPathException(err);
  347. }
  348. }
  349. private String getAttributeIndentString() {
  350. int indent = (elementStack.size()-1) * indentSpaces + ((String)elementStack.peek()).length() + 3;
  351. while (indent >= indentChars.length()) {
  352. indentChars += " ";
  353. }
  354. return indentChars.substring(0, indent);
  355. }
  356. public void startContent() throws XPathException {
  357. // don't add ">" to the start tag until we know whether the element has content
  358. }
  359. /**
  360. * Mark the end of the start tag
  361. * @throws XPathException if an IO exception occurs
  362. */
  363. public void closeStartTag() throws XPathException {
  364. try {
  365. if (openStartTag) {
  366. writer.write('>');
  367. openStartTag = false;
  368. }
  369. } catch (java.io.IOException err) {
  370. throw new XPathException(err);
  371. }
  372. }
  373. /**
  374. * Close an empty element tag. (This is overridden in XHTMLEmitter).
  375. * @param displayName the name of the empty element
  376. * @param nameCode the fingerprint of the name of the empty element
  377. * @return the string used to close an empty element tag.
  378. */
  379. protected String emptyElementTagCloser(String displayName, int nameCode) {
  380. return "/>";
  381. }
  382. /**
  383. * Write attribute name=value pair.
  384. * @param elCode The element name is not used in this version of the
  385. * method, but is used in the HTML subclass.
  386. * @param attname The attribute name, which has already been validated to ensure
  387. * it can be written in this encoding
  388. * @param value The value of the attribute
  389. * @param properties Any special properties of the attribute
  390. */
  391. protected void writeAttribute(int elCode, String attname, CharSequence value, int properties) throws XPathException {
  392. try {
  393. String val = value.toString();
  394. writer.write(attname);
  395. if ((properties & ReceiverOptions.NO_SPECIAL_CHARS) != 0) {
  396. writer.write('=');
  397. writer.write('"');
  398. writer.write(val);
  399. writer.write('"');
  400. } else if ((properties & ReceiverOptions.USE_NULL_MARKERS) != 0) {
  401. // null (0) characters will be used before and after any section of
  402. // the value generated from a character map
  403. writer.write('=');
  404. char delimiter = (val.indexOf('"') >= 0 && val.indexOf('\'') < 0 ? '\'' : '"');
  405. writer.write(delimiter);
  406. writeEscape(value, true);
  407. writer.write(delimiter);
  408. } else {
  409. writer.write("=\"");
  410. writeEscape(value, true);
  411. writer.write('\"');
  412. }
  413. } catch (java.io.IOException err) {
  414. throw new XPathException(err);
  415. }
  416. }
  417. /**
  418. * Test that all characters in a name (for example) are supported in the target encoding.
  419. * @param chars the characters to be tested
  420. * @return zero if all the characters are available, or the value of the
  421. * first offending character if not
  422. */
  423. protected int testCharacters(CharSequence chars) throws XPathException {
  424. for (int i=0; i<chars.length(); i++) {
  425. char c = chars.charAt(i);
  426. if (c > 127) {
  427. if (UTF16.isHighSurrogate(c)) {
  428. int cc = UTF16.combinePair(c, chars.charAt(++i));
  429. if (!characterSet.inCharset(cc)) {
  430. return cc;
  431. }
  432. } else if (!characterSet.inCharset(c)) {
  433. return c;
  434. }
  435. }
  436. }
  437. return 0;
  438. }
  439. /**
  440. * End of an element.
  441. */
  442. public void endElement () throws XPathException
  443. {
  444. String displayName = (String)elementStack.pop();
  445. try {
  446. if (openStartTag) {
  447. writer.write(emptyElementTagCloser(displayName, elementCode));
  448. openStartTag = false;
  449. } else {
  450. writer.write("</");
  451. writer.write(displayName);
  452. writer.write('>');
  453. }
  454. } catch (java.io.IOException err) {
  455. throw new XPathException(err);
  456. }
  457. }
  458. /**
  459. * Character data.
  460. */
  461. public void characters (CharSequence chars, int locationId, int properties) throws XPathException
  462. {
  463. if (empty) {
  464. openDocument();
  465. if (!Whitespace.isWhite(chars)) {
  466. if (requireWellFormed || outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM)!=null) {
  467. XPathException err = new XPathException("When 'standalone' or 'doctype-system' is specified, the document must be well-formed; " +
  468. "but this document contains a top-level text node");
  469. err.setErrorCode("SEPM0004");
  470. throw err;
  471. }
  472. }
  473. }
  474. if (requireWellFormed && elementStack.isEmpty() && !Whitespace.isWhite(chars)) {
  475. XPathException err = new XPathException("When 'standalone' or 'doctype-system' is specified, the document must be well-formed; " +
  476. "but this document contains a top-level text node");
  477. err.setErrorCode("SEPM0004");
  478. throw err;
  479. }
  480. try {
  481. if (openStartTag) {
  482. closeStartTag();
  483. }
  484. if ((properties & ReceiverOptions.NO_SPECIAL_CHARS) != 0) {
  485. writeCharSequence(chars);
  486. } else if ((properties & ReceiverOptions.DISABLE_ESCAPING) == 0) {
  487. writeEscape(chars, false);
  488. } else {
  489. // disable-output-escaping="yes"
  490. if (testCharacters(chars) == 0) {
  491. if ((properties & ReceiverOptions.USE_NULL_MARKERS) == 0) {
  492. // null (0) characters will be used before and after any section of
  493. // the value generated from a character map
  494. writeCharSequence(chars);
  495. } else {
  496. // Need to strip out any null markers. See test output-html109
  497. final int len = chars.length();
  498. for (int i=0; i<len; i++) {
  499. char c = chars.charAt(i);
  500. if (c != 0) {
  501. writer.write(c);
  502. }
  503. }
  504. }
  505. } else {
  506. // Using disable output escaping with characters
  507. // that are not available in the target encoding
  508. // The required action is to ignore d-o-e in respect of those characters that are
  509. // not available in the encoding. This is slow...
  510. final int len = chars.length();
  511. for (int i=0; i<len; i++) {
  512. char c = chars.charAt(i);
  513. if (c != 0) {
  514. if (c > 127 && UTF16.isHighSurrogate(c)) {
  515. char[] pair = new char[2];
  516. pair[0] = c;
  517. pair[1] = chars.charAt(++i);
  518. int cc = UTF16.combinePair(c, pair[1]);
  519. if (!characterSet.inCharset(cc)) {
  520. writeEscape(new CharSlice(pair), false);
  521. } else {
  522. writeCharSequence(new CharSlice(pair));
  523. }
  524. } else {
  525. char[] ca = {c};
  526. if (!characterSet.inCharset(c)) {
  527. writeEscape(new CharSlice(ca), false);
  528. } else {
  529. writeCharSequence(new CharSlice(ca));
  530. }
  531. }
  532. }
  533. }
  534. }
  535. }
  536. } catch (java.io.IOException err) {
  537. throw new XPathException(err);
  538. }
  539. }
  540. /**
  541. * Write a CharSequence (without any escaping of special characters): various implementations
  542. * @param s the character sequence to be written
  543. */
  544. public void writeCharSequence(CharSequence s) throws java.io.IOException {
  545. if (s instanceof String) {
  546. writer.write((String)s);
  547. } else if (s instanceof CharSlice) {
  548. ((CharSlice)s).write(writer);
  549. } else if (s instanceof FastStringBuffer) {
  550. ((FastStringBuffer)s).write(writer);
  551. } else if (s instanceof CompressedWhitespace) {
  552. ((CompressedWhitespace)s).write(writer);
  553. } else {
  554. writer.write(s.toString());
  555. }
  556. }
  557. /**
  558. * Handle a processing instruction.
  559. */
  560. public void processingInstruction (String target, CharSequence data, int locationId, int properties)
  561. throws XPathException {
  562. if (empty) {
  563. openDocument();
  564. }
  565. int x = testCharacters(target);
  566. if (x != 0) {
  567. XPathException err = new XPathException("Character in processing instruction name cannot be represented " +
  568. "in the selected encoding (code " + x + ')');
  569. err.setErrorCode("SERE0008");
  570. throw err;
  571. }
  572. x = testCharacters(data);
  573. if (x != 0) {
  574. XPathException err = new XPathException("Character in processing instruction data cannot be represented " +
  575. "in the selected encoding (code " + x + ')');
  576. err.setErrorCode("SERE0008");
  577. throw err;
  578. }
  579. try {
  580. if (openStartTag) {
  581. closeStartTag();
  582. }
  583. writer.write("<?" + target + (data.length()>0 ? ' ' + data.toString() : "") + "?>");
  584. } catch (java.io.IOException err) {
  585. throw new XPathException(err);
  586. }
  587. }
  588. /**
  589. * Write contents of array to current writer, after escaping special characters.
  590. * This method converts the XML special characters (such as < and &) into their
  591. * predefined entities.
  592. * @param chars The character sequence containing the string
  593. * @param inAttribute Set to true if the text is in an attribute value
  594. */
  595. protected void writeEscape(final CharSequence chars, final boolean inAttribute)
  596. throws java.io.IOException, XPathException {
  597. int segstart = 0;
  598. boolean disabled = false;
  599. final boolean[] specialChars = (inAttribute ? specialInAtt : specialInText);
  600. if (chars instanceof CompressedWhitespace) {
  601. ((CompressedWhitespace)chars).writeEscape(specialChars, writer);
  602. return;
  603. }
  604. final int clength = chars.length();
  605. while (segstart < clength) {
  606. int i = segstart;
  607. // find a maximal sequence of "ordinary" characters
  608. while (i < clength) {
  609. final char c = chars.charAt(i);
  610. if (c < 127) {
  611. if (specialChars[c]) {
  612. break;
  613. } else {
  614. i++;
  615. }
  616. } else if (c < 160) {
  617. break;
  618. } else if (c == 0x2028) {
  619. break;
  620. } else if (UTF16.isHighSurrogate(c)) {
  621. break;
  622. } else if (!characterSet.inCharset(c)) {
  623. break;
  624. } else {
  625. i++;
  626. }
  627. }
  628. // if this was the whole string write it out and exit
  629. if (i >= clength) {
  630. if (segstart == 0) {
  631. writeCharSequence(chars);
  632. } else {
  633. writeCharSequence(chars.subSequence(segstart, i));
  634. }
  635. return;
  636. }
  637. // otherwise write out this sequence
  638. if (i > segstart) {
  639. writeCharSequence(chars.subSequence(segstart, i));
  640. }
  641. // examine the special character that interrupted the scan
  642. final char c = chars.charAt(i);
  643. if (c==0) {
  644. // used to switch escaping on and off
  645. disabled = !disabled;
  646. } else if (disabled) {
  647. if (c > 127) {
  648. if (UTF16.isHighSurrogate(c)) {
  649. int cc = UTF16.combinePair(c, chars.charAt(i+1));
  650. if (!characterSet.inCharset(cc)) {
  651. XPathException de = new XPathException("Character x" + Integer.toHexString(cc) +
  652. " is not available in the chosen encoding");
  653. de.setErrorCode("SERE0008");
  654. throw de;
  655. }
  656. } else if (!characterSet.inCharset(c)) {
  657. XPathException de = new XPathException("Character " + c + " (x" + Integer.toHexString((int)c) +
  658. ") is not available in the chosen encoding");
  659. de.setErrorCode("SERE0008");
  660. throw de;
  661. }
  662. }
  663. writer.write(c);
  664. } else if (c>=127 && c<160) {
  665. // XML 1.1 requires these characters to be written as character references
  666. outputCharacterReference(c);
  667. } else if (c>=160) {
  668. if (c==0x2028) {
  669. outputCharacterReference(c);
  670. } else if (UTF16.isHighSurrogate(c)) {
  671. char d = chars.charAt(++i);
  672. int charval = UTF16.combinePair(c, d);
  673. if (characterSet.inCharset(charval)) {
  674. writer.write(c);
  675. writer.write(d);
  676. } else {
  677. outputCharacterReference(charval);
  678. }
  679. } else {
  680. // process characters not available in the current encoding
  681. outputCharacterReference(c);
  682. }
  683. } else {
  684. // process special ASCII characters
  685. if (c=='<') {
  686. writer.write("&lt;");
  687. } else if (c=='>') {
  688. writer.write("&gt;");
  689. } else if (c=='&') {
  690. writer.write("&amp;");
  691. } else if (c=='\"') {
  692. writer.write("&#34;");
  693. } else if (c=='\n') {
  694. writer.write("&#xA;");
  695. } else if (c=='\r') {
  696. writer.write("&#xD;");
  697. } else if (c=='\t') {
  698. writer.write("&#x9;");
  699. } else {
  700. // C0 control characters
  701. outputCharacterReference(c);
  702. }
  703. }
  704. segstart = ++i;
  705. }
  706. }
  707. /**
  708. * Output a decimal or hexadecimal character reference
  709. */
  710. private char[] charref = new char[10];
  711. protected void outputCharacterReference(int charval) throws java.io.IOException {
  712. if (preferHex) {
  713. int o = 0;
  714. charref[o++]='&';
  715. charref[o++]='#';
  716. charref[o++]='x';
  717. String code = Integer.toHexString(charval);
  718. int len = code.length();
  719. for (int k=0; k<len; k++) {
  720. charref[o++]=code.charAt(k);
  721. }
  722. charref[o++]=';';
  723. writer.write(charref, 0, o);
  724. } else {
  725. int o = 0;
  726. charref[o++]='&';
  727. charref[o++]='#';
  728. String code = Integer.toString(charval);
  729. int len = code.length();
  730. for (int k=0; k<len; k++) {
  731. charref[o++]=code.charAt(k);
  732. }
  733. charref[o++]=';';
  734. writer.write(charref, 0, o);
  735. }
  736. }
  737. /**
  738. * Handle a comment.
  739. */
  740. public void comment (CharSequence chars, int locationId, int properties) throws XPathException
  741. {
  742. if (empty) {
  743. openDocument();
  744. }
  745. int x = testCharacters(chars);
  746. if (x != 0) {
  747. XPathException err = new XPathException("Character in comment cannot be represented " +
  748. "in the selected encoding (code " + x + ')');
  749. err.setErrorCode("SERE0008");
  750. throw err;
  751. }
  752. try {
  753. if (openStartTag) {
  754. closeStartTag();
  755. }
  756. writer.write("<!--");
  757. writer.write(chars.toString());
  758. writer.write("-->");
  759. } catch (java.io.IOException err) {
  760. throw new XPathException(err);
  761. }
  762. }
  763. /**
  764. * Get a name from the local name cache
  765. * @param nameCode the integer name code
  766. * @return a lexical QName if the name is in the cache; otherwise, null
  767. */
  768. protected String getCachedName(int nameCode) {
  769. return (String)nameLookup.get(nameCode);
  770. }
  771. /**
  772. * Add a name to the local name cache
  773. * @param nameCode the integer name code
  774. * @param displayName the corresponding lexical QName
  775. */
  776. protected void putCachedName(int nameCode, String displayName) {
  777. nameLookup.put(nameCode, displayName);
  778. }
  779. }
  780. //
  781. // The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
  782. // you may not use this file except in compliance with the License. You may obtain a copy of the
  783. // License at http://www.mozilla.org/MPL/
  784. //
  785. // Software distributed under the License is distributed on an "AS IS" basis,
  786. // WITHOUT WARRANTY OF ANY KIND, either express or implied.
  787. // See the License for the specific language governing rights and limitations under the License.
  788. //
  789. // The Original Code is: all this file.
  790. //
  791. // The Initial Developer of the Original Code is Michael H. Kay.
  792. //
  793. // Portions created by (your name) are Copyright (C) (your legal entity). All Rights Reserved.
  794. //
  795. // Contributor(s): none.
  796. //