PageRenderTime 42ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/bundles/plugins-trunk/XML/sidekick/html/parser/html/HtmlFormatter.java

#
Java | 378 lines | 269 code | 54 blank | 55 comment | 40 complexity | 2f69a649f72fe4e969e3a6e9fe788766 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * HtmlFormatter.java -- HTML document pretty-printer
  3. * Copyright (C) 1999 Quiotix Corporation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License, version 2, as
  7. * published by the Free Software Foundation.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License (http://www.gnu.org/copyleft/gpl.txt)
  13. * for more details.
  14. */
  15. package sidekick.html.parser.html;
  16. import java.util.HashSet;
  17. import java.util.Iterator;
  18. import java.util.Set;
  19. import java.util.Arrays;
  20. /**
  21. * HtmlFormatter is a Visitor which traverses an HtmlDocument, dumping the
  22. * contents of the document to a specified output stream. It assumes that
  23. * the documents has been preprocessed by HtmlCollector (which matches up
  24. * beginning and end tags) and by HtmlScrubber (which formats tags in a
  25. * consistent way). In particular, HtmlScrubber should be invoked with the
  26. * TRIM_SPACES option to remove trailing spaces, which can confuse the
  27. * formatting algorithm.
  28. * <p/>
  29. * <P>The right margin and indent increment can be specified as properties.
  30. * <p>danson:
  31. * Modified for Beauty plugin for jEdit, added ability to handle jsps. Removed
  32. * the PrintWriter from the MarginWriter as PrintWriter munges line separators
  33. * in its own weird way. For jEdit, I want the same line separator that has
  34. * been specified for the current buffer, which is not necessarily the system
  35. * line separator. Formatted content is now written to a StringBuffer and can
  36. * be retrieved with the <code>toString</code> method.
  37. * <p>
  38. * Did some minor modification to the handling of PRE, SCRIPT, and STYLE blocks.
  39. * Formatting once would be fine, formatting the same file a second time would
  40. * cause extra blank lines to be added just before the closing tag. This also
  41. * required some minor modification to the .jj file too.
  42. *
  43. * @author Brian Goetz, Quiotix
  44. * @see sidekick.html.parser.html.HtmlVisitor
  45. * @see sidekick.html.parser.html.HtmlCollector
  46. * @see sidekick.html.parser.html.HtmlScrubber
  47. */
  48. public class HtmlFormatter extends HtmlVisitor {
  49. protected MarginWriter out;
  50. protected int rightMargin = 80;
  51. protected int indentSize = 2;
  52. protected String lineSeparator = System.getProperty("line.separator");
  53. protected static Set<String> tagsIndentBlock = new HashSet<String>(
  54. Arrays.asList("TABLE", "TR", "TD", "TH", "FORM", "HTML",
  55. "HEAD", "BODY", "SELECT"));
  56. protected static Set<String> tagsNewlineBefore = new HashSet<String>(
  57. Arrays.asList("P", "H1", "H2", "H3", "H4", "H5", "H6", "BR", "taglib"));//, "OL", "UL", "LI", "BR"));
  58. protected static Set<String> tagsPreformatted = new HashSet<String>(
  59. Arrays.asList("PRE", "SCRIPT", "STYLE"));
  60. protected static Set<String> tagsTryMatch = new HashSet<String>(
  61. Arrays.asList("A", "TD", "TH", "TR", "I", "B","EM", "FONT", "TT", "UL"));
  62. protected TagBlockRenderer blockRenderer = new TagBlockRenderer();
  63. protected HtmlDocument.HtmlElement previousElement;
  64. protected boolean inPreBlock;
  65. public HtmlFormatter() throws Exception {
  66. out = new MarginWriter();
  67. out.setRightMargin(rightMargin);
  68. out.setLineSeparator(lineSeparator);
  69. }
  70. public String toString() {
  71. return out.toString();
  72. }
  73. public void setRightMargin(int margin) {
  74. rightMargin = margin;
  75. out.setRightMargin(rightMargin);
  76. }
  77. public void setIndent(int indent) {
  78. indentSize = indent;
  79. }
  80. public void setLineSeparator(String ls) {
  81. lineSeparator = ls;
  82. out.setLineSeparator(lineSeparator);
  83. }
  84. public void visit(HtmlDocument.TagBlock block) {
  85. boolean indent;
  86. boolean preformat;
  87. int wasMargin = 0;
  88. preformat = tagsPreformatted.contains(block.startTag.tagName.toUpperCase());
  89. if (tagsTryMatch.contains(block.startTag.tagName.toUpperCase())) {
  90. blockRenderer.start();
  91. blockRenderer.setTargetWidth(out.getRightMargin() - out.getLeftMargin());
  92. blockRenderer.visit(block);
  93. blockRenderer.finish();
  94. if (!blockRenderer.hasBlownTarget()) {
  95. if (preformat)
  96. out.print(blockRenderer.getString());
  97. else
  98. out.printAutoWrap(blockRenderer.getString());
  99. previousElement = block.endTag;
  100. return;
  101. }
  102. }
  103. // Only will get here if we've failed the try-block test
  104. indent = tagsIndentBlock.contains(block.startTag.tagName.toUpperCase());
  105. if (preformat) {
  106. wasMargin = out.getLeftMargin();
  107. visit(block.startTag);
  108. out.setLeftMargin(0);
  109. inPreBlock = true;
  110. visit(block.body);
  111. inPreBlock = false;
  112. out.setLeftMargin(wasMargin);
  113. visit(block.endTag);
  114. } else if (indent) {
  115. out.printlnSoft();
  116. visit(block.startTag);
  117. out.printlnSoft();
  118. out.setLeftMargin(out.getLeftMargin() + indentSize);
  119. visit(block.body);
  120. out.setLeftMargin(out.getLeftMargin() - indentSize);
  121. out.printlnSoft();
  122. visit(block.endTag);
  123. out.printlnSoft();
  124. inPreBlock = false;
  125. } else {
  126. visit(block.startTag);
  127. visit(block.body);
  128. visit(block.endTag);
  129. };
  130. }
  131. public void visit(HtmlDocument.Tag t) {
  132. String s = t.toString();
  133. int hanging;
  134. if (tagsNewlineBefore.contains(t.tagName.toUpperCase())
  135. || out.getCurPosition() + s.length() > out.getRightMargin())
  136. out.printlnSoft();
  137. out.print(t.tagStart + t.tagName);
  138. hanging = t.tagName.length() + 1;
  139. for (Iterator it = t.attributeList.attributes.iterator(); it.hasNext();) {
  140. HtmlDocument.Attribute a = (HtmlDocument.Attribute) it.next();
  141. out.printAutoWrap(" " + a.toString(), hanging);
  142. };
  143. if (t.tagEnd.length() > 1 && !t.tagEnd.startsWith("/"))
  144. out.print(" "); // got a jsp tag
  145. out.print(t.tagEnd);
  146. previousElement = t;
  147. }
  148. public void visit(HtmlDocument.EndTag t) {
  149. out.printAutoWrap(t.toString());
  150. if (tagsNewlineBefore.contains(t.tagName.toUpperCase())) {
  151. out.printlnSoft();
  152. ///out.print(lineSeparator);
  153. };
  154. previousElement = t;
  155. }
  156. public void visit(HtmlDocument.Comment c) {
  157. out.print(c.toString());
  158. previousElement = c;
  159. }
  160. public void visit(HtmlDocument.Text t) {
  161. if (inPreBlock)
  162. out.print(t.text);
  163. else {
  164. int start = 0;
  165. while (start < t.text.length()) {
  166. int index = t.text.indexOf(' ', start) + 1;
  167. if (index == 0)
  168. index = t.text.length();
  169. out.printAutoWrap(t.text.substring(start, index));
  170. start = index;
  171. }
  172. }
  173. previousElement = t;
  174. }
  175. public void visit(HtmlDocument.Newline n) {
  176. if (inPreBlock) {
  177. out.print(lineSeparator);
  178. } else if (previousElement instanceof HtmlDocument.Tag
  179. || previousElement instanceof HtmlDocument.EndTag
  180. || previousElement instanceof HtmlDocument.Comment
  181. || previousElement instanceof HtmlDocument.Newline) {
  182. out.printlnSoft();
  183. } else if (previousElement instanceof HtmlDocument.Text) {
  184. out.print(" ");
  185. }
  186. previousElement = n;
  187. }
  188. public void start() {
  189. previousElement = null;
  190. inPreBlock = false;
  191. }
  192. public void finish() {
  193. }
  194. }
  195. /**
  196. * Utility class, used by HtmlFormatter, which adds some word-wrapping
  197. * and hanging indent functionality to a PrintWriter.
  198. */
  199. class MarginWriter {
  200. protected int tabStop;
  201. protected int curPosition;
  202. protected int leftMargin;
  203. protected int rightMargin;
  204. StringBuffer sb = null;
  205. protected char[] spaces = new char[256];
  206. protected String lineSeparator = System.getProperty("line.separator");
  207. public MarginWriter() {
  208. sb = new StringBuffer();
  209. for (int i = 0; i < spaces.length; i++)
  210. spaces[i] = ' ';
  211. }
  212. public String toString() {
  213. return sb.toString();
  214. }
  215. public void print(String s) {
  216. if (curPosition == 0 && leftMargin > 0) {
  217. sb.append(spaces, 0, leftMargin);
  218. curPosition = leftMargin;
  219. };
  220. sb.append(s);
  221. curPosition += s.length();
  222. }
  223. public void printAutoWrap(String s) {
  224. if (curPosition > leftMargin
  225. && curPosition + s.length() > rightMargin)
  226. println();
  227. print(s);
  228. }
  229. public void printAutoWrap(String s, int hanging) {
  230. if (curPosition > leftMargin
  231. && curPosition + s.length() > rightMargin) {
  232. println();
  233. sb.append(spaces, 0, hanging + leftMargin);
  234. curPosition = leftMargin + hanging;
  235. };
  236. print(s);
  237. }
  238. public void println() {
  239. curPosition = 0;
  240. sb.append(lineSeparator);
  241. }
  242. public void printlnSoft() {
  243. if (curPosition > 0)
  244. println();
  245. }
  246. public void setLeftMargin(int leftMargin) {
  247. this.leftMargin = leftMargin;
  248. }
  249. public int getLeftMargin() {
  250. return leftMargin;
  251. }
  252. public void setRightMargin(int rightMargin) {
  253. this.rightMargin = rightMargin;
  254. }
  255. public int getRightMargin() {
  256. return rightMargin;
  257. }
  258. public int getCurPosition() {
  259. return (curPosition == 0 ? leftMargin : curPosition);
  260. }
  261. public void setLineSeparator(String ls) {
  262. this.lineSeparator = ls;
  263. }
  264. }
  265. /**
  266. * Utility class, used by HtmlFormatter, which tentatively tries to format
  267. * the contents of an HtmlDocument.TagBlock to see if the entire block can
  268. * fit on the rest of the line. If it cannot, it gives up and indicates
  269. * failure through the hasBlownTarget method; if it can, the contents can
  270. * be retrieved through the getString method.
  271. */
  272. class TagBlockRenderer extends HtmlVisitor {
  273. protected String s;
  274. protected boolean multiLine;
  275. protected boolean blownTarget;
  276. protected int targetWidth = 80;
  277. public void start() {
  278. s = "";
  279. multiLine = false;
  280. blownTarget = false;
  281. }
  282. public void finish() {
  283. }
  284. public void setTargetWidth(int w) {
  285. targetWidth = w;
  286. }
  287. public String getString() {
  288. return s;
  289. }
  290. public boolean isMultiLine() {
  291. return multiLine;
  292. }
  293. public boolean hasBlownTarget() {
  294. return blownTarget;
  295. }
  296. public void visit(HtmlDocument.Tag t) {
  297. if (s.length() < targetWidth)
  298. s += t.toString();
  299. else
  300. blownTarget = true;
  301. }
  302. public void visit(HtmlDocument.EndTag t) {
  303. if (s.length() < targetWidth)
  304. s += t.toString();
  305. else
  306. blownTarget = true;
  307. }
  308. public void visit(HtmlDocument.Comment c) {
  309. if (s.length() < targetWidth)
  310. s += c.toString();
  311. else
  312. blownTarget = true;
  313. }
  314. public void visit(HtmlDocument.Text t) {
  315. if (s.length() < targetWidth)
  316. s += t.toString();
  317. else
  318. blownTarget = true;
  319. }
  320. public void visit(HtmlDocument.Newline n) {
  321. multiLine = true;
  322. s += " ";
  323. }
  324. }