PageRenderTime 44ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/light_logs/light_logs/src/java/src/com/toolkit/util/pdf/XHTMLRendererText.java

http://lighttexteditor.googlecode.com/
Java | 310 lines | 97 code | 27 blank | 186 comment | 7 complexity | 0575938e4f188611fc5b5a5a00880a21 MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0, LGPL-2.1, LGPL-2.0
  1. /********************************************************************
  2. *
  3. * Copyright (c) 2006-2007 Berlin Brown and botnode.com All Rights Reserved
  4. *
  5. * http://www.opensource.org/licenses/bsd-license.php
  6. * All rights reserved.
  7. * Redistribution and use in source and binary forms, with or without modification,
  8. * are permitted provided that the following conditions are met:
  9. * * Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * * Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. * * Neither the name of the Botnode.com (Berlin Brown) nor
  15. * the names of its contributors may be used to endorse or promote
  16. * products derived from this software without specific prior written permission.
  17. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  21. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  22. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  23. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  24. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  25. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  26. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  27. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28. *
  29. * Date: 1/5/2009
  30. * 7/15/2009 - Added Clojure 1.0, other performance fixes and cleanups.
  31. *
  32. * Main Description: Light Log Viewer is a tool for making it easier to search log files.
  33. * Light Log Viewer adds some text highlighting, quick key navigation to text files, simple graphs
  34. * and charts for monitoring logs, file database to quickly navigate to files of interest,
  35. * and HTML to PDF convert tool.
  36. * Light Log was developed with a combination of Clojure 1.0, Java and Scala with use of libs, SWT 3.4, JFreeChart, iText.
  37. *
  38. * Quickstart : the best way to run the Light Log viewer is to click on the win32 batch script light_logs.bat
  39. * (you may need to edit the Linux script for Unix/Linux environments).
  40. * Edit the win32 script to add more heap memory or other parameters.
  41. *
  42. * The clojure source is contained in : HOME/src/octane
  43. * The java source is contained in : HOME/src/java/src
  44. *
  45. * To build the java source, see : HOME/src/java/build.xml and build_pdf_gui.xml
  46. *
  47. * Metrics: (as of 7/15/2009) Light Log Viewer consists of 6500 lines of Clojure code, and contains wrapper code
  48. * around the Java source. There are 2000+ lines of Java code in the Java library for Light Log Viewer.
  49. *
  50. * Additional Development Notes: The SWT gui and other libraries are launched from a dynamic classloader. Clojure is also
  51. * started from the same code, and reflection is used to dynamically initiate Clojure. See the 'start' package. The binary
  52. * code is contained in the octane_start.jar library.
  53. *
  54. * Home Page: http://code.google.com/p/lighttexteditor/
  55. *
  56. * Contact: Berlin Brown <berlin dot brown at gmail.com>
  57. *********************************************************************/
  58. package com.toolkit.util.pdf;
  59. import java.util.ArrayList;
  60. import java.util.List;
  61. import java.util.regex.Matcher;
  62. import java.util.regex.Pattern;
  63. import com.toolkit.util.Print;
  64. /**
  65. * Utility interface for taking an input HTML document, formatting the content and then
  66. * ensuring that it is properly formed for XHTMLRenderer and TagSoup.
  67. *
  68. * APPLICATION LEVEL
  69. *
  70. * @author Berlin
  71. * @version $Revision: 1.0 $
  72. */
  73. public interface XHTMLRendererText {
  /**
   * Sets the original input text document.
   *
   * @param text the raw document text; presumably the HTML that {@link #parse()}
   *             later formats (NOTE(review): null handling is left to the
   *             implementation - confirm against implementors)
   */
  public void setText(final String text);

  /**
   * Returns the original text document.
   *
   * @return the original document text (NOTE(review): presumably the value last
   *         passed to {@link #setText(String)} - confirm against implementors)
   */
  public String getText();

  /**
   * Parses the input document and returns the formatted text.
   *
   * @return the formatted document text
   */
  public String parse();

  /**
   * Sets the base renderer associated with this text handler.
   *
   * @param base the base renderer (NOTE(review): how implementations use it is
   *             not visible from this interface - confirm)
   */
  public void setBaseRenderer(final XHTMLRendererBase base);

  /**
   * Returns the base renderer associated with this text handler.
   *
   * @return the base renderer (NOTE(review): presumably the value last passed to
   *         {@link #setBaseRenderer(XHTMLRendererBase)} - confirm)
   */
  public XHTMLRendererBase getBaseRenderer();
  101. ///////////////////////////////////////////////////////////////////////////
  102. // XHtml Tuple Nested Classes (implicitly static, being members of an interface)
  103. // GroupPosTuple and TextTuple.
  104. ///////////////////////////////////////////////////////////////////////////
  /**
   * Immutable pair of integers: the start and end positions of a group.
   * The class stores the two values as given and does not interpret them.
   *
   * @author Berlin
   * @version $Revision: 1.0 $
   */
  public final class GroupPosTuple {

    private final int start;
    private final int end;

    /**
     * Builds a tuple from the two positions.
     *
     * @param s start position
     * @param e end position
     */
    public GroupPosTuple(final int s, final int e) {
      this.start = s;
      this.end = e;
    }

    /**
     * @return the start position supplied at construction
     */
    public final int getStart() {
      return start;
    }

    /**
     * @return the end position supplied at construction
     */
    public final int getEnd() {
      return end;
    }

    /**
     * Renders the tuple for diagnostics.
     *
     * @return a string of the form {@code <<GroupPosTuple start=S end=E>>}
     */
    public String toString() {
      final StringBuffer out = new StringBuffer("<<GroupPosTuple start=");
      out.append(start);
      out.append(" end=");
      out.append(end);
      out.append(">>");
      return out.toString();
    }
  }
  144. /**
  145. * Simple tuple data structure, <text>,<List of Group Positions>
  146. * @author Berlin
  147. * @version $Revision: 1.0 $
  148. */
  149. public final class TextTuple {
  150. private String text = "";
  151. private final List pos_tuples = new ArrayList();
  152. /**
  153. * Constructor for TextTuple.
  154. * @param doc String
  155. * @param s int
  156. * @param e int
  157. */
  158. public TextTuple(final String doc, final int s, final int e) {
  159. this(doc);
  160. // Set the first group pos tuple.
  161. this.addPosTuple(s, e);
  162. }
  163. /**
  164. * Constructor for TextTuple.
  165. */
  166. public TextTuple() { }
  167. /**
  168. * Constructor for TextTuple.
  169. * @param doc String
  170. */
  171. public TextTuple(final String doc) {
  172. if (doc == null) {
  173. text = "";
  174. } else {
  175. text = doc;
  176. }
  177. }
  178. /**
  179. * Implementation Routine toString.
  180. * @return String
  181. */
  182. public String toString() {
  183. return this.text;
  184. }
  185. /**
  186. * Implementation Routine getTuples.
  187. * @return List
  188. */
  189. public List getTuples() { return pos_tuples; }
  190. /**
  191. * @return the text
  192. */
  193. public final String getText() {
  194. return text;
  195. }
  196. /**
  197. * Implementation Routine setText.
  198. * @param txt String
  199. */
  200. public final void setText(String txt) {
  201. this.text = txt;
  202. }
  203. /**
  204. * Implementation Routine addPosTuple.
  205. * @param s int
  206. * @param e int
  207. * @return GroupPosTuple
  208. */
  209. public final GroupPosTuple addPosTuple(final int s, final int e) {
  210. final GroupPosTuple tuple = new GroupPosTuple(s, e);
  211. pos_tuples.add(tuple);
  212. return tuple;
  213. }
  214. } // End of Class
  215. /**
  216. * @author Berlin
  217. */
  218. public final class TextUtils {
  219. /**
  220. * Implementation Routine removeHtmlSection.
  221. * @param maindoc String
  222. * @param repl String
  223. * @param startTag String
  224. * @param endTag String
  225. * @return String
  226. */
  227. public final static String removeHtmlSection(final String maindoc,
  228. final String repl, final String startTag, final String endTag) {
  229. // Compile regular expression
  230. final StringBuffer track_pattern_buf = new StringBuffer();
  231. track_pattern_buf.append(startTag);
  232. track_pattern_buf.append("(.*?)");
  233. track_pattern_buf.append(endTag);
  234. //final Pattern pattern = Pattern.compile(track_pattern_buf.toString());
  235. final Pattern pattern = new_pattern(track_pattern_buf.toString());
  236. final int lorig = maindoc.length();
  237. // Replace all occurrences of pattern in input
  238. final Matcher matcher = pattern.matcher(maindoc);
  239. final String output = matcher.replaceAll(repl);
  240. final int lnew = output.length();
  241. if (lnew == lorig) {
  242. // Qualify when a text change has been made
  243. Print.println("<Searching for tags for further processing, failed [[" +startTag+ "]]> diff:" + (lorig-lnew));
  244. }
  245. return output;
  246. }
  247. /**
  248. * Implementation Routine new_pattern.
  249. * @param pattern String
  250. * @return Pattern
  251. */
  252. public static final Pattern new_pattern(final String pattern) {
  253. return Pattern.compile(pattern,
  254. Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);
  255. }
  256. /**
  257. * Implementation Routine findMatch.
  258. * @param text String
  259. * @param pattern_str String
  260. * @param group int
  261. * @param total_groups int
  262. * @return TextTuple
  263. */
  264. public static final TextTuple findMatch(final String text, final String pattern_str, int group, int total_groups) {
  265. final StringBuffer res = new StringBuffer();
  266. final Pattern pattern = new_pattern(pattern_str);
  267. final Matcher match = pattern.matcher(text);
  268. final TextTuple tuple = new TextTuple();
  269. while (match.find()) {
  270. if (match.groupCount() >= total_groups) {
  271. res.append(match.group(group));
  272. tuple.addPosTuple(match.start(group), match.end(group));
  273. }
  274. } // End of While
  275. tuple.setText(res.toString());
  276. return tuple;
  277. }
  278. }
  279. }