/src/com/atlassian/uwc/converters/swiki/HTMLTableConverter.java

https://bitbucket.org/atlassianlabs/universal-wiki-connector · Java · 313 lines · 208 code · 44 blank · 61 comment · 34 complexity · f192298680049eac43bf9d540519f127 MD5 · raw file

  1. package com.atlassian.uwc.converters.swiki;
  2. import java.util.regex.Matcher;
  3. import java.util.regex.Pattern;
  4. import org.apache.log4j.Logger;
  5. import com.atlassian.uwc.converters.BaseConverter;
  6. import com.atlassian.uwc.ui.Page;
  7. /************************************************************************
  8. * This class is to convert the html tables to Confluence table syntax.
  9. * The first level table will be converted to Confluence table macro. The
  10. * nested tables will be converted to normal confluence table syntax which uses
  11. * | or || to separate the cells. The table has to have </table> end tags.
  12. * For others such as tr, th and td, the end tags are optional.
  13. *
  14. * @author bsun
  15. *
  16. */
  17. public class HTMLTableConverter extends BaseConverter {
  18. Logger log = Logger.getLogger(this.getClass());
  19. static final String TableStart="<table", TableEnd="</table>";
  20. public void convert(Page page) {
  21. log.debug("Table Converter - starting");
  22. String input = page.getOriginalText();
  23. String converted = convertTables(input);
  24. log.debug("converted = " + converted);
  25. page.setConvertedText(converted);
  26. log.debug("Table Converter - complete");
  27. }
  28. /**
  29. * to convert html tables to confluence table syntax
  30. *
  31. * @param input
  32. * @return
  33. */
  34. public String convertTables(String input)
  35. {
  36. String output=input;
  37. while (true)
  38. {
  39. int endIndex=output.indexOf(TableEnd);
  40. if (endIndex < 0)
  41. break;
  42. int startIndex=output.lastIndexOf(TableStart, endIndex);
  43. if (startIndex < 0)
  44. break;
  45. int startIndex2=output.indexOf(TableStart);
  46. String tableString=output.substring(startIndex , endIndex + TableEnd.length());
  47. String convertedTableString="";
  48. if (startIndex2 < startIndex)
  49. //must be a nested table
  50. convertedTableString=convertNestedTable(tableString);
  51. else
  52. convertedTableString=convertTable(tableString);
  53. output=output.substring(0, startIndex) +
  54. convertedTableString + output.substring(endIndex + TableEnd.length());
  55. }
  56. return output;
  57. }
  58. /*********************************************
  59. * to convert the html table to Confluence table macro
  60. *
  61. * @param input
  62. * @return
  63. */
  64. public String convertTable(String input)
  65. {
  66. String output = input;
  67. Pattern pattern = Pattern.compile("(<table[^>]*>)(.*?)(</table>)", Pattern.DOTALL);
  68. Matcher matcher = pattern.matcher(input);
  69. if (matcher.find())
  70. {
  71. String start=convertStartTag("table", matcher.group(1));
  72. String rows=convertRows(matcher.group(2).trim());
  73. output=start + rows + "{table}";
  74. }
  75. return output;
  76. }
  77. /****************************************************
  78. * to converted the nested table to normal confluence table
  79. * syntax, i.e. the cells separated by | or ||s.
  80. *
  81. * @param input
  82. * @return
  83. */
  84. public String convertNestedTable(String input)
  85. {
  86. String temp = input.replaceAll("<table[^>]*>", "");
  87. temp=temp.replaceAll("</table>", "");
  88. String output="";
  89. String rowString="";
  90. String endLine = System.getProperty( "line.separator" );
  91. int index=0;
  92. while (true)
  93. {
  94. //convert row first
  95. int rowIndex=temp.indexOf("<tr", index);
  96. if (rowIndex < 0)
  97. break;
  98. int emptyRowIndex=temp.indexOf("<tr/>", index);
  99. if (rowIndex == emptyRowIndex)
  100. {
  101. index = emptyRowIndex + "<tr/>".length();
  102. continue;
  103. }
  104. int nextRowIndex=temp.indexOf("<tr", rowIndex + 3);
  105. if (nextRowIndex >=0)
  106. {
  107. rowString=temp.substring(rowIndex, nextRowIndex);
  108. index=nextRowIndex;
  109. }
  110. else
  111. {
  112. rowString = temp.substring(rowIndex);
  113. index += rowString.length();
  114. }
  115. rowString = rowString.replaceAll("<tr[^>]*>", "");
  116. rowString=rowString.replaceAll("</tr>", "");
  117. //now convert the cells
  118. if (rowString.indexOf("<th") >= 0)
  119. {
  120. rowString=rowString.replaceAll("<th[^>]*>", "||");
  121. rowString=rowString.replaceAll("</th>", "");
  122. output += rowString + "||" + endLine;
  123. }
  124. if (rowString.indexOf("<td") >=0 )
  125. {
  126. rowString=rowString.replaceAll("<td[^>]*>", "|");
  127. rowString=rowString.replaceAll("</td>", "");
  128. output += rowString + "|" + endLine;
  129. }
  130. }
  131. return output;
  132. }
  133. /*********************************************************
  134. * to convert the start tag (table, tr, th and td) to confluence table macro
  135. * start tag
  136. * @param attr
  137. * @param input
  138. * @return
  139. */
  140. protected String convertStartTag(String attr, String input)
  141. {
  142. String buffer=input.substring(("<" + attr).length(), input.lastIndexOf('>'));
  143. if (buffer.trim().length() == 0)
  144. return "{" + attr + "}";
  145. String output="{" + attr + ":";
  146. Pattern p = Pattern.compile(" ");
  147. String[] items = p.split(buffer);
  148. for (String i : items)
  149. {
  150. if (i.trim().length() > 0)
  151. output += i + "|";
  152. }
  153. output=output.substring(0, output.length() - 1);
  154. output += "}";
  155. return output;
  156. }
  157. /**************************************************************
  158. * to convert the table rows to Confluence table rows macro.
  159. * The end tags </tr> are optional in the input.
  160. *
  161. * @param input
  162. * @return
  163. */
  164. public String convertRows(String input)
  165. {
  166. String output="";
  167. String rowString="";
  168. int rowIndex=input.indexOf("<tr");
  169. int emptyRowIndex=input.indexOf("<tr/>");
  170. if (rowIndex == emptyRowIndex)
  171. return "{tr}{tr}";
  172. int nextRowIndex=input.indexOf("<tr", rowIndex + 3);
  173. if (nextRowIndex >=0)
  174. {
  175. rowString=input.substring(rowIndex, nextRowIndex);
  176. if (rowString.indexOf("</tr>") < 0)
  177. rowString += "</tr>";
  178. rowString = convertRow(rowString);
  179. output=input.substring(0, rowIndex) +
  180. rowString + convertRows(input.substring(nextRowIndex));
  181. }
  182. else
  183. {
  184. rowString = input.substring(rowIndex);
  185. if (rowString.indexOf("</tr>") < 0)
  186. rowString += "</tr>";
  187. rowString = convertRow(rowString);
  188. output=input.substring(0, rowIndex) + rowString ;
  189. }
  190. return output;
  191. }
  192. /**************************************************************
  193. * to convert the table row to Confluence table row macro. The end
  194. * tag </tr> must exist in the input.
  195. *
  196. * @param input
  197. * @return
  198. */
  199. public String convertRow(String input)
  200. {
  201. Pattern pattern = null;
  202. Matcher matcher = null;
  203. String output = "";
  204. pattern = Pattern.compile("(<tr[^>]*>)(.*?)(</tr>)", Pattern.DOTALL);
  205. matcher = pattern.matcher(input);
  206. if (matcher.find())
  207. {
  208. String start=convertStartTag("tr", matcher.group(1));
  209. String rows=matcher.group(2);
  210. if (rows.indexOf("<th") >= 0)
  211. rows=convertCells("th", matcher.group(2).trim());
  212. else
  213. rows=convertCells("td", matcher.group(2).trim());
  214. String converted=start + rows + "{tr}";
  215. output += converted;
  216. }
  217. if(output.length() == 0)
  218. return input;
  219. return output;
  220. }
  221. /**************************************************************
  222. * to convert the table cells to Confluence table cells macro. The
  223. * end tags such as </td> and </th> are optional in the input.
  224. *
  225. * @param input
  226. * @return
  227. */
  228. public String convertCells(String attr, String input)
  229. {
  230. Pattern pattern1 = null, pattern2 = null;
  231. Matcher matcher = null;
  232. String output = "";
  233. boolean last=false;
  234. //String regex="(<th[^>]*>)(.*?)((<th[^>]*>)|(</th>))";
  235. String regex1="(<" + attr + "[^>]*>)(.*?)((<" + attr +
  236. "[^>]*>)|(</" + attr + ">))";
  237. pattern1 = Pattern.compile(regex1, Pattern.DOTALL);
  238. String regex2="(<" + attr + "[^>]*>)(.*)";
  239. pattern2 = Pattern.compile(regex2, Pattern.DOTALL);
  240. String temp=input;
  241. while(true)
  242. {
  243. if (temp.trim().length() == 0)
  244. break;
  245. matcher = pattern1.matcher(temp);
  246. if (matcher.find() == false)
  247. {
  248. matcher=pattern2.matcher(temp);
  249. last=true;
  250. if (!matcher.find())
  251. break;
  252. }
  253. String start=convertStartTag(attr, matcher.group(1));
  254. String rows=matcher.group(2).trim();
  255. String converted=start + rows + "{" + attr + "}";
  256. output += converted;
  257. if (last)
  258. break;
  259. String g3=matcher.group(3);
  260. if (g3.indexOf("<" + attr) >= 0)
  261. temp=temp.substring(matcher.start(3));
  262. else
  263. temp=temp.substring(matcher.end(3));
  264. }
  265. if (output.length() == 0)
  266. return input;
  267. return output;
  268. }
  269. }