PageRenderTime 27ms CodeModel.GetById 9ms RepoModel.GetById 1ms app.codeStats 0ms

/src/com/atlassian/uwc/converters/tikiwiki/TableConverter.java

https://bitbucket.org/SteveClark7/uwc-liferayconverter
Java | 453 lines | 260 code | 34 blank | 159 comment | 32 complexity | 08f30680e510c67be628981caa2623aa MD5 | raw file
  1. package com.atlassian.uwc.converters.tikiwiki;
  2. import java.util.Vector;
  3. import java.util.regex.Matcher;
  4. import java.util.regex.Pattern;
  5. import org.apache.log4j.Logger;
  6. import com.atlassian.uwc.converters.BaseConverter;
  7. import com.atlassian.uwc.ui.Page;
  8. /**
  9. * Converts tikiwiki tables to Confluence table syntax
  10. *
  11. * @author Laura Kolker
  12. *
  13. */
  14. public class TableConverter extends BaseConverter {
  15. Logger log = Logger.getLogger(this.getClass());
  16. public void convert(Page page) {
  17. log.debug("Converting Table - starting");
  18. String input = page.getOriginalText();
  19. String converted = convertTable(input);
  20. page.setConvertedText(converted);
  21. log.debug("Converting Table - complete");
  22. }
  23. /**
  24. * @param input tikiwiki input
  25. * @return Confluence syntax replacement for the given input
  26. */
  27. protected String convertTable(String input) {
  28. String output = convertRows(input);
  29. output = convertHeaders(output);
  30. return output;
  31. }
  32. String pipe = "\\|";
  33. String doublePipe = pipe + pipe;
  34. String rowDelim = doublePipe + "|\n";
  35. String table =
  36. "(?:" + //dont capture
  37. "^|\n" + //beginning of string or newline
  38. ")" + //end don't capture
  39. doublePipe + //double pipe
  40. ".*?" + //anything until
  41. doublePipe + //double pipe
  42. "(?=" + //zero width non capture group (zero-width in important, 'cause we'll need that newline)
  43. "\n|$" + //newline or end of string
  44. ")"; //end non capture group
  45. public final Pattern tablePattern = Pattern.compile(table, Pattern.DOTALL);
  46. String row = "(?:" +rowDelim + ")" + "(.*?)" + "(?="+rowDelim+")";//FIXME this doesn't quite work -- one of the rowDelims Has to be | or we get false positives
  47. Pattern rowPattern = Pattern.compile(row);
  48. /**
  49. * converts the basic row syntax
  50. * @param input tikiwiki syntax
  51. * <br/>Example:<br/>
  52. * || __Some__| tikiwiki||rows|here||
  53. * @return confluence syntax
  54. * <br/>Example:<br/>
  55. * | __Some__| tikiwiki|<br/>
  56. * |rows|here|
  57. */
  58. protected String convertRows(String input) {
  59. String output = input;
  60. //tables that are right next to each other need an extra newline
  61. int newlineIndex = -1;
  62. Vector<Boolean> needsDelimNewline = new Vector<Boolean>();
  63. //we slurp up the table in case of ambiguous pipe delimiters
  64. Matcher tableFinder = tablePattern.matcher(input);
  65. StringBuffer sb = new StringBuffer();
  66. boolean found = false;
  67. while (tableFinder.find()) {
  68. found = true;
  69. String table = tableFinder.group();
  70. Matcher rowFinder = rowPattern.matcher(table);
  71. StringBuffer rowSb = new StringBuffer();
  72. int numColumns = 0;
  73. while (rowFinder.find()) {
  74. String content = rowFinder.group(1);
  75. if ("".equals(content)) continue;
  76. //handle empty first cells (otherwise are lost)
  77. if (content.startsWith("|")) content = " " + content;
  78. String replacement = "|" + content + "|\n";
  79. replacement = convertCells(replacement);
  80. if (numColumns < 1)
  81. numColumns = getNumberOfColumns(replacement);
  82. else
  83. replacement = enforceColumnNumbering(numColumns, replacement);
  84. replacement = RegexUtil.handleEscapesInReplacement(replacement);
  85. rowFinder.appendReplacement(rowSb, replacement);
  86. }
  87. numColumns = 0;
  88. rowFinder.appendTail(rowSb);
  89. String replacement = rowSb.toString();
  90. //deal with newline issues for tables right next to each other
  91. if (newlineIndex > -1 && newlineIndex == tableFinder.start())
  92. needsDelimNewline.add(new Boolean(true));
  93. else
  94. needsDelimNewline.add(new Boolean(false));
  95. newlineIndex = tableFinder.end();
  96. replacement = RegexUtil.handleEscapesInReplacement(replacement);
  97. tableFinder.appendReplacement(sb, replacement);
  98. }
  99. if (found) {
  100. tableFinder.appendTail(sb);
  101. output = sb.toString();
  102. output = removeOpeningExtraPipes(output);
  103. output = removeFinalDelims(output, needsDelimNewline);
  104. }
  105. return output;
  106. }
  107. /**
  108. * removes the last cell delimiters from all the tables.
  109. * Otherwise we end up with an extra "\n||\n" at then end of the table
  110. * @param input string with tables
  111. * @param needsDelimNewline list of true/false objects, one for each table.
  112. * If the object is true, the corresponding table needs an extra newline to seperate
  113. * it from the next table. (Important for tables that are sitting right next to each other)
  114. * @return
  115. */
  116. protected String removeFinalDelims(String input, Vector<Boolean> needsDelimNewline) {
  117. //removing delimiters might have to happen more than once
  118. String preRemove = "";
  119. String postRemove = input;
  120. while (!preRemove.equals(postRemove)) {
  121. preRemove = postRemove;
  122. postRemove = removeFinalDelim(preRemove, needsDelimNewline);
  123. }
  124. if (postRemove != null) input = postRemove;
  125. return input;
  126. }
  127. /**
  128. * same as removeFinalDelim(String input, Vector<Boolean>), but
  129. * the boolean vector that's passed is null.
  130. * @param input
  131. * @return
  132. */
  133. protected String removeFinalDelim(String input) {
  134. return removeFinalDelim(input, null);
  135. }
  136. String finalDelim =
  137. "(" + //start capture (group 1)
  138. "(?:" + //start non-capture group
  139. "\\|" + //a pipe
  140. "[^\n]*" + //0 or more newline until
  141. "\n" + //newline
  142. ")" + //end non-capture group
  143. "+" + //1 or more of the previous non-capture group
  144. ")" + //end capture (group1)
  145. "(" + //start capture (group 2)
  146. "(" + //start capture (group 3)
  147. "\n*" + //0 or more newlines
  148. ")" + //end capture (group 3)
  149. "\\|\\|" + //two pipes
  150. ")" + //end capture (group 2)
  151. "(" + //start capture (group 4)
  152. "\n|$" + //a newline or end of string
  153. ")"; //end capture (group 4)
  154. Pattern finalDelimPattern = Pattern.compile(finalDelim);
  155. /**
  156. * This is used to clean up the conversion for rows.
  157. * This must be called to avoid having an extra || delimiter
  158. * after the converted table. Also, important for handling
  159. * tables that are right next to each other.
  160. * @param input
  161. * <br/>Example:<br/>
  162. * | Table | here |<br/>
  163. * ||
  164. * @param needsNL has the same number of objects as the number of tables in
  165. * the given input. If a needNL object is true, then the corresponding table
  166. * needs an extra newline to seperate it from another table. If this object
  167. * if null, it's the same as if a needsNL object was passed with all elements set to false.
  168. * @return
  169. * <br/>Example:<br/>
  170. * | Table | here |
  171. */
  172. protected String removeFinalDelim(String input, Vector<Boolean> needsNL) {
  173. Matcher finalFinder = finalDelimPattern.matcher(input);
  174. StringBuffer sb = new StringBuffer();
  175. boolean found = false;
  176. int tableIndex = 0;
  177. while (finalFinder.find()) {
  178. found = true;
  179. String before = finalFinder.group(1);
  180. //deal with table-seperating newlines
  181. if (needsNL != null && tableIndex < needsNL.size())
  182. before = addNewline(before, needsNL.get(tableIndex));
  183. tableIndex++;
  184. String replacement = before;
  185. replacement = RegexUtil.handleEscapesInReplacement(replacement);
  186. finalFinder.appendReplacement(sb, replacement);
  187. }
  188. if (found) {
  189. finalFinder.appendTail(sb);
  190. return sb.toString();
  191. }
  192. return input;
  193. }
  194. /**
  195. * if needsNL is true, adds a newline to the beginning of input
  196. * @param input
  197. * @param needsNL
  198. * @return
  199. */
  200. protected String addNewline(String input, Boolean needsNL) {
  201. return (needsNL.booleanValue()?"\n":"") + input;
  202. }
  203. String header = "\\|\\s*__([^|]*)(?:__\\s*)(?:(?=\\|[^\n])|(\\|\n))";
  204. Pattern headerPattern = Pattern.compile(header);
  205. /**
  206. * converts tikiwiki header syntax (bolded) to confluence header syntax
  207. * @param input
  208. * <br/>Example:<br/>
  209. * | __Header__ | __Header__ |
  210. * | __Header__ | not a header |
  211. *
  212. * @return
  213. * <br/>Example:<br/>
  214. * || Header || Header ||
  215. * || Header | not a header |
  216. */
  217. protected String convertHeaders(String input) {
  218. Matcher headerFinder = headerPattern.matcher(input);
  219. StringBuffer sb = new StringBuffer();
  220. String output = input;
  221. boolean found = false;
  222. while (headerFinder.find()) {
  223. found = true;
  224. String content = headerFinder.group(1);
  225. String endDelim = headerFinder.group(2);
  226. if (endDelim == null) endDelim = " ";
  227. else endDelim = " |" + endDelim;
  228. String replacement = "|| " + content + endDelim;
  229. headerFinder.appendReplacement(sb, replacement);
  230. }
  231. if (found) {
  232. headerFinder.appendTail(sb);
  233. output = sb.toString();
  234. }
  235. return output;
  236. }
  237. String noWSBegin = "\\|+([^\\s|]+)";
  238. String noWSEnd = "([^\\s|]+)\\|+";
  239. /**
  240. * makes sure cells have an orderly amount of whitespace
  241. * @param input
  242. * <br/>Example:<br/>
  243. * |cells|with| disorderly|whitespace |
  244. *
  245. * @return
  246. * <br/>Example:<br/>
  247. * | cells | with | disorderly | whitespace |
  248. */
  249. protected String convertCells(String input) {
  250. String replacementBegin = "| {group1}";
  251. String replacementEnd = "{group1} |";
  252. String output = RegexUtil.loopRegex(input, noWSBegin, replacementBegin);
  253. output = RegexUtil.loopRegex(output, noWSEnd, replacementEnd);
  254. output = escapeSpecialCharacters(output);
  255. output = expandEmptyCells(output);
  256. return output;
  257. }
  258. /**
  259. * escapes confluence special characters that would
  260. * be rendered incorrectly.
  261. *
  262. * Example: if cell begins with a dash (-), the Confluence
  263. * renderer will think it's a list item unless we escape the dash.
  264. *
  265. * @param input
  266. * @return
  267. */
  268. protected String escapeSpecialCharacters(String input) {
  269. // so far there's only dashes
  270. String escaped = escapeListContextDashes(input);
  271. return escaped;
  272. }
  273. String listContextDashes =
  274. "(?<=" + //zero width lookbehind
  275. "\\|" + //starting pipe
  276. ")" + //end zero width capture
  277. "\\s*" + //optional whitespace
  278. "-"; //dash
  279. Pattern listDashesPattern = Pattern.compile(listContextDashes);
  280. /**
  281. * @param input
  282. * @return replaces any dashes in the given input
  283. * that were not meant to be in list context
  284. * with escaped dashes.
  285. */
  286. protected String escapeListContextDashes(String input) {
  287. Matcher listDashesFinder = listDashesPattern.matcher(input);
  288. if (listDashesFinder.find()) {
  289. return listDashesFinder.replaceAll(" \\\\-");
  290. }
  291. return input;
  292. }
  293. String triplePipe = "(?<=^|\n)" + doublePipe + pipe;
  294. /**
  295. * Translates incorrect triple pipes (|||) to a single pipe.
  296. *
  297. * (Sometimes at the end of a conversion we end up with
  298. * a triple pipe at the beginning of a line.)
  299. *
  300. * @param input
  301. * @return input with instances of line beginning triple pipes,
  302. * translated to single pipes (|)
  303. */
  304. protected String removeOpeningExtraPipes(String input) {
  305. return input.replaceAll(triplePipe, "|");
  306. }
  307. Pattern pipePattern = Pattern.compile(pipe + "{1,2}"); //one or 2 columns
  308. /**
  309. * @param input a row
  310. * @return the number of columns referenced in the given input
  311. */
  312. protected int getNumberOfColumns(String input) {
  313. Matcher pipeFinder = pipePattern.matcher(input);
  314. int num = 0;
  315. while (pipeFinder.find()) {
  316. num++;
  317. }
  318. return num - 1;
  319. }
  320. /**
  321. * makes the given input have numColumns number of cols
  322. * @param numColumns
  323. * @param input a table row
  324. * @return
  325. */
  326. protected String enforceColumnNumbering(int numColumns, String input) {
  327. int num = getNumberOfColumns(input);
  328. if (num < numColumns) {
  329. input = addColspans(numColumns, input, num);
  330. }
  331. else if (num > numColumns) {
  332. //how many more?
  333. int difference = num - numColumns;
  334. input = reduceColspans(input, difference);
  335. }
  336. return input;
  337. }
  338. /**
  339. * adds the necessary num of columns to input to make it have
  340. * requiredNum number of cols
  341. * @param requiredNum the required number of columns
  342. * @param input the row
  343. * @param inputNum the row's current number of columns
  344. * @return the row with the required # of cols
  345. */
  346. protected String addColspans(int requiredNum, String input, int inputNum) {
  347. String withoutNewline = input.replaceFirst("\n+$", "");
  348. boolean addNL = (!input.equals(withoutNewline));
  349. for(int i = inputNum; i < requiredNum; i++)
  350. withoutNewline += " |";
  351. input = withoutNewline + (addNL?"\n":"");
  352. return input;
  353. }
  354. /**
  355. * removes unnecessary (unsupported) colspans. (Cleans up ugly extra | |)
  356. * @param input A row
  357. * @param difference a positive number, indicating how many extra cells
  358. * there were than the header of the table
  359. * @return If the ending cells in the row are empty, and the row has colspans, then
  360. * try to remove the unnecessary empty cells, so that the row looks cleaner.
  361. * Doesn't work for every case.
  362. * @throws IllegalArgumentException if difference is 0 or negative
  363. */
  364. protected String reduceColspans(String input, int difference) {
  365. if (difference < 1) {
  366. String message = "difference must be greater than 0. Difference: " + difference;
  367. log.error(message);
  368. throw new IllegalArgumentException(message);
  369. }
  370. //FIXME this doesn't work for every case.
  371. //if the difference is greater than the number of empty cells, then
  372. //even if there are candidate empty cells for pruning,
  373. //they won't get removed
  374. // log.debug("input =\t'" + input + "'");
  375. boolean andNewline = input.endsWith("\n");
  376. String emptyColumns =
  377. "^" + //start at the beginning of the string
  378. "(" + //start capture (group 1)
  379. ".*" + //everything until
  380. ")" + //end capture (group 1)
  381. "(?:" + //start non-capture group
  382. "(" + //start capture (group 2)
  383. "\\|" + //a pipe
  384. ")" + //end capture (group 2)
  385. " " + //a space
  386. ")" + //end non-capture group
  387. "{"+difference+"}" + //repeat the previous group exactly 'difference' number of times
  388. "\\|" + //a pipe
  389. "$"; //the end of the string
  390. Pattern emptyColsPattern = Pattern.compile(emptyColumns);
  391. Matcher emptyColsMatcher = emptyColsPattern.matcher(input);
  392. if (emptyColsMatcher.find()) {
  393. String preEmptyCols = emptyColsMatcher.group(1);
  394. String lastPipe = emptyColsMatcher.group(2);
  395. String newRow = preEmptyCols + lastPipe;
  396. newRow += andNewline?"\n":"";
  397. // log.debug("newRow =\t'" + newRow + "'");
  398. return newRow;
  399. }
  400. return input;
  401. }
  402. /**
  403. * expands empty cells so that they are correct Confluence Syntax.
  404. * <br/>Example:
  405. * <br/>input: | A ||
  406. * <br/>output: | A | |
  407. * <br/>Example:
  408. * <br/>input: ||A||
  409. * <br/>output: ||A||
  410. * @param input
  411. * @return
  412. */
  413. protected String expandEmptyCells(String input) {
  414. if (!input.startsWith("||")) { //don't change double pipes in header rows
  415. return input.replaceAll(doublePipe, "| |");
  416. }
  417. return input;
  418. }
  419. }