PageRenderTime 41ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/src/com/atlassian/uwc/converters/dokuwiki/DokuWikiLinkConverter.java

https://bitbucket.org/dodok1/uwc
Java | 211 lines | 121 code | 23 blank | 67 comment | 21 complexity | 86abf1dcdd308477fb5c1209a2f69e42 MD5 | raw file
  1. package com.atlassian.uwc.converters.dokuwiki;
  2. import com.atlassian.uwc.ui.ConverterEngine;
  3. import com.atlassian.uwc.ui.Page;
  4. import com.atlassian.uwc.converters.BaseConverter;
  5. import org.apache.log4j.Logger;
  6. import java.net.URLEncoder;
  7. import java.io.UnsupportedEncodingException;
  8. /**
  9. * A custom converter to turn DokuWiki's links into Confluence page names.
  10. *
  11. * <strong>NOTE:</strong> This class is heavily dependent on the page name
  12. * set by ConverterEngine.setupPages(). Any change there will probably force a change here.
  13. *
  14. * @author Rex (Rolf Staflin)
  15. * @version $Id$
  16. */
  17. public class DokuWikiLinkConverter extends BaseConverter {
  18. private static Logger log = Logger.getLogger(DokuWikiLinkConverter.class);
  19. private static final String LINK_START = "[[";
  20. private static final String LINK_END = "]]";
  21. private static final String SEPARATOR = "|";
  22. /**
  23. * These are assumed to be protocols rather than DokuWiki namespaces.
  24. */
  25. private static final String[] protocols = {
  26. "file",
  27. "http",
  28. "https",
  29. "ftp",
  30. "mailto",
  31. "svn"
  32. };
  33. /**
  34. * Converts any links from the DokuWiki format to Confluence's format. Any links pointing to
  35. * other documents in the wiki are massaged further so that they point to the correct page title.
  36. * @param page A page with text to be converted.
  37. */
  38. public void convert(Page page) {
  39. assert page != null;
  40. assert page.getOriginalText() != null;
  41. String text = page.getOriginalText();
  42. int linkStart = text.indexOf(LINK_START);
  43. while (linkStart >= 0) {
  44. int linkEnd = text.indexOf(LINK_END, linkStart);
  45. if (linkEnd < 0) {
  46. break;
  47. }
  48. String link = text.substring(linkStart + 2, linkEnd);
  49. int separator = link.indexOf(SEPARATOR);
  50. String linkText = null;
  51. String linkTarget = link.trim();
  52. if (separator >= 0) {
  53. linkText = link.substring(separator + 1).trim();
  54. linkTarget = link.substring(0, separator).trim();
  55. // Remove any line breaks from the link text
  56. linkText = linkText.replaceAll("\r\n", " ");
  57. linkText = linkText.replaceAll("\r", " ");
  58. linkText = linkText.replaceAll("\n", " ");
  59. }
  60. if (isPageReference(linkTarget)) {
  61. // First of all, this may be a local reference (e.g., from
  62. // foo:bar you can link to foo:baz with [[baz]], and we need
  63. // to change that into [foo -- baz] because that's what the
  64. // baz page will have been renamed to.
  65. if (linkTarget.indexOf(":") < 0) {
  66. // Get the name space from the current page name.
  67. int lastSeparator = page.getName().lastIndexOf(ConverterEngine.CONFLUENCE_SEPARATOR);
  68. if (lastSeparator >= 0) {
  69. linkTarget = page.getName().substring(0, lastSeparator +
  70. ConverterEngine.CONFLUENCE_SEPARATOR.length()) +
  71. linkTarget;
  72. }
  73. } else {
  74. // Replace colons with the separator used in naming the pages.
  75. linkTarget = linkTarget.replaceAll(":", ConverterEngine.CONFLUENCE_SEPARATOR);
  76. }
  77. // Replace underscores with spaces
  78. linkTarget = linkTarget.replaceAll("_", " ");
  79. } else {
  80. linkTarget = normalizeLink(linkTarget);
  81. }
  82. StringBuffer newText = new StringBuffer("[");
  83. if (linkText != null) {
  84. newText.append(linkText);
  85. } else {
  86. newText.append(link);
  87. }
  88. newText.append(SEPARATOR).append(linkTarget);
  89. newText.append("]");
  90. text = text.substring(0, linkStart) + newText.toString() + text.substring(linkEnd + LINK_END.length());
  91. linkStart = text.indexOf(LINK_START);
  92. }
  93. page.setConvertedText(text);
  94. // Lastly, we update the page name
  95. formatPageName(page);
  96. }
  97. /**
  98. * "Normalizes" a link by doing the following:
  99. *
  100. * <li>Replacing all backslashes with forward slashes
  101. * (otherwise Confluence strips them from the links)
  102. * <li>Replacing spaces with "+"
  103. * <li>Changing the protocol file: into http: (file: does not seem to work)
  104. * <li>Adding the protocol http: to links starting with "//"
  105. * </ul>
  106. * @param linkTarget the link to be normalized
  107. * @return The normalized string
  108. */
  109. public static String normalizeLink(String linkTarget) {
  110. assert linkTarget != null;
  111. linkTarget = linkTarget.replaceAll("\\\\", "/");
  112. /* linkTarget = linkTarget.replaceAll("?", "%C3%A5");
  113. linkTarget = linkTarget.replaceAll("?", "%C3%A4");
  114. linkTarget = linkTarget.replaceAll("?", "%C3%B6");
  115. linkTarget = linkTarget.replaceAll("?", "%C3%85");
  116. linkTarget = linkTarget.replaceAll("?", "%C3%84");
  117. linkTarget = linkTarget.replaceAll("?", "%C3%96");
  118. linkTarget = linkTarget.replaceAll(" ", "+");
  119. */
  120. if (linkTarget.startsWith("file:")) {
  121. linkTarget = "http:" + linkTarget.substring(5);
  122. }
  123. if (linkTarget.startsWith("//")) {
  124. linkTarget = "http:" + linkTarget;
  125. }
  126. try {
  127. linkTarget = URLEncoder.encode(linkTarget, "UTF-8");
  128. } catch (UnsupportedEncodingException ignored) {
  129. log.error("Could not URL-encode target!", ignored);
  130. }
  131. // Now the encoder has ruined the colons and slashes :P. Fix that.
  132. linkTarget = linkTarget.replaceAll("%3A", ":");
  133. linkTarget = linkTarget.replaceAll("%2F", "/");
  134. return linkTarget;
  135. }
  136. /**
  137. * Makes the page name prettier by removing the file name extension,
  138. * replacing underscores with spaces and finally converting the first
  139. * character into upper case. E.g., "my_page.txt" is converted into "My page".
  140. * @param page A page with the name set.
  141. */
  142. private void formatPageName(Page page) {
  143. assert page != null;
  144. assert page.getName() != null;
  145. String name = page.getName();
  146. // Strip trailing file name extension.
  147. if (name.endsWith(".txt")) {
  148. name = name.substring(0, name.length()-4);
  149. }
  150. // Replace underscores with spaces
  151. name = name.replaceAll("_", " ");
  152. // Casify the name
  153. name = Character.toUpperCase(name.charAt(0)) + name.substring(1);
  154. page.setName(name);
  155. }
  156. /**
  157. * Determines if a link is a DokuWiki page reference or not.
  158. * Page references have the form [dir:][name] with one or more
  159. * [dir:] components, e.g., "path:to:a:page".
  160. *
  161. * The problem is that regular URL:s also contain colons; "mailto:foo" is not
  162. * a page reference, but "mail:foo" is. This method checks for some standard
  163. * protocol names and assumes that links that contain colons but do not start with
  164. * one of the protocol names are page references.
  165. *
  166. * To find out what protocols your DokuWiki contains, run this in a command prompt
  167. * at the wiki base document directory:
  168. * grep -ohr "\[\[[0-9a-zA-Z]\*:" * | sort | uniq
  169. * Look through the resulting list and eliminate the matches that are DokuWiki name spaces.
  170. * The rest are protocols.
  171. *
  172. * @param target The link text.
  173. * @return True if and only if the text is a page reference.
  174. */
  175. public static boolean isPageReference(String target) {
  176. assert target != null;
  177. int colon = target.indexOf(':');
  178. if (colon < 0) {
  179. return true; // No colon in the string -- must be a local reference!
  180. }
  181. for (String protocol : protocols) {
  182. if (target.startsWith(protocol)) {
  183. return false; // This target uses an approved protocol
  184. }
  185. }
  186. return true;
  187. }
  188. }